/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/os_interface/linux/drm_memory_manager.h"

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/heap_assigner.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/memory_manager/host_ptr_manager.h"
#include "shared/source/memory_manager/residency.h"
#include "shared/source/os_interface/linux/allocator_helper.h"
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/source/os_interface/os_interface.h"

#include "drm/i915_drm.h"

#include <cstring>
#include <iostream>
#include <memory>

namespace NEO {

DrmMemoryManager::DrmMemoryManager(gemCloseWorkerMode mode,
                                   bool forcePinAllowed,
                                   bool validateHostPtrMemory,
                                   ExecutionEnvironment &executionEnvironment) : MemoryManager(executionEnvironment),
                                                                                 forcePinEnabled(forcePinAllowed),
                                                                                 validateHostPtrMemory(validateHostPtrMemory) {
    alignmentSelector.addCandidateAlignment(MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB);
    if (DebugManager.flags.AlignLocalMemoryVaTo2MB.get() != 0) {
        alignmentSelector.addCandidateAlignment(MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB);
    }
    const size_t customAlignment = static_cast<size_t>(DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.get());
    if (customAlignment > 0) {
        const auto heapIndex = customAlignment >= MemoryConstants::pageSize2Mb ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD64KB;
        alignmentSelector.addCandidateAlignment(customAlignment, true, AlignmentSelector::anyWastage, heapIndex);
    }

    initialize(mode);
}

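// Initializes per-root-device state: GPU address (gfx) partitions, local memory
// allocation lists, and the pin buffer objects. The GEM-close worker is disabled
// when every DRM device reports VM_BIND support (or when forced off via the
// EnableGemCloseWorker debug flag), since BO closing does not need to be
// deferred on that path.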
void DrmMemoryManager::initialize(gemCloseWorkerMode mode) {
    bool disableGemCloseWorker = true;

    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
        auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace;
        if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh)) {
            initialized = false;
            return;
        }
        localMemAllocs.emplace_back();
        disableGemCloseWorker &= getDrm(rootDeviceIndex).isVmBindAvailable();
    }
    MemoryManager::virtualPaddingAvailable = true;

    if (disableGemCloseWorker) {
        mode = gemCloseWorkerMode::gemCloseWorkerInactive;
    }

    if (DebugManager.flags.EnableGemCloseWorker.get() != -1) {
        mode = DebugManager.flags.EnableGemCloseWorker.get() ? gemCloseWorkerMode::gemCloseWorkerActive : gemCloseWorkerMode::gemCloseWorkerInactive;
    }

    if (mode != gemCloseWorkerMode::gemCloseWorkerInactive) {
        gemCloseWorker.reset(new DrmGemCloseWorker(*this));
    }

    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
        if (forcePinEnabled || validateHostPtrMemory) {
            auto cpuAddrBo = alignedMallocWrapper(MemoryConstants::pageSize, MemoryConstants::pageSize);
            UNRECOVERABLE_IF(cpuAddrBo == nullptr);
            // Preprogram the BO with MI_BATCH_BUFFER_END and MI_NOOP. This BO will be used as the last BB in a series to indicate the end of submission.
            reinterpret_cast<uint32_t *>(cpuAddrBo)[0] = 0x05000000; // MI_BATCH_BUFFER_END
            reinterpret_cast<uint32_t *>(cpuAddrBo)[1] = 0;          // MI_NOOP
            memoryForPinBBs.push_back(cpuAddrBo);
            DEBUG_BREAK_IF(memoryForPinBBs[rootDeviceIndex] == nullptr);
        }
        pinBBs.push_back(createRootDeviceBufferObject(rootDeviceIndex));
    }

    initialized = true;
}

BufferObject *DrmMemoryManager::createRootDeviceBufferObject(uint32_t rootDeviceIndex) {
    BufferObject *bo = nullptr;
    if (forcePinEnabled || validateHostPtrMemory) {
        bo = allocUserptr(reinterpret_cast<uintptr_t>(memoryForPinBBs[rootDeviceIndex]), MemoryConstants::pageSize, 0, rootDeviceIndex);
        if (bo) {
            if (isLimitedRange(rootDeviceIndex)) {
                auto boSize = bo->peekSize();
                bo->setAddress(acquireGpuRange(boSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD));
                UNRECOVERABLE_IF(boSize < bo->peekSize());
            }
        } else {
            alignedFreeWrapper(memoryForPinBBs[rootDeviceIndex]);
            memoryForPinBBs[rootDeviceIndex] = nullptr;
            DEBUG_BREAK_IF(true);
            UNRECOVERABLE_IF(validateHostPtrMemory);
        }
    }
    return bo;
}

void DrmMemoryManager::createDeviceSpecificMemResources(uint32_t rootDeviceIndex) {
    pinBBs[rootDeviceIndex] = createRootDeviceBufferObject(rootDeviceIndex);
}

DrmMemoryManager::~DrmMemoryManager() {
    for (auto &memoryForPinBB : memoryForPinBBs) {
        if (memoryForPinBB) {
            MemoryManager::alignedFreeWrapper(memoryForPinBB);
        }
    }
}

void DrmMemoryManager::releaseDeviceSpecificMemResources(uint32_t rootDeviceIndex) {
    return releaseBufferObject(rootDeviceIndex);
}

void DrmMemoryManager::releaseBufferObject(uint32_t rootDeviceIndex) {
    if (auto bo = pinBBs[rootDeviceIndex]) {
        if (isLimitedRange(rootDeviceIndex)) {
            releaseGpuRange(reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(), rootDeviceIndex);
        }
        DrmMemoryManager::unreference(bo, true);
        pinBBs[rootDeviceIndex] = nullptr;
    }
}

void DrmMemoryManager::commonCleanup() {
    if (gemCloseWorker) {
        gemCloseWorker->close(true);
    }

    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < pinBBs.size(); ++rootDeviceIndex) {
        releaseBufferObject(rootDeviceIndex);
    }
    pinBBs.clear();
}

void DrmMemoryManager::eraseSharedBufferObject(NEO::BufferObject *bo) {
    auto it = std::find(sharingBufferObjects.begin(), sharingBufferObjects.end(), bo);
    DEBUG_BREAK_IF(it == sharingBufferObjects.end());
    releaseGpuRange(reinterpret_cast<void *>((*it)->peekAddress()), (*it)->peekUnmapSize(), this->getRootDeviceIndex(bo->peekDrm()));
    sharingBufferObjects.erase(it);
}

void DrmMemoryManager::pushSharedBufferObject(NEO::BufferObject *bo) {
    bo->markAsReusableAllocation();
    sharingBufferObjects.push_back(bo);
}

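// Drops one reference on the BO. BufferObject::unreference() appears to return
// the previous reference count, so a result of 1 indicates this call released
// the last reference and the BO can be closed and deleted. With
// synchronousDestroy the caller busy-waits until it holds the only remaining
// reference.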
uint32_t DrmMemoryManager::unreference(NEO::BufferObject *bo, bool synchronousDestroy) {
    if (!bo)
        return -1;

    if (synchronousDestroy) {
        while (bo->getRefCount() > 1)
            ;
    }

    std::unique_lock<std::mutex> lock(mtx, std::defer_lock);
    if (bo->peekIsReusableAllocation()) {
        lock.lock();
    }

    uint32_t r = bo->unreference();

    if (r == 1) {
        if (bo->peekIsReusableAllocation()) {
            eraseSharedBufferObject(bo);
        }

        bo->close();

        if (lock) {
            lock.unlock();
        }

        delete bo;
    }
    return r;
}

uint64_t DrmMemoryManager::acquireGpuRange(size_t &size, uint32_t rootDeviceIndex, HeapIndex heapIndex) {
    auto gfxPartition = getGfxPartition(rootDeviceIndex);
    return GmmHelper::canonize(gfxPartition->heapAllocate(heapIndex, size));
}

void DrmMemoryManager::releaseGpuRange(void *address, size_t unmapSize, uint32_t rootDeviceIndex) {
    uint64_t graphicsAddress = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
    graphicsAddress = GmmHelper::decanonize(graphicsAddress);
    auto gfxPartition = getGfxPartition(rootDeviceIndex);
    gfxPartition->freeGpuAddressRange(graphicsAddress, unmapSize);
}

bool DrmMemoryManager::isKmdMigrationAvailable(uint32_t rootDeviceIndex) {
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo->platform.eRenderCoreFamily);

    auto useKmdMigration = hwHelper.isKmdMigrationSupported(*hwInfo);

    if (DebugManager.flags.UseKmdMigration.get() != -1) {
        useKmdMigration = DebugManager.flags.UseKmdMigration.get();
    }

    return useKmdMigration;
}

bool DrmMemoryManager::setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) {
    auto drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);

    return drmAllocation->setMemAdvise(&this->getDrm(rootDeviceIndex), flags);
}

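// Wraps DRM_IOCTL_I915_GEM_USERPTR to turn an existing host pointer into a BO.
// On kernels that reject the default mode with ENODEV, the call is retried with
// I915_USERPTR_UNSYNCHRONIZED, which usually requires root privileges; hence
// the geteuid() hint below.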
NEO::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, uint32_t rootDeviceIndex) {
    drm_i915_gem_userptr userptr = {};
    userptr.user_ptr = address;
    userptr.user_size = size;
    userptr.flags = static_cast<uint32_t>(flags);

    if (this->getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_USERPTR, &userptr) != 0) {
        if (errno == ENODEV && userptr.flags == 0) {
            userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
            if (this->getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_USERPTR, &userptr) != 0) {
                if (geteuid() != 0) {
                    printDebugString(true, stderr, "%s", "ioctl(I915_GEM_USERPTR) failed. Try running as root but expect poor stability.\n");
                }
                return nullptr;
            }
        } else {
            return nullptr;
        }
    }

    PRINT_DEBUG_STRING(DebugManager.flags.PrintBOCreateDestroyResult.get(), stdout, "Created new BO with GEM_USERPTR, handle: BO-%d\n", userptr.handle);

    auto res = new (std::nothrow) BufferObject(&getDrm(rootDeviceIndex), userptr.handle, size, maxOsContextCount);
    if (!res) {
        DEBUG_BREAK_IF(true);
        return nullptr;
    }
    res->setAddress(address);

    return res;
}

void DrmMemoryManager::emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const {
    auto rootDeviceIndex = allocationData.rootDeviceIndex;
    if (forcePinEnabled && pinBBs.at(rootDeviceIndex) != nullptr && allocationData.flags.forcePin && allocationData.size >= this->pinThreshold) {
        pinBBs.at(rootDeviceIndex)->pin(&bo, 1, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, 0, getDefaultDrmContextId(rootDeviceIndex));
    }
}

DrmAllocation *DrmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) {
    auto hostPtr = const_cast<void *>(allocationData.hostPtr);
    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, nullptr, hostPtr, castToUint64(hostPtr), allocationData.size, MemoryPool::System4KBPages);
    allocation->fragmentsStorage = handleStorage;
    if (!allocation->setCacheRegion(&this->getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
        return nullptr;
    }
    return allocation.release();
}

DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) {
    if (allocationData.type == NEO::GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA) {
        return createMultiHostAllocation(allocationData);
    }

    return allocateGraphicsMemoryWithAlignmentImpl(allocationData);
}

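// For SVM_CPU allocations the GPU virtual address must match the alignment of
// the CPU pointer, so the reserved VA range is padded by one extra alignment
// unit and the GPU address is then aligned up inside that reservation.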
DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const AllocationData &allocationData) {
    const size_t minAlignment = getUserptrAlignment();
    size_t cAlignment = alignUp(std::max(allocationData.alignment, minAlignment), minAlignment);
    // When size == 0, allocate allocationAlignment bytes.
    // This is needed to prevent overlapping pages with user pointers.
    size_t cSize = std::max(alignUp(allocationData.size, minAlignment), minAlignment);

    uint64_t gpuReservationAddress = 0;
    uint64_t alignedGpuAddress = 0;
    size_t alignedStorageSize = cSize;
    size_t alignedVirtualAddressRangeSize = cSize;
    auto svmCpuAllocation = allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU;
    if (svmCpuAllocation) {
        // Add padding in case the reserved address is not aligned.
        alignedStorageSize = alignUp(cSize, cAlignment);
        alignedVirtualAddressRangeSize = alignedStorageSize + cAlignment;
    }

    // If limited-range allocation is enabled, the BO requires memory allocation in the limited-range heap.
    if ((isLimitedRange(allocationData.rootDeviceIndex) || svmCpuAllocation) && !allocationData.flags.isUSMHostAllocation) {
        gpuReservationAddress = acquireGpuRange(alignedVirtualAddressRangeSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);
        if (!gpuReservationAddress) {
            return nullptr;
        }

        alignedGpuAddress = gpuReservationAddress;
        if (svmCpuAllocation) {
            alignedGpuAddress = alignUp(gpuReservationAddress, cAlignment);
        }
    }

    auto drmAllocation = createAllocWithAlignment(allocationData, cSize, cAlignment, alignedStorageSize, alignedGpuAddress);
    if (drmAllocation != nullptr) {
        drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(gpuReservationAddress), alignedVirtualAddressRangeSize);
    }

    return drmAllocation;
}

DrmAllocation *DrmMemoryManager::createAllocWithAlignmentFromUserptr(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress) {
    auto res = alignedMallocWrapper(size, alignment);
    if (!res) {
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(res), size, 0, allocationData.rootDeviceIndex));
    if (!bo) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    zeroCpuMemoryIfRequested(allocationData, res, size);
    obtainGpuAddress(allocationData, bo.get(), gpuAddress);
    emitPinningRequest(bo.get(), allocationData);

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->peekAddress(), size, MemoryPool::System4KBPages);
    allocation->setDriverAllocatedCpuPtr(res);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSVMSize);
    if (!allocation->setCacheRegion(&this->getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    bo.release();

    return allocation.release();
}

void DrmMemoryManager::obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress) {
    if ((isLimitedRange(allocationData.rootDeviceIndex) || allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) &&
        !allocationData.flags.isUSMHostAllocation) {
        bo->setAddress(gpuAddress);
    }
}

DrmAllocation *DrmMemoryManager::allocateUSMHostGraphicsMemory(const AllocationData &allocationData) {
    const size_t minAlignment = getUserptrAlignment();
    // When size == 0, allocate allocationAlignment bytes.
    // This is needed to prevent overlapping pages with user pointers.
    size_t cSize = std::max(alignUp(allocationData.size, minAlignment), minAlignment);

    void *bufferPtr = const_cast<void *>(allocationData.hostPtr);
    DEBUG_BREAK_IF(nullptr == bufferPtr);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(bufferPtr),
                                                                         cSize,
                                                                         0,
                                                                         allocationData.rootDeviceIndex));
    if (!bo) {
        return nullptr;
    }

    // If limited-range allocation is enabled, the BO requires memory allocation in the limited-range heap.
    uint64_t gpuAddress = 0;
    if (isLimitedRange(allocationData.rootDeviceIndex)) {
        gpuAddress = acquireGpuRange(cSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);
        if (!gpuAddress) {
            return nullptr;
        }
        bo->setAddress(gpuAddress);
    }

    emitPinningRequest(bo.get(), allocationData);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex,
                                        allocationData.type,
                                        bo.get(),
                                        bufferPtr,
                                        bo->peekAddress(),
                                        cSize,
                                        MemoryPool::System4KBPages);

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), cSize);
    bo.release();

    return allocation;
}

DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) {
    auto res = static_cast<DrmAllocation *>(MemoryManager::allocateGraphicsMemoryWithHostPtr(allocationData));

    if (res != nullptr && !validateHostPtrMemory) {
        emitPinningRequest(res->getBO(), allocationData);
    }
    return res;
}

GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) {
    auto osContextLinux = static_cast<OsContextLinux *>(allocationData.osContext);

    const size_t minAlignment = getUserptrAlignment();
    size_t alignedSize = alignUp(allocationData.size, minAlignment);

    auto res = alignedMallocWrapper(alignedSize, minAlignment);
    if (!res)
        return nullptr;

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(res), alignedSize, 0, allocationData.rootDeviceIndex));

    if (!bo) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    UNRECOVERABLE_IF(allocationData.gpuAddress == 0);
    bo->setAddress(allocationData.gpuAddress);

    BufferObject *boPtr = bo.get();
    if (forcePinEnabled && pinBBs.at(allocationData.rootDeviceIndex) != nullptr && alignedSize >= this->pinThreshold) {
        pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, osContextLinux, 0, osContextLinux->getDrmContextIds()[0]);
    }

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->peekAddress(), alignedSize, MemoryPool::System4KBPages);
    allocation->setDriverAllocatedCpuPtr(res);
    bo.release();

    return allocation;
}

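// Imports an arbitrary, possibly unaligned host pointer: the pointer is aligned
// down to a page boundary for the userptr BO, and the in-page offset is stored
// on the allocation so the original address can be reconstructed.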
DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) {
    if (allocationData.size == 0 || !allocationData.hostPtr)
        return nullptr;

    auto alignedPtr = alignDown(allocationData.hostPtr, MemoryConstants::pageSize);
    auto alignedSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size);
    auto realAllocationSize = alignedSize;
    auto offsetInPage = ptrDiff(allocationData.hostPtr, alignedPtr);
    auto rootDeviceIndex = allocationData.rootDeviceIndex;

    auto gpuVirtualAddress = acquireGpuRange(alignedSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD);
    if (!gpuVirtualAddress) {
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(alignedPtr), realAllocationSize, 0, rootDeviceIndex));
    if (!bo) {
        releaseGpuRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize, rootDeviceIndex);
        return nullptr;
    }

    bo->setAddress(gpuVirtualAddress);

    if (validateHostPtrMemory) {
        auto boPtr = bo.get();
        auto vmHandleId = Math::getMinLsbSet(static_cast<uint32_t>(allocationData.storageInfo.subDeviceBitfield.to_ulong()));
        int result = pinBBs.at(rootDeviceIndex)->validateHostPtr(&boPtr, 1, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, vmHandleId, getDefaultDrmContextId(rootDeviceIndex));
        if (result != 0) {
            unreference(bo.release(), true);
            releaseGpuRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize, rootDeviceIndex);
            return nullptr;
        }
    }

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), const_cast<void *>(allocationData.hostPtr),
                                        gpuVirtualAddress, allocationData.size, MemoryPool::System4KBPages);
    allocation->setAllocationOffset(offsetInPage);

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize);
    bo.release();
    return allocation;
}

DrmAllocation *DrmMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) {
    return nullptr;
}

GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &allocationData) {
    auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, 0u, false);
    size_t bufferSize = allocationData.size;
    uint64_t gpuRange = acquireGpuRange(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB);

    drm_i915_gem_create create = {0, 0, 0};
    create.size = bufferSize;

    [[maybe_unused]] auto ret = this->getDrm(allocationData.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    DEBUG_BREAK_IF(ret != 0);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new BufferObject(&getDrm(allocationData.rootDeviceIndex), create.handle, bufferSize, maxOsContextCount));
    bo->setAddress(gpuRange);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), nullptr, gpuRange, bufferSize, MemoryPool::SystemCpuInaccessible);
    allocation->setDefaultGmm(gmm.release());

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), bufferSize);
    bo.release();
    return allocation;
}

GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) {
    if (allocationData.imgInfo->linearStorage) {
        auto alloc = allocateGraphicsMemoryWithAlignment(allocationData);
        if (alloc) {
            alloc->setDefaultGmm(gmm.release());
        }
        return alloc;
    }

    uint64_t gpuRange = acquireGpuRange(allocationData.imgInfo->size, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);

    drm_i915_gem_create create = {0, 0, 0};
    create.size = allocationData.imgInfo->size;

    [[maybe_unused]] auto ret = this->getDrm(allocationData.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    DEBUG_BREAK_IF(ret != 0);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new (std::nothrow) BufferObject(&getDrm(allocationData.rootDeviceIndex), create.handle, allocationData.imgInfo->size, maxOsContextCount));
    if (!bo) {
        return nullptr;
    }
    bo->setAddress(gpuRange);

    [[maybe_unused]] auto ret2 = bo->setTiling(I915_TILING_Y, static_cast<uint32_t>(allocationData.imgInfo->rowPitch));
    DEBUG_BREAK_IF(ret2 != true);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), nullptr, gpuRange, allocationData.imgInfo->size, MemoryPool::SystemCpuInaccessible);
    allocation->setDefaultGmm(gmm.release());

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), allocationData.imgInfo->size);
    bo.release();
    return allocation;
}

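// 32-bit allocations are carved out of a dedicated 32-bit heap selected by the
// heap assigner. With a caller-provided host pointer the memory is imported via
// userptr; otherwise the driver allocates aligned host memory itself and keeps
// ownership of the CPU pointer.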
DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) {
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
    auto allocatorToUse = heapAssigner.get32BitHeapIndex(allocationData.type, useLocalMemory, *hwInfo, allocationData.flags.use32BitFrontWindow);

    if (allocationData.hostPtr) {
        uintptr_t inputPtr = reinterpret_cast<uintptr_t>(allocationData.hostPtr);
        auto allocationSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size);
        auto realAllocationSize = allocationSize;
        auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
        auto gpuVirtualAddress = gfxPartition->heapAllocate(allocatorToUse, realAllocationSize);
        if (!gpuVirtualAddress) {
            return nullptr;
        }
        auto alignedUserPointer = reinterpret_cast<uintptr_t>(alignDown(allocationData.hostPtr, MemoryConstants::pageSize));
        auto inputPointerOffset = inputPtr - alignedUserPointer;

        std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(alignedUserPointer, allocationSize, 0, allocationData.rootDeviceIndex));
        if (!bo) {
            gfxPartition->heapFree(allocatorToUse, gpuVirtualAddress, realAllocationSize);
            return nullptr;
        }

        bo->setAddress(gpuVirtualAddress);
        auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), const_cast<void *>(allocationData.hostPtr), GmmHelper::canonize(ptrOffset(gpuVirtualAddress, inputPointerOffset)),
                                            allocationSize, MemoryPool::System4KBPagesWith32BitGpuAddressing);
        allocation->set32BitAllocation(true);
        allocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(allocatorToUse)));
        allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVirtualAddress), realAllocationSize);
        bo.release();
        return allocation;
    }

    size_t alignedAllocationSize = alignUp(allocationData.size, MemoryConstants::pageSize);
    auto allocationSize = alignedAllocationSize;
    auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
    auto gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize);

    if (!gpuVA) {
        return nullptr;
    }

    auto ptrAlloc = alignedMallocWrapper(alignedAllocationSize, getUserptrAlignment());

    if (!ptrAlloc) {
        gfxPartition->heapFree(allocatorToUse, gpuVA, allocationSize);
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(ptrAlloc), alignedAllocationSize, 0, allocationData.rootDeviceIndex));

    if (!bo) {
        alignedFreeWrapper(ptrAlloc);
        gfxPartition->heapFree(allocatorToUse, gpuVA, allocationSize);
        return nullptr;
    }

    bo->setAddress(gpuVA);

    // Softpin the BO to the reserved GPU address; this is also the path used with limited-range allocation.
    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), ptrAlloc, GmmHelper::canonize(gpuVA), alignedAllocationSize,
                                        MemoryPool::System4KBPagesWith32BitGpuAddressing);

    allocation->set32BitAllocation(true);
    allocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(allocatorToUse)));
    allocation->setDriverAllocatedCpuPtr(ptrAlloc);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVA), allocationSize);
    bo.release();
    return allocation;
}

BufferObject *DrmMemoryManager::findAndReferenceSharedBufferObject(int boHandle, uint32_t rootDeviceIndex) {
    BufferObject *bo = nullptr;
    for (const auto &i : sharingBufferObjects) {
        if (i->getHandle() == boHandle && i->getRootDeviceIndex() == rootDeviceIndex) {
            bo = i;
            bo->reference();
            break;
        }
    }

    return bo;
}

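// Imports a dma-buf/PRIME file descriptor. The FD is translated to a GEM handle
// first; if another allocation already wraps the same handle on this root
// device, the cached BO from sharingBufferObjects is reused and only
// re-referenced, so a single handle is never closed twice.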
GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) {
    if (isHostIpcAllocation) {
        return createUSMHostAllocationFromSharedHandle(handle, properties, false);
    }

    std::unique_lock<std::mutex> lock(mtx);

    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;

    auto ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);

    if (ret != 0) {
        [[maybe_unused]] int err = errno;
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));

        return nullptr;
    }

    auto boHandle = openFd.handle;
    auto bo = findAndReferenceSharedBufferObject(boHandle, properties.rootDeviceIndex);

    if (bo == nullptr) {
        size_t size = lseekFunction(handle, 0, SEEK_END);

        bo = new (std::nothrow) BufferObject(&getDrm(properties.rootDeviceIndex), boHandle, size, maxOsContextCount);

        if (!bo) {
            return nullptr;
        }

        auto heapIndex = isLocalMemorySupported(properties.rootDeviceIndex) ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD;
        if (requireSpecificBitness && this->force32bitAllocations) {
            heapIndex = HeapIndex::HEAP_EXTERNAL;
        }
        auto gpuRange = acquireGpuRange(size, properties.rootDeviceIndex, heapIndex);

        bo->setAddress(gpuRange);
        bo->setUnmapSize(size);
        bo->setRootDeviceIndex(properties.rootDeviceIndex);

        pushSharedBufferObject(bo);
    }

    lock.unlock();

    auto drmAllocation = new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(),
                                           handle, MemoryPool::SystemCpuInaccessible);

    if (requireSpecificBitness && this->force32bitAllocations) {
        drmAllocation->set32BitAllocation(true);
        drmAllocation->setGpuBaseAddress(GmmHelper::canonize(getExternalHeapBaseAddress(properties.rootDeviceIndex, drmAllocation->isAllocatedInLocalMemoryPool())));
    }

    if (properties.imgInfo) {
        drm_i915_gem_get_tiling getTiling = {0};
        getTiling.handle = boHandle;
        ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_GET_TILING, &getTiling);

        if (ret == 0) {
            if (getTiling.tiling_mode == I915_TILING_NONE) {
                properties.imgInfo->linearStorage = true;
            }
        }

        Gmm *gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->getGmmClientContext(), *properties.imgInfo,
                           createStorageInfoFromProperties(properties), properties.flags.preferCompressed);
        drmAllocation->setDefaultGmm(gmm);
    }
    return drmAllocation;
}

void DrmMemoryManager::closeSharedHandle(GraphicsAllocation *gfxAllocation) {
    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
    if (drmAllocation->peekSharedHandle() != Sharing::nonSharedResource) {
        closeFunction(drmAllocation->peekSharedHandle());
        drmAllocation->setSharedHandle(Sharing::nonSharedResource);
    }
}

GraphicsAllocation *DrmMemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
    uint64_t gpuRange = 0llu;

    auto rootDeviceIndex = inputGraphicsAllocation->getRootDeviceIndex();
    gpuRange = acquireGpuRange(sizeWithPadding, rootDeviceIndex, HeapIndex::HEAP_STANDARD);

    void *srcPtr = nullptr;
    auto drmInputAllocation = static_cast<DrmAllocation *>(inputGraphicsAllocation);
    if (drmInputAllocation->getMmapPtr()) {
        auto bo = drmInputAllocation->getBO();
        drm_i915_gem_mmap mmap_arg = {};
        mmap_arg.handle = bo->peekHandle();
        mmap_arg.size = bo->peekSize();
        if (getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0) {
            return nullptr;
        }
        srcPtr = addrToPtr(mmap_arg.addr_ptr);
        inputGraphicsAllocation->lock(srcPtr);
    } else {
        srcPtr = inputGraphicsAllocation->getUnderlyingBuffer();
    }
    auto srcSize = inputGraphicsAllocation->getUnderlyingBufferSize();
    auto alignedSrcSize = alignUp(srcSize, MemoryConstants::pageSize);
    auto alignedPtr = reinterpret_cast<uintptr_t>(alignDown(srcPtr, MemoryConstants::pageSize));
    auto offset = ptrDiff(srcPtr, alignedPtr);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(alignedPtr, alignedSrcSize, 0, rootDeviceIndex));
    if (!bo) {
        return nullptr;
    }
    bo->setAddress(gpuRange);
    auto allocation = new DrmAllocation(rootDeviceIndex, inputGraphicsAllocation->getAllocationType(), bo.get(), srcPtr, GmmHelper::canonize(ptrOffset(gpuRange, offset)), sizeWithPadding,
                                        inputGraphicsAllocation->getMemoryPool());

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), sizeWithPadding);
    bo.release();
    return allocation;
}

void DrmMemoryManager::addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) {
    DrmAllocation *drmMemory = static_cast<DrmAllocation *>(gfxAllocation);

    FragmentStorage fragment = {};
    fragment.driverAllocation = true;
    fragment.fragmentCpuPointer = gfxAllocation->getUnderlyingBuffer();
    fragment.fragmentSize = alignUp(gfxAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize);

    auto osHandle = new OsHandleLinux();
    osHandle->bo = drmMemory->getBO();

    fragment.osInternalStorage = osHandle;
    fragment.residency = new ResidencyData(maxOsContextCount);
    hostPtrManager->storeFragment(gfxAllocation->getRootDeviceIndex(), fragment);
}

void DrmMemoryManager::removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) {
    auto buffer = gfxAllocation->getUnderlyingBuffer();
    auto fragment = hostPtrManager->getFragment({buffer, gfxAllocation->getRootDeviceIndex()});
    if (fragment && fragment->driverAllocation) {
        OsHandle *osStorageToRelease = fragment->osInternalStorage;
        ResidencyData *residencyDataToRelease = fragment->residency;
        if (hostPtrManager->releaseHostPtr(gfxAllocation->getRootDeviceIndex(), buffer)) {
            delete osStorageToRelease;
            delete residencyDataToRelease;
        }
    }
}

void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) {
    if (DebugManager.flags.DoNotFreeResources.get()) {
        return;
    }
    DrmAllocation *drmAlloc = static_cast<DrmAllocation *>(gfxAllocation);
    this->unregisterAllocation(gfxAllocation);

    for (auto &engine : this->registeredEngines) {
        auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->memoryOperationsInterface.get());
        memoryOperationsInterface->evictWithinOsContext(engine.osContext, *gfxAllocation);
    }

    if (drmAlloc->getMmapPtr()) {
        this->munmapFunction(drmAlloc->getMmapPtr(), drmAlloc->getMmapSize());
    }

    for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
        delete gfxAllocation->getGmm(handleId);
    }

    if (gfxAllocation->fragmentsStorage.fragmentCount) {
        cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation);
    } else {
        auto &bos = static_cast<DrmAllocation *>(gfxAllocation)->getBOs();
        for (auto bo : bos) {
            unreference(bo, bo && bo->peekIsReusableAllocation() ? false : true);
        }
        closeSharedHandle(gfxAllocation);
    }

    releaseGpuRange(gfxAllocation->getReservedAddressPtr(), gfxAllocation->getReservedAddressSize(), gfxAllocation->getRootDeviceIndex());
    alignedFreeWrapper(gfxAllocation->getDriverAllocatedCpuPtr());

    drmAlloc->freeRegisteredBOBindExtHandles(&getDrm(drmAlloc->getRootDeviceIndex()));

    delete gfxAllocation;
}

void DrmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
    if (this->getDrm(allocation->getRootDeviceIndex()).isVmBindAvailable()) {
        waitForEnginesCompletion(*allocation);
    } else {
        static_cast<DrmAllocation *>(allocation)->getBO()->wait(-1);
    }
}

GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromExistingStorage(AllocationProperties &properties, void *ptr, MultiGraphicsAllocation &multiGraphicsAllocation) {
    auto defaultAlloc = multiGraphicsAllocation.getDefaultGraphicsAllocation();
    if (static_cast<DrmAllocation *>(defaultAlloc)->getMmapPtr()) {
        properties.size = defaultAlloc->getUnderlyingBufferSize();
        properties.gpuAddress = castToUint64(ptr);

        auto internalHandle = defaultAlloc->peekInternalHandle(this);
        return createUSMHostAllocationFromSharedHandle(static_cast<osHandle>(internalHandle), properties, true);
    } else {
        return allocateGraphicsMemoryWithProperties(properties, ptr);
    }
}

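// System shared memory is reported as the smaller of the physical host memory
// and the GPU's GTT size queried through I915_CONTEXT_PARAM_GTT_SIZE.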
uint64_t DrmMemoryManager::getSystemSharedMemory(uint32_t rootDeviceIndex) {
    uint64_t hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES));

    drm_i915_gem_context_param getContextParam = {};
    getContextParam.param = I915_CONTEXT_PARAM_GTT_SIZE;
    [[maybe_unused]] auto ret = getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &getContextParam);
    DEBUG_BREAK_IF(ret != 0);

    uint64_t gpuMemorySize = getContextParam.value;

    return std::min(hostMemorySize, gpuMemorySize);
}

double DrmMemoryManager::getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex) {
    if (isLocalMemorySupported(rootDeviceIndex)) {
        return 0.95;
    }
    return 0.8;
}

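// Creates userptr BOs for any host-pointer fragments that do not have OS
// storage yet. When host-pointer validation is enabled, the new BOs are
// validated in one batch via the pin BB; EFAULT from that call identifies an
// invalid user pointer rather than a generic failure.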
MemoryManager::AllocationStatus DrmMemoryManager::populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) {
    BufferObject *allocatedBos[maxFragmentsCount];
    uint32_t numberOfBosAllocated = 0;
    uint32_t indexesOfAllocatedBos[maxFragmentsCount];

    for (unsigned int i = 0; i < maxFragmentsCount; i++) {
        // A fragment without OS handle storage still needs to be created here; fragments that already have storage exist already.
        if (!handleStorage.fragmentStorageData[i].osHandleStorage && handleStorage.fragmentStorageData[i].fragmentSize) {
            auto osHandle = new OsHandleLinux();

            handleStorage.fragmentStorageData[i].osHandleStorage = osHandle;
            handleStorage.fragmentStorageData[i].residency = new ResidencyData(maxOsContextCount);

            osHandle->bo = allocUserptr((uintptr_t)handleStorage.fragmentStorageData[i].cpuPtr,
                                        handleStorage.fragmentStorageData[i].fragmentSize,
                                        0, rootDeviceIndex);
            if (!osHandle->bo) {
                handleStorage.fragmentStorageData[i].freeTheFragment = true;
                return AllocationStatus::Error;
            }

            allocatedBos[numberOfBosAllocated] = osHandle->bo;
            indexesOfAllocatedBos[numberOfBosAllocated] = i;
            numberOfBosAllocated++;
        }
    }

    if (validateHostPtrMemory) {
        int result = pinBBs.at(rootDeviceIndex)->validateHostPtr(allocatedBos, numberOfBosAllocated, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, 0, getDefaultDrmContextId(rootDeviceIndex));

        if (result == EFAULT) {
            for (uint32_t i = 0; i < numberOfBosAllocated; i++) {
                handleStorage.fragmentStorageData[indexesOfAllocatedBos[i]].freeTheFragment = true;
            }
            return AllocationStatus::InvalidHostPointer;
        } else if (result != 0) {
            return AllocationStatus::Error;
        }
    }

    for (uint32_t i = 0; i < numberOfBosAllocated; i++) {
        hostPtrManager->storeFragment(rootDeviceIndex, handleStorage.fragmentStorageData[indexesOfAllocatedBos[i]]);
    }
    return AllocationStatus::Success;
}

void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) {
    for (unsigned int i = 0; i < maxFragmentsCount; i++) {
        if (handleStorage.fragmentStorageData[i].freeTheFragment) {
            auto osHandle = static_cast<OsHandleLinux *>(handleStorage.fragmentStorageData[i].osHandleStorage);
            if (osHandle->bo) {
                BufferObject *search = osHandle->bo;
                search->wait(-1);
                [[maybe_unused]] auto refCount = unreference(search, true);
                DEBUG_BREAK_IF(refCount != 1u);
            }
            delete handleStorage.fragmentStorageData[i].osHandleStorage;
            handleStorage.fragmentStorageData[i].osHandleStorage = nullptr;
            delete handleStorage.fragmentStorageData[i].residency;
            handleStorage.fragmentStorageData[i].residency = nullptr;
        }
    }
}

bool DrmMemoryManager::setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable) {
    DEBUG_BREAK_IF(writeEnable); // Unsupported path (for CPU writes call the SW_FINISH ioctl in unlockResource).

    auto bo = static_cast<DrmAllocation *>(&graphicsAllocation)->getBO();
    if (bo == nullptr)
        return false;

    // Move the buffer object to the CPU read, and possibly write, domain, including waiting on flushes to occur.
    drm_i915_gem_set_domain set_domain = {};
    set_domain.handle = bo->peekHandle();
    set_domain.read_domains = I915_GEM_DOMAIN_CPU;
    set_domain.write_domain = writeEnable ? I915_GEM_DOMAIN_CPU : 0;

    return getDrm(graphicsAllocation.getRootDeviceIndex()).ioctl(DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0;
}

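// Locking a system-memory BO without a CPU pointer maps it with
// DRM_IOCTL_I915_GEM_MMAP and then moves it to the CPU domain so that
// subsequent CPU reads observe flushed data.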
void *DrmMemoryManager::lockResourceImpl(GraphicsAllocation &graphicsAllocation) {
    if (MemoryPool::LocalMemory == graphicsAllocation.getMemoryPool()) {
        return lockResourceInLocalMemoryImpl(graphicsAllocation);
    }

    auto cpuPtr = graphicsAllocation.getUnderlyingBuffer();
    if (cpuPtr != nullptr) {
        [[maybe_unused]] auto success = setDomainCpu(graphicsAllocation, false);
        DEBUG_BREAK_IF(!success);
        return cpuPtr;
    }

    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (bo == nullptr)
        return nullptr;

    drm_i915_gem_mmap mmap_arg = {};
    mmap_arg.handle = bo->peekHandle();
    mmap_arg.size = bo->peekSize();
    if (getDrm(graphicsAllocation.getRootDeviceIndex()).ioctl(DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0) {
        return nullptr;
    }

    bo->setLockedAddress(reinterpret_cast<void *>(mmap_arg.addr_ptr));

    [[maybe_unused]] auto success = setDomainCpu(graphicsAllocation, false);
    DEBUG_BREAK_IF(!success);

    return bo->peekLockedAddress();
}

void DrmMemoryManager::unlockResourceImpl(GraphicsAllocation &graphicsAllocation) {
    if (MemoryPool::LocalMemory == graphicsAllocation.getMemoryPool()) {
        return unlockResourceInLocalMemoryImpl(static_cast<DrmAllocation &>(graphicsAllocation).getBO());
    }

    auto cpuPtr = graphicsAllocation.getUnderlyingBuffer();
    if (cpuPtr != nullptr) {
        return;
    }

    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (bo == nullptr)
        return;

    releaseReservedCpuAddressRange(bo->peekLockedAddress(), bo->peekSize(), graphicsAllocation.getRootDeviceIndex());

    bo->setLockedAddress(nullptr);
}

int DrmMemoryManager::obtainFdFromHandle(int boHandle, uint32_t rootDeviceIndex) {
    drm_prime_handle openFd = {0, 0, 0};

    openFd.flags = DRM_CLOEXEC | DRM_RDWR;
    openFd.handle = boHandle;

    getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_PRIME_HANDLE_TO_FD, &openFd);

    return openFd.fd;
}

uint32_t DrmMemoryManager::getDefaultDrmContextId(uint32_t rootDeviceIndex) const {
    auto osContextLinux = static_cast<OsContextLinux *>(registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext);
    return osContextLinux->getDrmContextIds()[0];
}

size_t DrmMemoryManager::getUserptrAlignment() {
    auto alignment = MemoryConstants::allocationAlignment;

    if (DebugManager.flags.ForceUserptrAlignment.get() != -1) {
        alignment = DebugManager.flags.ForceUserptrAlignment.get() * MemoryConstants::kiloByte;
    }

    return alignment;
}

Drm &DrmMemoryManager::getDrm(uint32_t rootDeviceIndex) const {
    return *this->executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as<Drm>();
}

uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
    auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size();

    for (auto rootDeviceIndex = 0u; rootDeviceIndex < rootDeviceCount; rootDeviceIndex++) {
        if (&getDrm(rootDeviceIndex) == drm) {
            return rootDeviceIndex;
        }
    }
    return CommonConstants::unspecifiedDeviceIndex;
}

AddressRange DrmMemoryManager::reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) {
    auto gpuVa = acquireGpuRange(size, rootDeviceIndex, HeapIndex::HEAP_STANDARD);
    return AddressRange{gpuVa, size};
}

void DrmMemoryManager::freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) {
    releaseGpuRange(reinterpret_cast<void *>(addressRange.address), addressRange.size, rootDeviceIndex);
}

std::unique_lock<std::mutex> DrmMemoryManager::acquireAllocLock() {
    return std::unique_lock<std::mutex>(this->allocMutex);
}

std::vector<GraphicsAllocation *> &DrmMemoryManager::getSysMemAllocs() {
    return this->sysMemAllocs;
}

std::vector<GraphicsAllocation *> &DrmMemoryManager::getLocalMemAllocs(uint32_t rootDeviceIndex) {
    return this->localMemAllocs[rootDeviceIndex];
}

void DrmMemoryManager::registerSysMemAlloc(GraphicsAllocation *allocation) {
    std::lock_guard<std::mutex> lock(this->allocMutex);
    this->sysMemAllocs.push_back(allocation);
}

void DrmMemoryManager::registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex) {
    std::lock_guard<std::mutex> lock(this->allocMutex);
    this->localMemAllocs[rootDeviceIndex].push_back(allocation);
}

void DrmMemoryManager::unregisterAllocation(GraphicsAllocation *allocation) {
    std::lock_guard<std::mutex> lock(this->allocMutex);
    sysMemAllocs.erase(std::remove(sysMemAllocs.begin(), sysMemAllocs.end(), allocation),
                       sysMemAllocs.end());
    localMemAllocs[allocation->getRootDeviceIndex()].erase(std::remove(localMemAllocs[allocation->getRootDeviceIndex()].begin(),
                                                                       localMemAllocs[allocation->getRootDeviceIndex()].end(),
                                                                       allocation),
                                                           localMemAllocs[allocation->getRootDeviceIndex()].end());
}

void DrmMemoryManager::registerAllocationInOs(GraphicsAllocation *allocation) {
    if (allocation && getDrm(allocation->getRootDeviceIndex()).resourceRegistrationEnabled()) {
        auto drmAllocation = static_cast<DrmAllocation *>(allocation);
        drmAllocation->registerBOBindExtHandle(&getDrm(drmAllocation->getRootDeviceIndex()));

        if (isAllocationTypeToCapture(drmAllocation->getAllocationType())) {
            drmAllocation->markForCapture();
        }
    }
}

std::unique_ptr<MemoryManager> DrmMemoryManager::create(ExecutionEnvironment &executionEnvironment) {
    bool validateHostPtr = true;

    if (DebugManager.flags.EnableHostPtrValidation.get() != -1) {
        validateHostPtr = DebugManager.flags.EnableHostPtrValidation.get();
    }

    return std::make_unique<DrmMemoryManager>(gemCloseWorkerMode::gemCloseWorkerActive,
                                              DebugManager.flags.EnableForcePin.get(),
                                              validateHostPtr,
                                              executionEnvironment);
}

uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
    auto memoryInfo = getDrm(rootDeviceIndex).getMemoryInfo();
    if (!memoryInfo) {
        return 0;
    }

    auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    uint32_t subDevicesCount = HwHelper::getSubDevicesCount(hwInfo);
    size_t size = 0;

    for (uint32_t i = 0; i < subDevicesCount; i++) {
        auto memoryBank = (1 << i);

        if (deviceBitfield & memoryBank) {
            size += memoryInfo->getMemoryRegionSize(memoryBank);
        }
    }

    return size;
}
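
//For WRITE_COMBINED allocations the BO is over-mapped, so the CPU mapping is
//aligned up to a 64KB boundary and the unused leading pages are unmapped
//before the locked address is published.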
void *DrmMemoryManager::lockResourceInLocalMemoryImpl(GraphicsAllocation &graphicsAllocation) {
    if (!isLocalMemorySupported(graphicsAllocation.getRootDeviceIndex())) {
        return nullptr;
    }
    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (graphicsAllocation.getAllocationType() == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
        auto addr = lockResourceInLocalMemoryImpl(bo);
        auto alignedAddr = alignUp(addr, MemoryConstants::pageSize64k);
        auto notUsedSize = ptrDiff(alignedAddr, addr);
        //call unmap to free the unaligned pages preceding the BO allocation and
        //adjust the pointer in the CPU mapping to the beginning of the BO allocation
        munmapFunction(addr, notUsedSize);
        bo->setLockedAddress(alignedAddr);
        return bo->peekLockedAddress();
    }
    return lockResourceInLocalMemoryImpl(bo);
}

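//Copies staged data into an allocation. Allocations with a CPU pointer (or on
//platforms without local memory) fall back to the base implementation; pure
//local-memory allocations are copied bank by bank through a locked mapping.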
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
    if (graphicsAllocation->getUnderlyingBuffer() || !isLocalMemorySupported(graphicsAllocation->getRootDeviceIndex())) {
        return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
    }
    return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, maxNBitValue(graphicsAllocation->storageInfo.getNumBanks()));
}

bool DrmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
    if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) {
        return false;
    }
    auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
    for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
        if (!handleMask.test(handleId)) {
            continue;
        }
        auto ptr = lockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
        if (!ptr) {
            return false;
        }
        memcpy_s(ptrOffset(ptr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy);
        this->unlockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
    }
    return true;
}

void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
    if (bo == nullptr)
        return;

    releaseReservedCpuAddressRange(bo->peekLockedAddress(), bo->peekSize(), this->getRootDeviceIndex(bo->peekDrm()));

    [[maybe_unused]] auto ret = munmapFunction(bo->peekLockedAddress(), bo->peekSize());
    DEBUG_BREAK_IF(ret != 0);

    bo->setLockedAddress(nullptr);
}

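//Splits a multi-storage allocation into per-bank Gmms: ChunkSizeBased
//colouring creates one Gmm per colouringGranularity-sized chunk, otherwise
//one Gmm per memory bank. The balancing algorithm is described below.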
void createColouredGmms(GmmClientContext *clientContext, DrmAllocation &allocation, const StorageInfo &storageInfo, bool compression) {
    DEBUG_BREAK_IF(storageInfo.colouringPolicy == ColouringPolicy::DeviceCountBased && storageInfo.colouringGranularity != MemoryConstants::pageSize64k);

    auto remainingSize = alignUp(allocation.getUnderlyingBufferSize(), storageInfo.colouringGranularity);
    auto handles = storageInfo.getNumBanks();
    auto banksCnt = storageInfo.getTotalBanksCnt();

    if (storageInfo.colouringPolicy == ColouringPolicy::ChunkSizeBased) {
        handles = static_cast<uint32_t>(remainingSize / storageInfo.colouringGranularity);
        allocation.resizeGmms(handles);
    }
    /* This logic colours the resource as equally as possible.
    Divide the remaining size by the number of remaining devices, align the result up to the 64kb page size,
    then subtract that chunk from the remaining size and place it on the current tile, so each tile gets its own chunk.
    Repeat in the following iterations with the reduced size and device count.
    Notice that if the allocation size (in pages) is not divisible by the device count, the remainder
    (1, 2 or 3 pages for four devices) is spread efficiently by this algorithm.

    For example: an 18-page allocation across 4 devices, with a 64kb page size.
    Divide 18 by 4 and align up to the page size: the result is 5 pages. After subtracting, 13 pages remain.
    Now divide 13 by 3 and align up: the result is 5 pages. After subtracting, 8 pages remain.
    Divide 8 by 2: the result is 4 pages.
    In the last iteration the remaining 4 pages go to the last tile.
    The 18 pages are coloured as (5, 5, 4, 4).

    This has been tested and requires no additional debugging. */
    for (auto handleId = 0u; handleId < handles; handleId++) {
        auto currentSize = alignUp(remainingSize / (handles - handleId), storageInfo.colouringGranularity);
        remainingSize -= currentSize;
        StorageInfo limitedStorageInfo = storageInfo;
        limitedStorageInfo.memoryBanks &= (1u << (handleId % banksCnt));
        auto gmm = new Gmm(clientContext,
                           nullptr,
                           currentSize,
                           0u,
                           false,
                           compression,
                           false,
                           limitedStorageInfo);
        allocation.setGmm(gmm, handleId);
    }
}

void fillGmmsInAllocation(GmmClientContext *clientContext, DrmAllocation *allocation, const StorageInfo &storageInfo) {
    auto alignedSize = alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k);
    for (auto handleId = 0u; handleId < storageInfo.getNumBanks(); handleId++) {
        StorageInfo limitedStorageInfo = storageInfo;
        limitedStorageInfo.memoryBanks &= 1u << handleId;
        limitedStorageInfo.pageTablesVisibility &= 1u << handleId;
        auto gmm = new Gmm(clientContext, nullptr, alignedSize, 0u, false, false, false, limitedStorageInfo);
        allocation->setGmm(gmm, handleId);
    }
}

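//Selects the GPU VA per allocation type: SVM_GPU reuses the host pointer,
//ISA/internal-heap types allocate from a 32-bit heap, WRITE_COMBINED leaves
//the address at zero (it is patched later from the locked CPU pointer), and
//everything else is placed via the alignment selector, preferring
//HEAP_EXTENDED when available and the resource is not 48-bit limited.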
uint64_t getGpuAddress(const AlignmentSelector &alignmentSelector, HeapAssigner &heapAssigner, const HardwareInfo &hwInfo, GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition,
                       size_t &sizeAllocated, const void *hostPtr, bool resource48Bit, bool useFrontWindow) {
    uint64_t gpuAddress = 0;
    switch (allocType) {
    case GraphicsAllocation::AllocationType::SVM_GPU:
        gpuAddress = reinterpret_cast<uint64_t>(hostPtr);
        sizeAllocated = 0;
        break;
    case GraphicsAllocation::AllocationType::KERNEL_ISA:
    case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL:
    case GraphicsAllocation::AllocationType::INTERNAL_HEAP:
    case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: {
        auto heap = heapAssigner.get32BitHeapIndex(allocType, true, hwInfo, useFrontWindow);
        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(heap, sizeAllocated));
    } break;
    case GraphicsAllocation::AllocationType::WRITE_COMBINED:
        sizeAllocated = 0;
        break;
    default:
        AlignmentSelector::CandidateAlignment alignment = alignmentSelector.selectAlignment(sizeAllocated);
        if (gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !resource48Bit) {
            alignment.heap = HeapIndex::HEAP_EXTENDED;
        }
        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocateWithCustomAlignment(alignment.heap, sizeAllocated, alignment.alignment));
        break;
    }
    return gpuAddress;
}

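//Main device-pool allocation path: builds the Gmm description (single handle,
//coloured, or one Gmm per bank), reserves a GPU VA, backs it with buffer
//objects, then applies post-conditions (WC remapping, CPU access, 32-bit base,
//cache region). Returns nullptr with RetryInNonDevicePool when the request
//should fall back to system memory.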
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) {
    status = AllocationStatus::RetryInNonDevicePool;
    if (!this->localMemorySupported[allocationData.rootDeviceIndex] ||
        allocationData.flags.useSystemMemory ||
        (allocationData.flags.allow32Bit && this->force32bitAllocations) ||
        allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) {
        return nullptr;
    }

    if (allocationData.type == GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY) {
        auto allocation = this->createSharedUnifiedMemoryAllocation(allocationData);
        status = allocation ? AllocationStatus::Success : AllocationStatus::Error;
        return allocation;
    }

    std::unique_ptr<Gmm> gmm;
    size_t sizeAligned = 0;
    auto numHandles = allocationData.storageInfo.getNumBanks();
    bool createSingleHandle = 1 == numHandles;
    if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE) {
        allocationData.imgInfo->useLocalMemory = true;
        gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo,
                                    allocationData.storageInfo, allocationData.flags.preferCompressed);
        sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
    } else {
        if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
            sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
        } else {
            sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
        }
        if (createSingleHandle) {
            gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
                                        nullptr,
                                        sizeAligned,
                                        0u,
                                        allocationData.flags.uncacheable,
                                        allocationData.flags.preferCompressed,
                                        false,
                                        allocationData.storageInfo);
        }
    }

    auto sizeAllocated = sizeAligned;
    auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
    auto gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo,
                                    allocationData.type, gfxPartition, sizeAllocated,
                                    allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow);

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, gpuAddress, sizeAligned, MemoryPool::LocalMemory);
    if (createSingleHandle) {
        allocation->setDefaultGmm(gmm.release());
    } else if (allocationData.storageInfo.multiStorage) {
        createColouredGmms(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
                           *allocation,
                           allocationData.storageInfo,
                           allocationData.flags.preferCompressed);
    } else {
        fillGmmsInAllocation(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocation.get(), allocationData.storageInfo);
    }
    allocation->storageInfo = allocationData.storageInfo;
    allocation->setFlushL3Required(allocationData.flags.flushL3);
    allocation->setUncacheable(allocationData.flags.uncacheable);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), sizeAllocated);

    if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) {
        for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
            delete allocation->getGmm(handleId);
        }
        gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated);
        status = AllocationStatus::Error;
        return nullptr;
    }
    if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
        auto cpuAddress = lockResource(allocation.get());
        auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte);
        auto offset = ptrDiff(cpuAddress, alignedCpuAddress);
        allocation->setAllocationOffset(offset);
        allocation->setCpuPtrAndGpuAddress(cpuAddress, reinterpret_cast<uint64_t>(alignedCpuAddress));
        DEBUG_BREAK_IF(allocation->storageInfo.multiStorage);
        allocation->getBO()->setAddress(reinterpret_cast<uint64_t>(cpuAddress));
    }
    if (allocationData.flags.requiresCpuAccess) {
        auto cpuAddress = lockResource(allocation.get());
        allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress);
    }
    if (heapAssigner.useInternal32BitHeap(allocationData.type)) {
        allocation->setGpuBaseAddress(GmmHelper::canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex, true)));
    }
    if (!allocation->setCacheRegion(&getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
        for (auto bo : allocation->getBOs()) {
            delete bo;
        }
        for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
            delete allocation->getGmm(handleId);
        }
        gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated);
        status = AllocationStatus::Error;
        return nullptr;
    }

    status = AllocationStatus::Success;
    return allocation.release();
}

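//Creates a GEM object in the memory region selected by memoryBanks via
//createGemExtWithSingleRegion and wraps it in a BufferObject bound to
//gpuAddress. Returns nullptr if memory info is unavailable or creation fails.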
BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(Drm *drm,
                                                                 uint64_t gpuAddress,
                                                                 size_t size,
                                                                 uint32_t memoryBanks,
                                                                 size_t maxOsContextCount) {
    auto memoryInfo = drm->getMemoryInfo();
    if (!memoryInfo) {
        return nullptr;
    }

    uint32_t handle = 0;
    auto ret = memoryInfo->createGemExtWithSingleRegion(drm, memoryBanks, size, handle);

    if (ret != 0) {
        return nullptr;
    }

    auto bo = new (std::nothrow) BufferObject(drm, handle, size, maxOsContextCount);
    if (!bo) {
        return nullptr;
    }

    bo->setAddress(gpuAddress);

    return bo;
}

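//Backs each Gmm of the allocation with a buffer object in its bank. Banks may
//be sparse (e.g. memoryBanks 0b1101 with 3 handles), so unset bits are skipped
//while walking currentBank. For MappingBased colouring, the VA range is handed
//out to the BOs in round-robin, colouringGranularity-sized chunks.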
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) {
    BufferObjects bos{};
    auto &storageInfo = allocation->storageInfo;
    auto boAddress = gpuAddress;
    auto currentBank = 0u;
    auto iterationOffset = 0u;
    auto banksCnt = storageInfo.getTotalBanksCnt();

    auto handles = storageInfo.getNumBanks();
    if (storageInfo.colouringPolicy == ColouringPolicy::ChunkSizeBased) {
        handles = allocation->getNumGmms();
        allocation->resizeBufferObjects(handles);
        bos.resize(handles);
    }

    for (auto handleId = 0u; handleId < handles; handleId++, currentBank++) {
        if (currentBank == banksCnt) {
            currentBank = 0;
            iterationOffset += banksCnt;
        }
        uint32_t memoryBanks = static_cast<uint32_t>(storageInfo.memoryBanks.to_ulong());
        if (storageInfo.getNumBanks() > 1) {
            //check if we have this bank; if not, move to the next one
            //memoryBanks may contain holes that we need to skip, e.g. memoryBanks 0b1101 with a 3-handle allocation
            while (!(memoryBanks & (1u << currentBank))) {
                currentBank++;
            }
            memoryBanks &= 1u << currentBank;
        }
        auto boSize = alignUp(allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k);
        bos[handleId] = createBufferObjectInMemoryRegion(drm, boAddress, boSize, memoryBanks, maxOsContextCount);
        if (nullptr == bos[handleId]) {
            return false;
        }
        allocation->getBufferObjectToModify(currentBank + iterationOffset) = bos[handleId];
        if (storageInfo.multiStorage) {
            boAddress += boSize;
        }
    }

    if (storageInfo.colouringPolicy == ColouringPolicy::MappingBased) {
        auto size = alignUp(allocation->getUnderlyingBufferSize(), storageInfo.colouringGranularity);
        auto chunks = static_cast<uint32_t>(size / storageInfo.colouringGranularity);
        auto granularity = storageInfo.colouringGranularity;

        for (uint32_t boHandle = 0; boHandle < handles; boHandle++) {
            bos[boHandle]->setColourWithBind();
            bos[boHandle]->setColourChunk(granularity);
            bos[boHandle]->reserveAddressVector(alignUp(chunks, handles) / handles);
        }

        auto boHandle = 0u;
        auto colourAddress = gpuAddress;
        for (auto chunk = 0u; chunk < chunks; chunk++) {
            if (boHandle == handles) {
                boHandle = 0u;
            }

            bos[boHandle]->addColouringAddress(colourAddress);
            colourAddress += granularity;

            boHandle++;
        }
    }

    return true;
}

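//Queries the mmap offset for a BO via DRM_IOCTL_I915_GEM_MMAP_OFFSET. On
//platforms with local memory, the fixed mapping mode (flag value 4) is tried
//first and the caller-provided flags are used as a fallback.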
bool DrmMemoryManager::retrieveMmapOffsetForBufferObject(uint32_t rootDeviceIndex, BufferObject &bo, uint64_t flags, uint64_t &offset) {
    constexpr uint64_t mmapOffsetFixed = 4;

    drm_i915_gem_mmap_offset mmapOffset = {};
    mmapOffset.handle = bo.peekHandle();
    mmapOffset.flags = isLocalMemorySupported(rootDeviceIndex) ? mmapOffsetFixed : flags;
    auto &drm = getDrm(rootDeviceIndex);
    auto ret = drm.ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmapOffset);
    if (ret != 0 && isLocalMemorySupported(rootDeviceIndex)) {
        mmapOffset.flags = flags;
        ret = drm.ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmapOffset);
    }
    if (ret != 0) {
        int err = drm.getErrno();
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
        DEBUG_BREAK_IF(ret != 0);
        return false;
    }

    offset = mmapOffset.offset;
    return true;
}

} // namespace NEO