1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/os_interface/linux/drm_memory_manager.h"
9
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/execution_environment/execution_environment.h"
12 #include "shared/source/execution_environment/root_device_environment.h"
13 #include "shared/source/gmm_helper/gmm.h"
14 #include "shared/source/gmm_helper/gmm_helper.h"
15 #include "shared/source/gmm_helper/resource_info.h"
16 #include "shared/source/helpers/heap_assigner.h"
17 #include "shared/source/helpers/hw_info.h"
18 #include "shared/source/helpers/ptr_math.h"
19 #include "shared/source/helpers/string.h"
20 #include "shared/source/helpers/surface_format_info.h"
21 #include "shared/source/memory_manager/host_ptr_manager.h"
22 #include "shared/source/memory_manager/residency.h"
23 #include "shared/source/os_interface/linux/allocator_helper.h"
24 #include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
25 #include "shared/source/os_interface/linux/os_context_linux.h"
26 #include "shared/source/os_interface/os_interface.h"
27
28 #include "drm/i915_drm.h"
29
30 #include <cstring>
31 #include <iostream>
32 #include <memory>
33
34 namespace NEO {
35
/// Constructs the Linux/DRM memory manager.
/// @param mode                  requested gem-close-worker mode (may be
///                              overridden later in initialize()).
/// @param forcePinAllowed       enables BO pinning via the per-device pin BB.
/// @param validateHostPtrMemory enables host-pointer validation through the
///                              pin BB (failure to create it is then fatal).
/// @param executionEnvironment  owning execution environment.
DrmMemoryManager::DrmMemoryManager(gemCloseWorkerMode mode,
                                   bool forcePinAllowed,
                                   bool validateHostPtrMemory,
                                   ExecutionEnvironment &executionEnvironment) : MemoryManager(executionEnvironment),
                                                                                 forcePinEnabled(forcePinAllowed),
                                                                                 validateHostPtrMemory(validateHostPtrMemory) {

    // Default candidate: 64kb pages from the 64kb standard heap.
    alignmentSelector.addCandidateAlignment(MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB);
    // Optionally prefer 2MB-aligned local memory VA (debug flag, default on
    // unless explicitly set to 0).
    if (DebugManager.flags.AlignLocalMemoryVaTo2MB.get() != 0) {
        alignmentSelector.addCandidateAlignment(MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB);
    }
    // Experimental override: a custom alignment picks its heap by size.
    const size_t customAlignment = static_cast<size_t>(DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.get());
    if (customAlignment > 0) {
        const auto heapIndex = customAlignment >= MemoryConstants::pageSize2Mb ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD64KB;
        alignmentSelector.addCandidateAlignment(customAlignment, true, AlignmentSelector::anyWastage, heapIndex);
    }

    initialize(mode);
}
55
/// Second-phase construction: sets up per-device GPU address partitions,
/// decides the gem-close-worker mode and creates the pinning buffer objects.
/// Leaves this->initialized == false when any partition fails to init.
void DrmMemoryManager::initialize(gemCloseWorkerMode mode) {
    bool disableGemCloseWorker = true;

    // Initialize the GPU address space partition of every root device; a
    // single failure aborts initialization.
    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
        auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace;
        if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh)) {
            initialized = false;
            return;
        }
        localMemAllocs.emplace_back();
        // Worker stays disabled only if every device supports VM bind.
        disableGemCloseWorker &= getDrm(rootDeviceIndex).isVmBindAvailable();
    }
    MemoryManager::virtualPaddingAvailable = true;

    if (disableGemCloseWorker) {
        mode = gemCloseWorkerMode::gemCloseWorkerInactive;
    }

    // Debug flag (-1 == unset) has the final say over the worker mode.
    if (DebugManager.flags.EnableGemCloseWorker.get() != -1) {
        mode = DebugManager.flags.EnableGemCloseWorker.get() ? gemCloseWorkerMode::gemCloseWorkerActive : gemCloseWorkerMode::gemCloseWorkerInactive;
    }

    if (mode != gemCloseWorkerMode::gemCloseWorkerInactive) {
        gemCloseWorker.reset(new DrmGemCloseWorker(*this));
    }

    // Create one pinning buffer object per root device when pinning and/or
    // host-pointer validation is in use.
    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
        if (forcePinEnabled || validateHostPtrMemory) {
            auto cpuAddrBo = alignedMallocWrapper(MemoryConstants::pageSize, MemoryConstants::pageSize);
            UNRECOVERABLE_IF(cpuAddrBo == nullptr);
            // Preprogram the Bo with MI_BATCH_BUFFER_END and MI_NOOP. This BO will be used as the last BB in a series to indicate the end of submission.
            reinterpret_cast<uint32_t *>(cpuAddrBo)[0] = 0x05000000; // MI_BATCH_BUFFER_END
            reinterpret_cast<uint32_t *>(cpuAddrBo)[1] = 0;          // MI_NOOP
            memoryForPinBBs.push_back(cpuAddrBo);
            DEBUG_BREAK_IF(memoryForPinBBs[rootDeviceIndex] == nullptr);
        }
        pinBBs.push_back(createRootDeviceBufferObject(rootDeviceIndex));
    }

    initialized = true;
}
97
/// Creates the per-device pinning BO on top of the preallocated
/// memoryForPinBBs page. Returns nullptr when pinning/validation is disabled
/// or when userptr creation fails (fatal only if validateHostPtrMemory).
BufferObject *DrmMemoryManager::createRootDeviceBufferObject(uint32_t rootDeviceIndex) {
    BufferObject *bo = nullptr;
    if (forcePinEnabled || validateHostPtrMemory) {
        bo = allocUserptr(reinterpret_cast<uintptr_t>(memoryForPinBBs[rootDeviceIndex]), MemoryConstants::pageSize, 0, rootDeviceIndex);
        if (bo) {
            if (isLimitedRange(rootDeviceIndex)) {
                // Limited-range platforms need an explicitly reserved GPU VA.
                // acquireGpuRange may grow boSize (in/out reference).
                auto boSize = bo->peekSize();
                bo->setAddress(acquireGpuRange(boSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD));
                UNRECOVERABLE_IF(boSize < bo->peekSize());
            }
        } else {
            // Userptr creation failed: release the backing CPU page so the
            // destructor does not free a page with no BO.
            alignedFreeWrapper(memoryForPinBBs[rootDeviceIndex]);
            memoryForPinBBs[rootDeviceIndex] = nullptr;
            DEBUG_BREAK_IF(true);
            UNRECOVERABLE_IF(validateHostPtrMemory);
        }
    }
    return bo;
}
117
createDeviceSpecificMemResources(uint32_t rootDeviceIndex)118 void DrmMemoryManager::createDeviceSpecificMemResources(uint32_t rootDeviceIndex) {
119 pinBBs[rootDeviceIndex] = createRootDeviceBufferObject(rootDeviceIndex);
120 }
121
~DrmMemoryManager()122 DrmMemoryManager::~DrmMemoryManager() {
123 for (auto &memoryForPinBB : memoryForPinBBs) {
124 if (memoryForPinBB) {
125 MemoryManager::alignedFreeWrapper(memoryForPinBB);
126 }
127 }
128 }
129
releaseDeviceSpecificMemResources(uint32_t rootDeviceIndex)130 void DrmMemoryManager::releaseDeviceSpecificMemResources(uint32_t rootDeviceIndex) {
131 return releaseBufferObject(rootDeviceIndex);
132 }
133
releaseBufferObject(uint32_t rootDeviceIndex)134 void DrmMemoryManager::releaseBufferObject(uint32_t rootDeviceIndex) {
135 if (auto bo = pinBBs[rootDeviceIndex]) {
136 if (isLimitedRange(rootDeviceIndex)) {
137 releaseGpuRange(reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(), rootDeviceIndex);
138 }
139 DrmMemoryManager::unreference(bo, true);
140 pinBBs[rootDeviceIndex] = nullptr;
141 }
142 }
143
commonCleanup()144 void DrmMemoryManager::commonCleanup() {
145 if (gemCloseWorker) {
146 gemCloseWorker->close(true);
147 }
148
149 for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < pinBBs.size(); ++rootDeviceIndex) {
150 releaseBufferObject(rootDeviceIndex);
151 }
152 pinBBs.clear();
153 }
154
/// Removes a shared BO from the reuse list and releases its GPU VA range.
/// Called from unreference() with this->mtx held.
void DrmMemoryManager::eraseSharedBufferObject(NEO::BufferObject *bo) {
    auto it = std::find(sharingBufferObjects.begin(), sharingBufferObjects.end(), bo);
    DEBUG_BREAK_IF(it == sharingBufferObjects.end());
    // NOTE(review): if bo were not in the list, dereferencing *it below would
    // be UB in release builds (DEBUG_BREAK_IF only fires in debug) — relies
    // on callers passing only BOs registered via pushSharedBufferObject().
    releaseGpuRange(reinterpret_cast<void *>((*it)->peekAddress()), (*it)->peekUnmapSize(), this->getRootDeviceIndex(bo->peekDrm()));
    sharingBufferObjects.erase(it);
}
161
pushSharedBufferObject(NEO::BufferObject * bo)162 void DrmMemoryManager::pushSharedBufferObject(NEO::BufferObject *bo) {
163 bo->markAsReusableAllocation();
164 sharingBufferObjects.push_back(bo);
165 }
166
/// Drops one reference on a BO; closes and deletes it when the last
/// reference is released.
/// @param synchronousDestroy busy-waits until this caller holds the only
///                           remaining reference before decrementing.
/// @return value reported by BufferObject::unreference() (1 means the last
///         reference was just dropped), or uint32_t(-1) for a null bo.
uint32_t DrmMemoryManager::unreference(NEO::BufferObject *bo, bool synchronousDestroy) {
    if (!bo)
        return -1;

    if (synchronousDestroy) {
        // Spin until other holders release their references.
        while (bo->getRefCount() > 1)
            ;
    }

    // Reusable (shared) BOs live in sharingBufferObjects, which is guarded by
    // mtx — take the lock only for those.
    std::unique_lock<std::mutex> lock(mtx, std::defer_lock);
    if (bo->peekIsReusableAllocation()) {
        lock.lock();
    }

    uint32_t r = bo->unreference();

    if (r == 1) {
        if (bo->peekIsReusableAllocation()) {
            // Requires the lock taken above.
            eraseSharedBufferObject(bo);
        }

        bo->close();

        // Release the lock before deleting: the list no longer refers to bo.
        if (lock) {
            lock.unlock();
        }

        delete bo;
    }
    return r;
}
198
acquireGpuRange(size_t & size,uint32_t rootDeviceIndex,HeapIndex heapIndex)199 uint64_t DrmMemoryManager::acquireGpuRange(size_t &size, uint32_t rootDeviceIndex, HeapIndex heapIndex) {
200 auto gfxPartition = getGfxPartition(rootDeviceIndex);
201 return GmmHelper::canonize(gfxPartition->heapAllocate(heapIndex, size));
202 }
203
releaseGpuRange(void * address,size_t unmapSize,uint32_t rootDeviceIndex)204 void DrmMemoryManager::releaseGpuRange(void *address, size_t unmapSize, uint32_t rootDeviceIndex) {
205 uint64_t graphicsAddress = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
206 graphicsAddress = GmmHelper::decanonize(graphicsAddress);
207 auto gfxPartition = getGfxPartition(rootDeviceIndex);
208 gfxPartition->freeGpuAddressRange(graphicsAddress, unmapSize);
209 }
210
isKmdMigrationAvailable(uint32_t rootDeviceIndex)211 bool DrmMemoryManager::isKmdMigrationAvailable(uint32_t rootDeviceIndex) {
212 auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
213 auto &hwHelper = NEO::HwHelper::get(hwInfo->platform.eRenderCoreFamily);
214
215 auto useKmdMigration = hwHelper.isKmdMigrationSupported(*hwInfo);
216
217 if (DebugManager.flags.UseKmdMigration.get() != -1) {
218 useKmdMigration = DebugManager.flags.UseKmdMigration.get();
219 }
220
221 return useKmdMigration;
222 }
223
setMemAdvise(GraphicsAllocation * gfxAllocation,MemAdviseFlags flags,uint32_t rootDeviceIndex)224 bool DrmMemoryManager::setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) {
225 auto drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
226
227 return drmAllocation->setMemAdvise(&this->getDrm(rootDeviceIndex), flags);
228 }
229
/// Wraps an existing CPU mapping in a GEM buffer object via
/// DRM_IOCTL_I915_GEM_USERPTR.
/// @return new BufferObject (address preset to `address`), or nullptr on
///         ioctl/allocation failure.
NEO::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, uint32_t rootDeviceIndex) {
    drm_i915_gem_userptr userptr = {};
    userptr.user_ptr = address;
    userptr.user_size = size;
    userptr.flags = static_cast<uint32_t>(flags);

    if (this->getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_USERPTR, &userptr) != 0) {
        // ENODEV with no flags set: retry once with the UNSYNCHRONIZED
        // variant, which the kernel may require (and which in turn needs
        // elevated privileges — hence the root hint below).
        if (errno == ENODEV && userptr.flags == 0) {
            userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
            if (this->getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_USERPTR, &userptr) != 0) {
                if (geteuid() != 0) {
                    printDebugString(true, stderr, "%s", "ioctl(I915_GEM_USERPTR) failed. Try running as root but expect poor stability.\n");
                }
                return nullptr;
            }
        } else {
            return nullptr;
        }
    }

    PRINT_DEBUG_STRING(DebugManager.flags.PrintBOCreateDestroyResult.get(), stdout, "Created new BO with GEM_USERPTR, handle: BO-%d\n", userptr.handle);

    auto res = new (std::nothrow) BufferObject(&getDrm(rootDeviceIndex), userptr.handle, size, maxOsContextCount);
    if (!res) {
        DEBUG_BREAK_IF(true);
        return nullptr;
    }
    // For a userptr BO the GPU address mirrors the CPU address by default;
    // callers may override it (e.g. limited-range platforms).
    res->setAddress(address);

    return res;
}
261
emitPinningRequest(BufferObject * bo,const AllocationData & allocationData) const262 void DrmMemoryManager::emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const {
263 auto rootDeviceIndex = allocationData.rootDeviceIndex;
264 if (forcePinEnabled && pinBBs.at(rootDeviceIndex) != nullptr && allocationData.flags.forcePin && allocationData.size >= this->pinThreshold) {
265 pinBBs.at(rootDeviceIndex)->pin(&bo, 1, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, 0, getDefaultDrmContextId(rootDeviceIndex));
266 }
267 }
268
createGraphicsAllocation(OsHandleStorage & handleStorage,const AllocationData & allocationData)269 DrmAllocation *DrmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) {
270 auto hostPtr = const_cast<void *>(allocationData.hostPtr);
271 auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, nullptr, hostPtr, castToUint64(hostPtr), allocationData.size, MemoryPool::System4KBPages);
272 allocation->fragmentsStorage = handleStorage;
273 if (!allocation->setCacheRegion(&this->getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
274 return nullptr;
275 }
276 return allocation.release();
277 }
278
allocateGraphicsMemoryWithAlignment(const AllocationData & allocationData)279 DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) {
280 if (allocationData.type == NEO::GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA) {
281 return createMultiHostAllocation(allocationData);
282 }
283
284 return allocateGraphicsMemoryWithAlignmentImpl(allocationData);
285 }
286
/// Core aligned-allocation path: computes aligned sizes, optionally reserves
/// a GPU VA range (limited-range platforms and SVM_CPU allocations), then
/// delegates the actual allocation to createAllocWithAlignment().
DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const AllocationData &allocationData) {
    const size_t minAlignment = getUserptrAlignment();
    size_t cAlignment = alignUp(std::max(allocationData.alignment, minAlignment), minAlignment);
    // When size == 0 allocate allocationAlignment
    // It's needed to prevent overlapping pages with user pointers
    size_t cSize = std::max(alignUp(allocationData.size, minAlignment), minAlignment);

    uint64_t gpuReservationAddress = 0;
    uint64_t alignedGpuAddress = 0;
    size_t alignedStorageSize = cSize;
    size_t alignedVirtualAdressRangeSize = cSize;
    auto svmCpuAllocation = allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU;
    if (svmCpuAllocation) {
        // add padding in case reserved addr is not aligned
        alignedStorageSize = alignUp(cSize, cAlignment);
        alignedVirtualAdressRangeSize = alignedStorageSize + cAlignment;
    }

    // if limitedRangeAlloction is enabled, memory allocation for bo in the limited Range heap is required
    if ((isLimitedRange(allocationData.rootDeviceIndex) || svmCpuAllocation) && !allocationData.flags.isUSMHostAllocation) {
        gpuReservationAddress = acquireGpuRange(alignedVirtualAdressRangeSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);
        if (!gpuReservationAddress) {
            return nullptr;
        }

        // SVM_CPU: align the usable GPU address inside the padded reservation.
        alignedGpuAddress = gpuReservationAddress;
        if (svmCpuAllocation) {
            alignedGpuAddress = alignUp(gpuReservationAddress, cAlignment);
        }
    }

    auto drmAllocation = createAllocWithAlignment(allocationData, cSize, cAlignment, alignedStorageSize, alignedGpuAddress);
    if (drmAllocation != nullptr) {
        // The allocation owns the reservation; it is released on free.
        drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(gpuReservationAddress), alignedVirtualAdressRangeSize);
    }
    // NOTE(review): when createAllocWithAlignment returns nullptr the GPU
    // range acquired above is not released here — confirm the callee frees it
    // on failure, otherwise this leaks address space.

    return drmAllocation;
}
325
/// Allocates host memory, wraps it in a userptr BO and builds a DrmAllocation
/// that owns both. `gpuAddress`/`alignedSVMSize` describe the caller's GPU VA
/// reservation (may be 0 when no reservation was made).
DrmAllocation *DrmMemoryManager::createAllocWithAlignmentFromUserptr(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress) {
    auto res = alignedMallocWrapper(size, alignment);
    if (!res) {
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(res), size, 0, allocationData.rootDeviceIndex));
    if (!bo) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    zeroCpuMemoryIfRequested(allocationData, res, size);
    obtainGpuAddress(allocationData, bo.get(), gpuAddress);
    emitPinningRequest(bo.get(), allocationData);

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->peekAddress(), size, MemoryPool::System4KBPages);
    allocation->setDriverAllocatedCpuPtr(res);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSVMSize);
    if (!allocation->setCacheRegion(&this->getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
        // NOTE(review): `res` is freed here while `allocation` (destroyed on
        // return) still records it as the driver-allocated CPU pointer —
        // confirm DrmAllocation's destructor does not free it again.
        alignedFreeWrapper(res);
        return nullptr;
    }

    // Ownership of the BO transfers to the returned allocation.
    bo.release();

    return allocation.release();
}
354
obtainGpuAddress(const AllocationData & allocationData,BufferObject * bo,uint64_t gpuAddress)355 void DrmMemoryManager::obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress) {
356 if ((isLimitedRange(allocationData.rootDeviceIndex) || allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) &&
357 !allocationData.flags.isUSMHostAllocation) {
358 bo->setAddress(gpuAddress);
359 }
360 }
361
/// Wraps a caller-provided host pointer (allocationData.hostPtr, required) in
/// a userptr BO for USM host memory. Reserves a GPU VA only on limited-range
/// platforms.
DrmAllocation *DrmMemoryManager::allocateUSMHostGraphicsMemory(const AllocationData &allocationData) {
    const size_t minAlignment = getUserptrAlignment();
    // When size == 0 allocate allocationAlignment
    // It's needed to prevent overlapping pages with user pointers
    size_t cSize = std::max(alignUp(allocationData.size, minAlignment), minAlignment);

    void *bufferPtr = const_cast<void *>(allocationData.hostPtr);
    DEBUG_BREAK_IF(nullptr == bufferPtr);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(bufferPtr),
                                                                         cSize,
                                                                         0,
                                                                         allocationData.rootDeviceIndex));
    if (!bo) {
        return nullptr;
    }

    // if limitedRangeAlloction is enabled, memory allocation for bo in the limited Range heap is required
    uint64_t gpuAddress = 0;
    if (isLimitedRange(allocationData.rootDeviceIndex)) {
        gpuAddress = acquireGpuRange(cSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);
        if (!gpuAddress) {
            return nullptr;
        }
        bo->setAddress(gpuAddress);
    }

    emitPinningRequest(bo.get(), allocationData);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex,
                                        allocationData.type,
                                        bo.get(),
                                        bufferPtr,
                                        bo->peekAddress(),
                                        cSize,
                                        MemoryPool::System4KBPages);

    // gpuAddress is 0 when no range was reserved (full-range platforms).
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), cSize);
    // Ownership of the BO transfers to the allocation.
    bo.release();

    return allocation;
}
404
allocateGraphicsMemoryWithHostPtr(const AllocationData & allocationData)405 DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) {
406 auto res = static_cast<DrmAllocation *>(MemoryManager::allocateGraphicsMemoryWithHostPtr(allocationData));
407
408 if (res != nullptr && !validateHostPtrMemory) {
409 emitPinningRequest(res->getBO(), allocationData);
410 }
411 return res;
412 }
413
/// Creates a userptr-backed allocation softpinned to the GPU VA the caller
/// supplies in allocationData.gpuAddress (must be non-zero).
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) {
    auto osContextLinux = static_cast<OsContextLinux *>(allocationData.osContext);

    const size_t minAlignment = getUserptrAlignment();
    size_t alignedSize = alignUp(allocationData.size, minAlignment);

    auto res = alignedMallocWrapper(alignedSize, minAlignment);
    if (!res)
        return nullptr;

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(res), alignedSize, 0, allocationData.rootDeviceIndex));

    if (!bo) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    UNRECOVERABLE_IF(allocationData.gpuAddress == 0);
    bo->setAddress(allocationData.gpuAddress);

    // Pin using the caller's own OS context and its first DRM context id
    // (unlike emitPinningRequest, which uses the default engine's context).
    BufferObject *boPtr = bo.get();
    if (forcePinEnabled && pinBBs.at(allocationData.rootDeviceIndex) != nullptr && alignedSize >= this->pinThreshold) {
        pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, osContextLinux, 0, osContextLinux->getDrmContextIds()[0]);
    }

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->peekAddress(), alignedSize, MemoryPool::System4KBPages);
    allocation->setDriverAllocatedCpuPtr(res);
    // Ownership of the BO transfers to the allocation.
    bo.release();

    return allocation;
}
445
/// Wraps an arbitrary (possibly unaligned) caller host pointer in a
/// page-aligned userptr BO with its own GPU VA reservation. The page offset
/// of the original pointer is preserved via the allocation offset.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) {
    if (allocationData.size == 0 || !allocationData.hostPtr)
        return nullptr;

    // Expand to whole pages around the caller's pointer.
    auto alignedPtr = alignDown(allocationData.hostPtr, MemoryConstants::pageSize);
    auto alignedSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size);
    auto realAllocationSize = alignedSize;
    auto offsetInPage = ptrDiff(allocationData.hostPtr, alignedPtr);
    auto rootDeviceIndex = allocationData.rootDeviceIndex;

    auto gpuVirtualAddress = acquireGpuRange(alignedSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD);
    if (!gpuVirtualAddress) {
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(alignedPtr), realAllocationSize, 0, rootDeviceIndex));
    if (!bo) {
        releaseGpuRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize, rootDeviceIndex);
        return nullptr;
    }

    bo->setAddress(gpuVirtualAddress);

    // When host-pointer validation is on, the pin BB verifies the mapping is
    // usable; a failure rolls back the BO and the GPU VA reservation.
    if (validateHostPtrMemory) {
        auto boPtr = bo.get();
        auto vmHandleId = Math::getMinLsbSet(static_cast<uint32_t>(allocationData.storageInfo.subDeviceBitfield.to_ulong()));
        int result = pinBBs.at(rootDeviceIndex)->validateHostPtr(&boPtr, 1, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, vmHandleId, getDefaultDrmContextId(rootDeviceIndex));
        if (result != 0) {
            unreference(bo.release(), true);
            releaseGpuRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize, rootDeviceIndex);
            return nullptr;
        }
    }

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), const_cast<void *>(allocationData.hostPtr),
                                        gpuVirtualAddress, allocationData.size, MemoryPool::System4KBPages);
    // GPU VA points at the aligned base; the offset restores the caller view.
    allocation->setAllocationOffset(offsetInPage);

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVirtualAddress), alignedSize);
    bo.release();
    return allocation;
}
488
/// 64kb-page allocation is not implemented in this memory manager — always
/// returns nullptr.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) {
    return nullptr;
}
492
/// Allocates CPU-inaccessible memory through DRM_IOCTL_I915_GEM_CREATE
/// (kernel-managed pages) with a GPU VA from the 64kb standard heap.
GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &allocationData) {
    auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, 0u, false);
    size_t bufferSize = allocationData.size;
    uint64_t gpuRange = acquireGpuRange(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB);

    drm_i915_gem_create create = {0, 0, 0};
    create.size = bufferSize;

    // NOTE(review): an ioctl failure only breaks in debug builds; release
    // builds would proceed with handle 0 from the zero-initialized struct.
    [[maybe_unused]] auto ret = this->getDrm(allocationData.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    DEBUG_BREAK_IF(ret != 0);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new BufferObject(&getDrm(allocationData.rootDeviceIndex), create.handle, bufferSize, maxOsContextCount));
    bo->setAddress(gpuRange);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), nullptr, gpuRange, bufferSize, MemoryPool::SystemCpuInaccessible);
    allocation->setDefaultGmm(gmm.release());

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), bufferSize);
    // Ownership of the BO transfers to the allocation.
    bo.release();
    return allocation;
}
514
/// Image allocation: linear images reuse the aligned-allocation path; tiled
/// images get a GEM_CREATE BO with Y-tiling applied.
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) {
    if (allocationData.imgInfo->linearStorage) {
        auto alloc = allocateGraphicsMemoryWithAlignment(allocationData);
        if (alloc) {
            alloc->setDefaultGmm(gmm.release());
        }
        return alloc;
    }

    uint64_t gpuRange = acquireGpuRange(allocationData.imgInfo->size, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD);

    drm_i915_gem_create create = {0, 0, 0};
    create.size = allocationData.imgInfo->size;

    // NOTE(review): ioctl failure only breaks in debug builds (see
    // allocateMemoryByKMD for the same pattern).
    [[maybe_unused]] auto ret = this->getDrm(allocationData.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    DEBUG_BREAK_IF(ret != 0);

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new (std::nothrow) BufferObject(&getDrm(allocationData.rootDeviceIndex), create.handle, allocationData.imgInfo->size, maxOsContextCount));
    if (!bo) {
        return nullptr;
    }
    bo->setAddress(gpuRange);

    // Apply Y-tiling matching the image row pitch.
    [[maybe_unused]] auto ret2 = bo->setTiling(I915_TILING_Y, static_cast<uint32_t>(allocationData.imgInfo->rowPitch));
    DEBUG_BREAK_IF(ret2 != true);

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), nullptr, gpuRange, allocationData.imgInfo->size, MemoryPool::SystemCpuInaccessible);
    allocation->setDefaultGmm(gmm.release());

    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), allocationData.imgInfo->size);
    bo.release();
    return allocation;
}
548
/// Allocates memory addressable from a 32-bit GPU heap. Two paths:
///  - hostPtr given: wrap the caller's memory in a userptr BO;
///  - otherwise: allocate fresh host memory and wrap that.
/// Either way the BO is placed at a VA from the selected 32-bit heap and the
/// allocation records that heap's base as its GPU base address.
DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) {
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
    auto allocatorToUse = heapAssigner.get32BitHeapIndex(allocationData.type, useLocalMemory, *hwInfo, allocationData.flags.use32BitFrontWindow);

    if (allocationData.hostPtr) {
        // Path 1: caller-provided host pointer. Expand to whole pages and
        // keep the in-page offset in the returned GPU address.
        uintptr_t inputPtr = reinterpret_cast<uintptr_t>(allocationData.hostPtr);
        auto allocationSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size);
        auto realAllocationSize = allocationSize;
        auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
        auto gpuVirtualAddress = gfxPartition->heapAllocate(allocatorToUse, realAllocationSize);
        if (!gpuVirtualAddress) {
            return nullptr;
        }
        auto alignedUserPointer = reinterpret_cast<uintptr_t>(alignDown(allocationData.hostPtr, MemoryConstants::pageSize));
        auto inputPointerOffset = inputPtr - alignedUserPointer;

        std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(alignedUserPointer, allocationSize, 0, allocationData.rootDeviceIndex));
        if (!bo) {
            gfxPartition->heapFree(allocatorToUse, gpuVirtualAddress, realAllocationSize);
            return nullptr;
        }

        bo->setAddress(gpuVirtualAddress);
        auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), const_cast<void *>(allocationData.hostPtr), GmmHelper::canonize(ptrOffset(gpuVirtualAddress, inputPointerOffset)),
                                            allocationSize, MemoryPool::System4KBPagesWith32BitGpuAddressing);
        allocation->set32BitAllocation(true);
        allocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(allocatorToUse)));
        allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVirtualAddress), realAllocationSize);
        bo.release();
        return allocation;
    }

    // Path 2: no host pointer — allocate our own page-aligned host memory.
    size_t alignedAllocationSize = alignUp(allocationData.size, MemoryConstants::pageSize);
    auto allocationSize = alignedAllocationSize;
    auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
    auto gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize);

    if (!gpuVA) {
        return nullptr;
    }

    auto ptrAlloc = alignedMallocWrapper(alignedAllocationSize, getUserptrAlignment());

    if (!ptrAlloc) {
        gfxPartition->heapFree(allocatorToUse, gpuVA, allocationSize);
        return nullptr;
    }

    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(reinterpret_cast<uintptr_t>(ptrAlloc), alignedAllocationSize, 0, allocationData.rootDeviceIndex));

    if (!bo) {
        alignedFreeWrapper(ptrAlloc);
        gfxPartition->heapFree(allocatorToUse, gpuVA, allocationSize);
        return nullptr;
    }

    bo->setAddress(gpuVA);

    // softpin to the GPU address, res if it uses limitedRange Allocation
    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), ptrAlloc, GmmHelper::canonize(gpuVA), alignedAllocationSize,
                                        MemoryPool::System4KBPagesWith32BitGpuAddressing);

    allocation->set32BitAllocation(true);
    allocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(allocatorToUse)));
    allocation->setDriverAllocatedCpuPtr(ptrAlloc);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuVA), allocationSize);
    bo.release();
    return allocation;
}
618
findAndReferenceSharedBufferObject(int boHandle,uint32_t rootDeviceIndex)619 BufferObject *DrmMemoryManager::findAndReferenceSharedBufferObject(int boHandle, uint32_t rootDeviceIndex) {
620 BufferObject *bo = nullptr;
621 for (const auto &i : sharingBufferObjects) {
622 if (i->getHandle() == boHandle && i->getRootDeviceIndex() == rootDeviceIndex) {
623 bo = i;
624 bo->reference();
625 break;
626 }
627 }
628
629 return bo;
630 }
631
/// Imports a dma-buf/prime fd as a GraphicsAllocation. Reuses an existing BO
/// when the same handle was already imported for this device; otherwise
/// creates a new BO, reserves a GPU VA for it and registers it for reuse.
GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) {
    if (isHostIpcAllocation) {
        return createUSMHostAllocationFromSharedHandle(handle, properties, false);
    }

    // Guards sharingBufferObjects lookup/insert below.
    std::unique_lock<std::mutex> lock(mtx);

    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;

    // Convert the prime fd into a driver-local GEM handle.
    auto ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);

    if (ret != 0) {
        [[maybe_unused]] int err = errno;
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));

        return nullptr;
    }

    auto boHandle = openFd.handle;
    auto bo = findAndReferenceSharedBufferObject(boHandle, properties.rootDeviceIndex);

    if (bo == nullptr) {
        // First import of this handle: size comes from seeking the fd's end.
        size_t size = lseekFunction(handle, 0, SEEK_END);

        bo = new (std::nothrow) BufferObject(&getDrm(properties.rootDeviceIndex), boHandle, size, maxOsContextCount);

        if (!bo) {
            return nullptr;
        }

        // Heap choice: external heap for forced-32-bit imports, otherwise
        // 2MB heap when local memory is supported, else standard heap.
        auto heapIndex = isLocalMemorySupported(properties.rootDeviceIndex) ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD;
        if (requireSpecificBitness && this->force32bitAllocations) {
            heapIndex = HeapIndex::HEAP_EXTERNAL;
        }
        auto gpuRange = acquireGpuRange(size, properties.rootDeviceIndex, heapIndex);

        bo->setAddress(gpuRange);
        bo->setUnmapSize(size);
        bo->setRootDeviceIndex(properties.rootDeviceIndex);

        pushSharedBufferObject(bo);
    }

    lock.unlock();

    auto drmAllocation = new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(),
                                           handle, MemoryPool::SystemCpuInaccessible);

    if (requireSpecificBitness && this->force32bitAllocations) {
        drmAllocation->set32BitAllocation(true);
        drmAllocation->setGpuBaseAddress(GmmHelper::canonize(getExternalHeapBaseAddress(properties.rootDeviceIndex, drmAllocation->isAllocatedInLocalMemoryPool())));
    }

    if (properties.imgInfo) {
        // Query the actual tiling of the imported buffer; untiled imports are
        // flagged as linear for the image path.
        drm_i915_gem_get_tiling getTiling = {0};
        getTiling.handle = boHandle;
        ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_GET_TILING, &getTiling);

        if (ret == 0) {
            if (getTiling.tiling_mode == I915_TILING_NONE) {
                properties.imgInfo->linearStorage = true;
            }
        }

        Gmm *gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->getGmmClientContext(), *properties.imgInfo,
                           createStorageInfoFromProperties(properties), properties.flags.preferCompressed);
        drmAllocation->setDefaultGmm(gmm);
    }
    return drmAllocation;
}
703
closeSharedHandle(GraphicsAllocation * gfxAllocation)704 void DrmMemoryManager::closeSharedHandle(GraphicsAllocation *gfxAllocation) {
705 DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
706 if (drmAllocation->peekSharedHandle() != Sharing::nonSharedResource) {
707 closeFunction(drmAllocation->peekSharedHandle());
708 drmAllocation->setSharedHandle(Sharing::nonSharedResource);
709 }
710 }
711
createPaddedAllocation(GraphicsAllocation * inputGraphicsAllocation,size_t sizeWithPadding)712 GraphicsAllocation *DrmMemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
713 uint64_t gpuRange = 0llu;
714
715 auto rootDeviceIndex = inputGraphicsAllocation->getRootDeviceIndex();
716 gpuRange = acquireGpuRange(sizeWithPadding, rootDeviceIndex, HeapIndex::HEAP_STANDARD);
717
718 void *srcPtr = nullptr;
719 auto drmInputAllocation = static_cast<DrmAllocation *>(inputGraphicsAllocation);
720 if (drmInputAllocation->getMmapPtr()) {
721 auto bo = drmInputAllocation->getBO();
722 drm_i915_gem_mmap mmap_arg = {};
723 mmap_arg.handle = bo->peekHandle();
724 mmap_arg.size = bo->peekSize();
725 if (getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0) {
726 return nullptr;
727 }
728 srcPtr = addrToPtr(mmap_arg.addr_ptr);
729 inputGraphicsAllocation->lock(srcPtr);
730 } else {
731 srcPtr = inputGraphicsAllocation->getUnderlyingBuffer();
732 }
733 auto srcSize = inputGraphicsAllocation->getUnderlyingBufferSize();
734 auto alignedSrcSize = alignUp(srcSize, MemoryConstants::pageSize);
735 auto alignedPtr = reinterpret_cast<uintptr_t>(alignDown(srcPtr, MemoryConstants::pageSize));
736 auto offset = ptrDiff(srcPtr, alignedPtr);
737
738 std::unique_ptr<BufferObject, BufferObject::Deleter> bo(allocUserptr(alignedPtr, alignedSrcSize, 0, rootDeviceIndex));
739 if (!bo) {
740 return nullptr;
741 }
742 bo->setAddress(gpuRange);
743 auto allocation = new DrmAllocation(rootDeviceIndex, inputGraphicsAllocation->getAllocationType(), bo.get(), srcPtr, GmmHelper::canonize(ptrOffset(gpuRange, offset)), sizeWithPadding,
744 inputGraphicsAllocation->getMemoryPool());
745
746 allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), sizeWithPadding);
747 bo.release();
748 return allocation;
749 }
750
addAllocationToHostPtrManager(GraphicsAllocation * gfxAllocation)751 void DrmMemoryManager::addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) {
752 DrmAllocation *drmMemory = static_cast<DrmAllocation *>(gfxAllocation);
753
754 FragmentStorage fragment = {};
755 fragment.driverAllocation = true;
756 fragment.fragmentCpuPointer = gfxAllocation->getUnderlyingBuffer();
757 fragment.fragmentSize = alignUp(gfxAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize);
758
759 auto osHandle = new OsHandleLinux();
760 osHandle->bo = drmMemory->getBO();
761
762 fragment.osInternalStorage = osHandle;
763 fragment.residency = new ResidencyData(maxOsContextCount);
764 hostPtrManager->storeFragment(gfxAllocation->getRootDeviceIndex(), fragment);
765 }
766
removeAllocationFromHostPtrManager(GraphicsAllocation * gfxAllocation)767 void DrmMemoryManager::removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) {
768 auto buffer = gfxAllocation->getUnderlyingBuffer();
769 auto fragment = hostPtrManager->getFragment({buffer, gfxAllocation->getRootDeviceIndex()});
770 if (fragment && fragment->driverAllocation) {
771 OsHandle *osStorageToRelease = fragment->osInternalStorage;
772 ResidencyData *residencyDataToRelease = fragment->residency;
773 if (hostPtrManager->releaseHostPtr(gfxAllocation->getRootDeviceIndex(), buffer)) {
774 delete osStorageToRelease;
775 delete residencyDataToRelease;
776 }
777 }
778 }
779
freeGraphicsMemoryImpl(GraphicsAllocation * gfxAllocation)780 void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) {
781 if (DebugManager.flags.DoNotFreeResources.get()) {
782 return;
783 }
784 DrmAllocation *drmAlloc = static_cast<DrmAllocation *>(gfxAllocation);
785 this->unregisterAllocation(gfxAllocation);
786
787 for (auto &engine : this->registeredEngines) {
788 auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->memoryOperationsInterface.get());
789 memoryOperationsInterface->evictWithinOsContext(engine.osContext, *gfxAllocation);
790 }
791
792 if (drmAlloc->getMmapPtr()) {
793 this->munmapFunction(drmAlloc->getMmapPtr(), drmAlloc->getMmapSize());
794 }
795
796 for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
797 delete gfxAllocation->getGmm(handleId);
798 }
799
800 if (gfxAllocation->fragmentsStorage.fragmentCount) {
801 cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation);
802 } else {
803 auto &bos = static_cast<DrmAllocation *>(gfxAllocation)->getBOs();
804 for (auto bo : bos) {
805 unreference(bo, bo && bo->peekIsReusableAllocation() ? false : true);
806 }
807 closeSharedHandle(gfxAllocation);
808 }
809
810 releaseGpuRange(gfxAllocation->getReservedAddressPtr(), gfxAllocation->getReservedAddressSize(), gfxAllocation->getRootDeviceIndex());
811 alignedFreeWrapper(gfxAllocation->getDriverAllocatedCpuPtr());
812
813 drmAlloc->freeRegisteredBOBindExtHandles(&getDrm(drmAlloc->getRootDeviceIndex()));
814
815 delete gfxAllocation;
816 }
817
handleFenceCompletion(GraphicsAllocation * allocation)818 void DrmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
819 if (this->getDrm(allocation->getRootDeviceIndex()).isVmBindAvailable()) {
820 waitForEnginesCompletion(*allocation);
821 } else {
822 static_cast<DrmAllocation *>(allocation)->getBO()->wait(-1);
823 }
824 }
825
createGraphicsAllocationFromExistingStorage(AllocationProperties & properties,void * ptr,MultiGraphicsAllocation & multiGraphicsAllocation)826 GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromExistingStorage(AllocationProperties &properties, void *ptr, MultiGraphicsAllocation &multiGraphicsAllocation) {
827 auto defaultAlloc = multiGraphicsAllocation.getDefaultGraphicsAllocation();
828 if (static_cast<DrmAllocation *>(defaultAlloc)->getMmapPtr()) {
829 properties.size = defaultAlloc->getUnderlyingBufferSize();
830 properties.gpuAddress = castToUint64(ptr);
831
832 auto internalHandle = defaultAlloc->peekInternalHandle(this);
833 return createUSMHostAllocationFromSharedHandle(static_cast<osHandle>(internalHandle), properties, true);
834 } else {
835 return allocateGraphicsMemoryWithProperties(properties, ptr);
836 }
837 }
838
getSystemSharedMemory(uint32_t rootDeviceIndex)839 uint64_t DrmMemoryManager::getSystemSharedMemory(uint32_t rootDeviceIndex) {
840 uint64_t hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES));
841
842 drm_i915_gem_context_param getContextParam = {};
843 getContextParam.param = I915_CONTEXT_PARAM_GTT_SIZE;
844 [[maybe_unused]] auto ret = getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &getContextParam);
845 DEBUG_BREAK_IF(ret != 0);
846
847 uint64_t gpuMemorySize = getContextParam.value;
848
849 return std::min(hostMemorySize, gpuMemorySize);
850 }
851
getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex)852 double DrmMemoryManager::getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex) {
853 if (isLocalMemorySupported(rootDeviceIndex)) {
854 return 0.95;
855 }
856 return 0.8;
857 }
858
MemoryManager::AllocationStatus DrmMemoryManager::populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) {
    // Creates a userptr BO for every fragment that lacks OS storage, optionally
    // validates the host pointers by pinning, and registers the new fragments
    // with the host-pointer manager.
    BufferObject *allocatedBos[maxFragmentsCount];
    uint32_t numberOfBosAllocated = 0;
    uint32_t indexesOfAllocatedBos[maxFragmentsCount];

    for (unsigned int i = 0; i < maxFragmentsCount; i++) {
        // If there is no fragment it means it already exists.
        if (!handleStorage.fragmentStorageData[i].osHandleStorage && handleStorage.fragmentStorageData[i].fragmentSize) {
            auto osHandle = new OsHandleLinux();

            handleStorage.fragmentStorageData[i].osHandleStorage = osHandle;
            handleStorage.fragmentStorageData[i].residency = new ResidencyData(maxOsContextCount);

            osHandle->bo = allocUserptr((uintptr_t)handleStorage.fragmentStorageData[i].cpuPtr,
                                        handleStorage.fragmentStorageData[i].fragmentSize,
                                        0, rootDeviceIndex);
            if (!osHandle->bo) {
                // Mark the failed fragment so cleanOsHandles releases its storage.
                handleStorage.fragmentStorageData[i].freeTheFragment = true;
                return AllocationStatus::Error;
            }

            allocatedBos[numberOfBosAllocated] = osHandle->bo;
            indexesOfAllocatedBos[numberOfBosAllocated] = i;
            numberOfBosAllocated++;
        }
    }

    if (validateHostPtrMemory) {
        // Pin the freshly created BOs on the default engine to verify the host
        // pointers are actually mappable by the GPU.
        int result = pinBBs.at(rootDeviceIndex)->validateHostPtr(allocatedBos, numberOfBosAllocated, registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext, 0, getDefaultDrmContextId(rootDeviceIndex));

        if (result == EFAULT) {
            // Invalid host pointer: flag every fragment created above for release.
            for (uint32_t i = 0; i < numberOfBosAllocated; i++) {
                handleStorage.fragmentStorageData[indexesOfAllocatedBos[i]].freeTheFragment = true;
            }
            return AllocationStatus::InvalidHostPointer;
        } else if (result != 0) {
            return AllocationStatus::Error;
        }
    }

    // Only successfully validated fragments are registered.
    for (uint32_t i = 0; i < numberOfBosAllocated; i++) {
        hostPtrManager->storeFragment(rootDeviceIndex, handleStorage.fragmentStorageData[indexesOfAllocatedBos[i]]);
    }
    return AllocationStatus::Success;
}
904
cleanOsHandles(OsHandleStorage & handleStorage,uint32_t rootDeviceIndex)905 void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) {
906 for (unsigned int i = 0; i < maxFragmentsCount; i++) {
907 if (handleStorage.fragmentStorageData[i].freeTheFragment) {
908 auto osHandle = static_cast<OsHandleLinux *>(handleStorage.fragmentStorageData[i].osHandleStorage);
909 if (osHandle->bo) {
910 BufferObject *search = osHandle->bo;
911 search->wait(-1);
912 [[maybe_unused]] auto refCount = unreference(search, true);
913 DEBUG_BREAK_IF(refCount != 1u);
914 }
915 delete handleStorage.fragmentStorageData[i].osHandleStorage;
916 handleStorage.fragmentStorageData[i].osHandleStorage = nullptr;
917 delete handleStorage.fragmentStorageData[i].residency;
918 handleStorage.fragmentStorageData[i].residency = nullptr;
919 }
920 }
921 }
922
setDomainCpu(GraphicsAllocation & graphicsAllocation,bool writeEnable)923 bool DrmMemoryManager::setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable) {
924 DEBUG_BREAK_IF(writeEnable); //unsupported path (for CPU writes call SW_FINISH ioctl in unlockResource)
925
926 auto bo = static_cast<DrmAllocation *>(&graphicsAllocation)->getBO();
927 if (bo == nullptr)
928 return false;
929
930 // move a buffer object to the CPU read, and possibly write domain, including waiting on flushes to occur
931 drm_i915_gem_set_domain set_domain = {};
932 set_domain.handle = bo->peekHandle();
933 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
934 set_domain.write_domain = writeEnable ? I915_GEM_DOMAIN_CPU : 0;
935
936 return getDrm(graphicsAllocation.getRootDeviceIndex()).ioctl(DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0;
937 }
938
void *DrmMemoryManager::lockResourceImpl(GraphicsAllocation &graphicsAllocation) {
    // Device-local allocations are mapped through the local-memory path.
    if (MemoryPool::LocalMemory == graphicsAllocation.getMemoryPool()) {
        return lockResourceInLocalMemoryImpl(graphicsAllocation);
    }

    // If the allocation already has a CPU pointer, just move the BO to the
    // CPU read domain and return that pointer.
    auto cpuPtr = graphicsAllocation.getUnderlyingBuffer();
    if (cpuPtr != nullptr) {
        [[maybe_unused]] auto success = setDomainCpu(graphicsAllocation, false);
        DEBUG_BREAK_IF(!success);
        return cpuPtr;
    }

    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (bo == nullptr)
        return nullptr;

    // No CPU pointer yet: create a CPU mapping of the BO via GEM_MMAP and
    // remember it on the BO as the locked address.
    drm_i915_gem_mmap mmap_arg = {};
    mmap_arg.handle = bo->peekHandle();
    mmap_arg.size = bo->peekSize();
    if (getDrm(graphicsAllocation.getRootDeviceIndex()).ioctl(DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0) {
        return nullptr;
    }

    bo->setLockedAddress(reinterpret_cast<void *>(mmap_arg.addr_ptr));

    [[maybe_unused]] auto success = setDomainCpu(graphicsAllocation, false);
    DEBUG_BREAK_IF(!success);

    return bo->peekLockedAddress();
}
969
unlockResourceImpl(GraphicsAllocation & graphicsAllocation)970 void DrmMemoryManager::unlockResourceImpl(GraphicsAllocation &graphicsAllocation) {
971 if (MemoryPool::LocalMemory == graphicsAllocation.getMemoryPool()) {
972 return unlockResourceInLocalMemoryImpl(static_cast<DrmAllocation &>(graphicsAllocation).getBO());
973 }
974
975 auto cpuPtr = graphicsAllocation.getUnderlyingBuffer();
976 if (cpuPtr != nullptr) {
977 return;
978 }
979
980 auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
981 if (bo == nullptr)
982 return;
983
984 releaseReservedCpuAddressRange(bo->peekLockedAddress(), bo->peekSize(), graphicsAllocation.getRootDeviceIndex());
985
986 bo->setLockedAddress(nullptr);
987 }
988
obtainFdFromHandle(int boHandle,uint32_t rootDeviceindex)989 int DrmMemoryManager::obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex) {
990 drm_prime_handle openFd = {0, 0, 0};
991
992 openFd.flags = DRM_CLOEXEC | DRM_RDWR;
993 openFd.handle = boHandle;
994
995 getDrm(rootDeviceindex).ioctl(DRM_IOCTL_PRIME_HANDLE_TO_FD, &openFd);
996
997 return openFd.fd;
998 }
999
getDefaultDrmContextId(uint32_t rootDeviceIndex) const1000 uint32_t DrmMemoryManager::getDefaultDrmContextId(uint32_t rootDeviceIndex) const {
1001 auto osContextLinux = static_cast<OsContextLinux *>(registeredEngines[defaultEngineIndex[rootDeviceIndex]].osContext);
1002 return osContextLinux->getDrmContextIds()[0];
1003 }
1004
getUserptrAlignment()1005 size_t DrmMemoryManager::getUserptrAlignment() {
1006 auto alignment = MemoryConstants::allocationAlignment;
1007
1008 if (DebugManager.flags.ForceUserptrAlignment.get() != -1) {
1009 alignment = DebugManager.flags.ForceUserptrAlignment.get() * MemoryConstants::kiloByte;
1010 }
1011
1012 return alignment;
1013 }
1014
getDrm(uint32_t rootDeviceIndex) const1015 Drm &DrmMemoryManager::getDrm(uint32_t rootDeviceIndex) const {
1016 return *this->executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as<Drm>();
1017 }
1018
getRootDeviceIndex(const Drm * drm)1019 uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
1020 auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size();
1021
1022 for (auto rootDeviceIndex = 0u; rootDeviceIndex < rootDeviceCount; rootDeviceIndex++) {
1023 if (&getDrm(rootDeviceIndex) == drm) {
1024 return rootDeviceIndex;
1025 }
1026 }
1027 return CommonConstants::unspecifiedDeviceIndex;
1028 }
1029
reserveGpuAddress(size_t size,uint32_t rootDeviceIndex)1030 AddressRange DrmMemoryManager::reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) {
1031 auto gpuVa = acquireGpuRange(size, rootDeviceIndex, HeapIndex::HEAP_STANDARD);
1032 return AddressRange{gpuVa, size};
1033 }
1034
freeGpuAddress(AddressRange addressRange,uint32_t rootDeviceIndex)1035 void DrmMemoryManager::freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) {
1036 releaseGpuRange(reinterpret_cast<void *>(addressRange.address), addressRange.size, rootDeviceIndex);
1037 }
1038
acquireAllocLock()1039 std::unique_lock<std::mutex> DrmMemoryManager::acquireAllocLock() {
1040 return std::unique_lock<std::mutex>(this->allocMutex);
1041 }
1042
getSysMemAllocs()1043 std::vector<GraphicsAllocation *> &DrmMemoryManager::getSysMemAllocs() {
1044 return this->sysMemAllocs;
1045 }
1046
getLocalMemAllocs(uint32_t rootDeviceIndex)1047 std::vector<GraphicsAllocation *> &DrmMemoryManager::getLocalMemAllocs(uint32_t rootDeviceIndex) {
1048 return this->localMemAllocs[rootDeviceIndex];
1049 }
1050
registerSysMemAlloc(GraphicsAllocation * allocation)1051 void DrmMemoryManager::registerSysMemAlloc(GraphicsAllocation *allocation) {
1052 std::lock_guard<std::mutex> lock(this->allocMutex);
1053 this->sysMemAllocs.push_back(allocation);
1054 }
1055
registerLocalMemAlloc(GraphicsAllocation * allocation,uint32_t rootDeviceIndex)1056 void DrmMemoryManager::registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex) {
1057 std::lock_guard<std::mutex> lock(this->allocMutex);
1058 this->localMemAllocs[rootDeviceIndex].push_back(allocation);
1059 }
unregisterAllocation(GraphicsAllocation * allocation)1060 void DrmMemoryManager::unregisterAllocation(GraphicsAllocation *allocation) {
1061 std::lock_guard<std::mutex> lock(this->allocMutex);
1062 sysMemAllocs.erase(std::remove(sysMemAllocs.begin(), sysMemAllocs.end(), allocation),
1063 sysMemAllocs.end());
1064 localMemAllocs[allocation->getRootDeviceIndex()].erase(std::remove(localMemAllocs[allocation->getRootDeviceIndex()].begin(),
1065 localMemAllocs[allocation->getRootDeviceIndex()].end(),
1066 allocation),
1067 localMemAllocs[allocation->getRootDeviceIndex()].end());
1068 }
1069
registerAllocationInOs(GraphicsAllocation * allocation)1070 void DrmMemoryManager::registerAllocationInOs(GraphicsAllocation *allocation) {
1071 if (allocation && getDrm(allocation->getRootDeviceIndex()).resourceRegistrationEnabled()) {
1072 auto drmAllocation = static_cast<DrmAllocation *>(allocation);
1073 drmAllocation->registerBOBindExtHandle(&getDrm(drmAllocation->getRootDeviceIndex()));
1074
1075 if (isAllocationTypeToCapture(drmAllocation->getAllocationType())) {
1076 drmAllocation->markForCapture();
1077 }
1078 }
1079 }
1080
create(ExecutionEnvironment & executionEnvironment)1081 std::unique_ptr<MemoryManager> DrmMemoryManager::create(ExecutionEnvironment &executionEnvironment) {
1082 bool validateHostPtr = true;
1083
1084 if (DebugManager.flags.EnableHostPtrValidation.get() != -1) {
1085 validateHostPtr = DebugManager.flags.EnableHostPtrValidation.get();
1086 }
1087
1088 return std::make_unique<DrmMemoryManager>(gemCloseWorkerMode::gemCloseWorkerActive,
1089 DebugManager.flags.EnableForcePin.get(),
1090 validateHostPtr,
1091 executionEnvironment);
1092 }
1093
getLocalMemorySize(uint32_t rootDeviceIndex,uint32_t deviceBitfield)1094 uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) {
1095 auto memoryInfo = getDrm(rootDeviceIndex).getMemoryInfo();
1096 if (!memoryInfo) {
1097 return 0;
1098 }
1099
1100 auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
1101 uint32_t subDevicesCount = HwHelper::getSubDevicesCount(hwInfo);
1102 size_t size = 0;
1103
1104 for (uint32_t i = 0; i < subDevicesCount; i++) {
1105 auto memoryBank = (1 << i);
1106
1107 if (deviceBitfield & memoryBank) {
1108 size += memoryInfo->getMemoryRegionSize(memoryBank);
1109 }
1110 }
1111
1112 return size;
1113 }
void *DrmMemoryManager::lockResourceInLocalMemoryImpl(GraphicsAllocation &graphicsAllocation) {
    // Maps a local-memory allocation's BO into the CPU address space.
    if (!isLocalMemorySupported(graphicsAllocation.getRootDeviceIndex())) {
        return nullptr;
    }
    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (graphicsAllocation.getAllocationType() == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
        // WRITE_COMBINED allocations expose a 64KB-aligned CPU pointer: map the
        // BO, then align the returned address up to the next 64KB boundary.
        auto addr = lockResourceInLocalMemoryImpl(bo);
        auto alignedAddr = alignUp(addr, MemoryConstants::pageSize64k);
        auto notUsedSize = ptrDiff(alignedAddr, addr);
        //call unmap to free the unaligned pages preceding the BO allocation and
        //adjust the pointer in the CPU mapping to the beginning of the BO allocation
        munmapFunction(addr, notUsedSize);
        bo->setLockedAddress(alignedAddr);
        return bo->peekLockedAddress();
    }
    return lockResourceInLocalMemoryImpl(bo);
}
1131
copyMemoryToAllocation(GraphicsAllocation * graphicsAllocation,size_t destinationOffset,const void * memoryToCopy,size_t sizeToCopy)1132 bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
1133 if (graphicsAllocation->getUnderlyingBuffer() || !isLocalMemorySupported(graphicsAllocation->getRootDeviceIndex())) {
1134 return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
1135 }
1136 return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, maxNBitValue(graphicsAllocation->storageInfo.getNumBanks()));
1137 }
copyMemoryToAllocationBanks(GraphicsAllocation * graphicsAllocation,size_t destinationOffset,const void * memoryToCopy,size_t sizeToCopy,DeviceBitfield handleMask)1138 bool DrmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
1139 if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) {
1140 return false;
1141 }
1142 auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
1143 for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
1144 if (!handleMask.test(handleId)) {
1145 continue;
1146 }
1147 auto ptr = lockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
1148 if (!ptr) {
1149 return false;
1150 }
1151 memcpy_s(ptrOffset(ptr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy);
1152 this->unlockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
1153 }
1154 return true;
1155 }
1156
unlockResourceInLocalMemoryImpl(BufferObject * bo)1157 void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
1158 if (bo == nullptr)
1159 return;
1160
1161 releaseReservedCpuAddressRange(bo->peekLockedAddress(), bo->peekSize(), this->getRootDeviceIndex(bo->peekDrm()));
1162
1163 [[maybe_unused]] auto ret = munmapFunction(bo->peekLockedAddress(), bo->peekSize());
1164 DEBUG_BREAK_IF(ret != 0);
1165
1166 bo->setLockedAddress(nullptr);
1167 }
1168
// Builds one Gmm per colouring chunk so a multi-bank allocation is spread
// ("coloured") across memory banks as evenly as possible.
void createColouredGmms(GmmClientContext *clientContext, DrmAllocation &allocation, const StorageInfo &storageInfo, bool compression) {
    DEBUG_BREAK_IF(storageInfo.colouringPolicy == ColouringPolicy::DeviceCountBased && storageInfo.colouringGranularity != MemoryConstants::pageSize64k);

    auto remainingSize = alignUp(allocation.getUnderlyingBufferSize(), storageInfo.colouringGranularity);
    auto handles = storageInfo.getNumBanks();
    auto banksCnt = storageInfo.getTotalBanksCnt();

    if (storageInfo.colouringPolicy == ColouringPolicy::ChunkSizeBased) {
        // Chunk-size colouring: one handle per granularity-sized chunk.
        handles = static_cast<uint32_t>(remainingSize / storageInfo.colouringGranularity);
        allocation.resizeGmms(handles);
    }
    /* This logic is to colour resource as equally as possible.
    Divide size by number of devices and align result up to 64kb page, then subtract it from whole size and allocate it on the first tile. First tile has it's chunk.
    In the following iteration divide rest of a size by remaining devices and again subtract it.
    Notice that if allocation size (in pages) is not divisible by 4 then remainder can be equal to 1,2,3 and by using this algorithm it can be spread efficiently.

    For example: 18 pages allocation and 4 devices. Page size is 64kb.
    Divide by 4 and align up to page size and result is 5 pages. After subtract, remaining size is 13 pages.
    Now divide 13 by 3 and align up - result is 5 pages. After subtract, remaining size is 8 pages.
    Divide 8 by 2 - result is 4 pages.
    In last iteration remaining 4 pages go to last tile.
    18 pages is coloured to (5, 5, 4, 4).

    It was tested and doesn't require any debug*/
    for (auto handleId = 0u; handleId < handles; handleId++) {
        auto currentSize = alignUp(remainingSize / (handles - handleId), storageInfo.colouringGranularity);
        remainingSize -= currentSize;
        // Restrict each chunk's Gmm to a single bank, cycling through the banks.
        StorageInfo limitedStorageInfo = storageInfo;
        limitedStorageInfo.memoryBanks &= (1u << (handleId % banksCnt));
        auto gmm = new Gmm(clientContext,
                           nullptr,
                           currentSize,
                           0u,
                           false,
                           compression,
                           false,
                           limitedStorageInfo);
        allocation.setGmm(gmm, handleId);
    }
}
1209
fillGmmsInAllocation(GmmClientContext * clientContext,DrmAllocation * allocation,const StorageInfo & storageInfo)1210 void fillGmmsInAllocation(GmmClientContext *clientContext, DrmAllocation *allocation, const StorageInfo &storageInfo) {
1211 auto alignedSize = alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k);
1212 for (auto handleId = 0u; handleId < storageInfo.getNumBanks(); handleId++) {
1213 StorageInfo limitedStorageInfo = storageInfo;
1214 limitedStorageInfo.memoryBanks &= 1u << handleId;
1215 limitedStorageInfo.pageTablesVisibility &= 1u << handleId;
1216 auto gmm = new Gmm(clientContext, nullptr, alignedSize, 0u, false, false, false, limitedStorageInfo);
1217 allocation->setGmm(gmm, handleId);
1218 }
1219 }
1220
// Selects a GPU virtual address (and the heap it comes from) for the given
// allocation type; may shrink sizeAllocated to 0 when no VA is reserved here.
uint64_t getGpuAddress(const AlignmentSelector &alignmentSelector, HeapAssigner &heapAssigner, const HardwareInfo &hwInfo, GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition,
                       size_t &sizeAllocated, const void *hostPtr, bool resource48Bit, bool useFrontWindow) {
    uint64_t gpuAddress = 0;
    switch (allocType) {
    case GraphicsAllocation::AllocationType::SVM_GPU:
        // SVM: the GPU address mirrors the host pointer; nothing is reserved.
        gpuAddress = reinterpret_cast<uint64_t>(hostPtr);
        sizeAllocated = 0;
        break;
    case GraphicsAllocation::AllocationType::KERNEL_ISA:
    case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL:
    case GraphicsAllocation::AllocationType::INTERNAL_HEAP:
    case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: {
        // These types live in a 32-bit heap chosen by the heap assigner.
        auto heap = heapAssigner.get32BitHeapIndex(allocType, true, hwInfo, useFrontWindow);
        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(heap, sizeAllocated));
    } break;
    case GraphicsAllocation::AllocationType::WRITE_COMBINED:
        // WRITE_COMBINED gets its address later (from the CPU mapping); reserve nothing.
        sizeAllocated = 0;
        break;
    default:
        AlignmentSelector::CandidateAlignment alignment = alignmentSelector.selectAlignment(sizeAllocated);
        // Prefer the extended heap when it exists and the resource is not limited to 48 bits.
        if (gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !resource48Bit) {
            alignment.heap = HeapIndex::HEAP_EXTENDED;
        }
        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocateWithCustomAlignment(alignment.heap, sizeAllocated, alignment.alignment));
        break;
    }
    return gpuAddress;
}
1249
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) {
    // Allocates device-local (LMEM) memory. Returns nullptr with status
    // RetryInNonDevicePool when the request must be served from system memory,
    // or with status Error when a local allocation genuinely failed.
    status = AllocationStatus::RetryInNonDevicePool;
    if (!this->localMemorySupported[allocationData.rootDeviceIndex] ||
        allocationData.flags.useSystemMemory ||
        (allocationData.flags.allow32Bit && this->force32bitAllocations) ||
        allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) {
        return nullptr;
    }

    // USM shared allocations follow a dedicated path.
    if (allocationData.type == GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY) {
        auto allocation = this->createSharedUnifiedMemoryAllocation(allocationData);
        status = allocation ? AllocationStatus::Success : AllocationStatus::Error;
        return allocation;
    }

    std::unique_ptr<Gmm> gmm;
    size_t sizeAligned = 0;
    auto numHandles = allocationData.storageInfo.getNumBanks();
    bool createSingleHandle = 1 == numHandles;
    if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE) {
        allocationData.imgInfo->useLocalMemory = true;
        gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo,
                                    allocationData.storageInfo, allocationData.flags.preferCompressed);
        sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
    } else {
        if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
            // Extra 2MB padding around the payload; the CPU pointer is later
            // aligned down to a 2MB boundary (see the WRITE_COMBINED branch below).
            sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
        } else {
            sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
        }
        if (createSingleHandle) {
            gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
                                        nullptr,
                                        sizeAligned,
                                        0u,
                                        allocationData.flags.uncacheable,
                                        allocationData.flags.preferCompressed,
                                        false,
                                        allocationData.storageInfo);
        }
    }

    // getGpuAddress may adjust sizeAllocated (e.g. to 0 for SVM/WRITE_COMBINED).
    auto sizeAllocated = sizeAligned;
    auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
    auto gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo,
                                    allocationData.type, gfxPartition, sizeAllocated,
                                    allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow);

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, gpuAddress, sizeAligned, MemoryPool::LocalMemory);
    if (createSingleHandle) {
        allocation->setDefaultGmm(gmm.release());
    } else if (allocationData.storageInfo.multiStorage) {
        // Multi-bank, colour-spread allocation.
        createColouredGmms(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
                           *allocation,
                           allocationData.storageInfo,
                           allocationData.flags.preferCompressed);
    } else {
        // Multi-bank, replicated allocation: one Gmm per bank.
        fillGmmsInAllocation(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocation.get(), allocationData.storageInfo);
    }
    allocation->storageInfo = allocationData.storageInfo;
    allocation->setFlushL3Required(allocationData.flags.flushL3);
    allocation->setUncacheable(allocationData.flags.uncacheable);
    allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), sizeAllocated);

    if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) {
        // BO creation failed: free the Gmms and the reserved VA range.
        for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
            delete allocation->getGmm(handleId);
        }
        gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated);
        status = AllocationStatus::Error;
        return nullptr;
    }
    if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
        // The GPU address is derived from the CPU mapping aligned down to 2MB;
        // the padding reserved above guarantees this stays inside the mapping.
        auto cpuAddress = lockResource(allocation.get());
        auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte);
        auto offset = ptrDiff(cpuAddress, alignedCpuAddress);
        allocation->setAllocationOffset(offset);
        allocation->setCpuPtrAndGpuAddress(cpuAddress, reinterpret_cast<uint64_t>(alignedCpuAddress));
        DEBUG_BREAK_IF(allocation->storageInfo.multiStorage);
        allocation->getBO()->setAddress(reinterpret_cast<uint64_t>(cpuAddress));
    }
    if (allocationData.flags.requiresCpuAccess) {
        auto cpuAddress = lockResource(allocation.get());
        allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress);
    }
    if (heapAssigner.useInternal32BitHeap(allocationData.type)) {
        allocation->setGpuBaseAddress(GmmHelper::canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex, true)));
    }
    if (!allocation->setCacheRegion(&getDrm(allocationData.rootDeviceIndex), static_cast<CacheRegion>(allocationData.cacheRegion))) {
        // Cache-region setup failed: free BOs, Gmms and the reserved VA range.
        for (auto bo : allocation->getBOs()) {
            delete bo;
        }
        for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
            delete allocation->getGmm(handleId);
        }
        gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated);
        status = AllocationStatus::Error;
        return nullptr;
    }

    status = AllocationStatus::Success;
    return allocation.release();
}
1354
createBufferObjectInMemoryRegion(Drm * drm,uint64_t gpuAddress,size_t size,uint32_t memoryBanks,size_t maxOsContextCount)1355 BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(Drm *drm,
1356 uint64_t gpuAddress,
1357 size_t size,
1358 uint32_t memoryBanks,
1359 size_t maxOsContextCount) {
1360 auto memoryInfo = drm->getMemoryInfo();
1361 if (!memoryInfo) {
1362 return nullptr;
1363 }
1364
1365 uint32_t handle = 0;
1366 auto ret = memoryInfo->createGemExtWithSingleRegion(drm, memoryBanks, size, handle);
1367
1368 if (ret != 0) {
1369 return nullptr;
1370 }
1371
1372 auto bo = new (std::nothrow) BufferObject(drm, handle, size, maxOsContextCount);
1373 if (!bo) {
1374 return nullptr;
1375 }
1376
1377 bo->setAddress(gpuAddress);
1378
1379 return bo;
1380 }
1381
// Creates the buffer objects backing a multi-bank DrmAllocation.
// One BO is created per handle (per bank, or per Gmm for chunk-based colouring),
// each placed in the memory bank selected from storageInfo.memoryBanks.
// Returns false if any BO creation fails; BOs created so far remain attached to
// the allocation (the caller is responsible for cleanup on failure).
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) {
    BufferObjects bos{};
    auto &storageInfo = allocation->storageInfo;
    auto boAddress = gpuAddress;
    auto currentBank = 0u;
    auto iterationOffset = 0u;
    auto banksCnt = storageInfo.getTotalBanksCnt();

    auto handles = storageInfo.getNumBanks();
    if (storageInfo.colouringPolicy == ColouringPolicy::ChunkSizeBased) {
        // Chunk-based colouring may use more handles than banks: one BO per Gmm.
        handles = allocation->getNumGmms();
        allocation->resizeBufferObjects(handles);
        bos.resize(handles);
    }

    for (auto handleId = 0u; handleId < handles; handleId++, currentBank++) {
        // With more handles than banks, wrap around and store subsequent BOs at
        // a bank-count offset within the allocation's BO table.
        if (currentBank == banksCnt) {
            currentBank = 0;
            iterationOffset += banksCnt;
        }
        uint32_t memoryBanks = static_cast<uint32_t>(storageInfo.memoryBanks.to_ulong());
        if (storageInfo.getNumBanks() > 1) {
            //check if we have this bank, if not move to next one
            //we may have holes in memoryBanks that we need to skip i.e. memoryBanks 1101 and 3 handle allocation
            while (!(memoryBanks & (1u << currentBank))) {
                currentBank++;
            }
            // Narrow the mask to exactly the selected bank for this BO.
            memoryBanks &= 1u << currentBank;
        }
        // BO size comes from the Gmm's resource info, padded to 64KB granularity.
        auto boSize = alignUp(allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k);
        bos[handleId] = createBufferObjectInMemoryRegion(drm, boAddress, boSize, memoryBanks, maxOsContextCount);
        if (nullptr == bos[handleId]) {
            return false;
        }
        allocation->getBufferObjectToModify(currentBank + iterationOffset) = bos[handleId];
        if (storageInfo.multiStorage) {
            // Multi-storage allocations lay banks out contiguously in the GPU VA range.
            boAddress += boSize;
        }
    }

    if (storageInfo.colouringPolicy == ColouringPolicy::MappingBased) {
        // Mapping-based colouring: stripe the GPU VA range across the BOs in
        // colouringGranularity-sized chunks (round-robin over the handles).
        auto size = alignUp(allocation->getUnderlyingBufferSize(), storageInfo.colouringGranularity);
        auto chunks = static_cast<uint32_t>(size / storageInfo.colouringGranularity);
        auto granularity = storageInfo.colouringGranularity;

        for (uint32_t boHandle = 0; boHandle < handles; boHandle++) {
            bos[boHandle]->setColourWithBind();
            bos[boHandle]->setColourChunk(granularity);
            // Each BO receives ceil(chunks / handles) address slots.
            bos[boHandle]->reserveAddressVector(alignUp(chunks, handles) / handles);
        }

        auto boHandle = 0u;
        auto colourAddress = gpuAddress;
        for (auto chunk = 0u; chunk < chunks; chunk++) {
            if (boHandle == handles) {
                boHandle = 0u;
            }

            bos[boHandle]->addColouringAddress(colourAddress);
            colourAddress += granularity;

            boHandle++;
        }
    }

    return true;
}
1449
retrieveMmapOffsetForBufferObject(uint32_t rootDeviceIndex,BufferObject & bo,uint64_t flags,uint64_t & offset)1450 bool DrmMemoryManager::retrieveMmapOffsetForBufferObject(uint32_t rootDeviceIndex, BufferObject &bo, uint64_t flags, uint64_t &offset) {
1451 constexpr uint64_t mmapOffsetFixed = 4;
1452
1453 drm_i915_gem_mmap_offset mmapOffset = {};
1454 mmapOffset.handle = bo.peekHandle();
1455 mmapOffset.flags = isLocalMemorySupported(rootDeviceIndex) ? mmapOffsetFixed : flags;
1456 auto &drm = getDrm(rootDeviceIndex);
1457 auto ret = drm.ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmapOffset);
1458 if (ret != 0 && isLocalMemorySupported(rootDeviceIndex)) {
1459 mmapOffset.flags = flags;
1460 ret = drm.ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmapOffset);
1461 }
1462 if (ret != 0) {
1463 int err = drm.getErrno();
1464 PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
1465 DEBUG_BREAK_IF(ret != 0);
1466 return false;
1467 }
1468
1469 offset = mmapOffset.offset;
1470 return true;
1471 }
1472
1473 } // namespace NEO
1474