/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/definitions/engine_limits.h"
#include "shared/source/memory_manager/definitions/storage_info.h"
#include "shared/source/memory_manager/host_ptr_defines.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "shared/source/utilities/idlist.h"
#include "shared/source/utilities/stackvec.h"

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <mutex>
#include <string>
#include <vector>

namespace NEO {

31 using osHandle = unsigned int;
toOsHandle(const void * handle)32 inline osHandle toOsHandle(const void *handle) {
33
34 return static_cast<osHandle>(castToUint64(handle));
35 }
36
// Forward declarations to keep this header light-weight.
enum class HeapIndex : uint32_t;

namespace Sharing {
// Sentinel handle value meaning "this allocation is not shared across processes".
constexpr auto nonSharedResource = 0u;
} // namespace Sharing

class Gmm;
class MemoryManager;

// State used by AUB/TBX capture for a single allocation: per-bank writable
// bitmasks plus dump-selection flags. All banks start out writable.
struct AubInfo {
    uint32_t aubWritable = std::numeric_limits<uint32_t>::max(); // bitmask of banks still writable to the AUB stream
    uint32_t tbxWritable = std::numeric_limits<uint32_t>::max(); // bitmask of banks still writable via TBX
    bool allocDumpable = false;                                  // allocation selected for dumping
    bool bcsDumpOnly = false;                                    // when dumpable, dump only through the BCS (blitter) path
    bool memObjectsAllocationWithWritableFlags = false;          // backing mem object was created with writable flags
};

54 class GraphicsAllocation : public IDNode<GraphicsAllocation> {
55 public:
56 enum class AllocationType {
57 UNKNOWN = 0,
58 BUFFER,
59 BUFFER_HOST_MEMORY,
60 COMMAND_BUFFER,
61 CONSTANT_SURFACE,
62 DEVICE_QUEUE_BUFFER,
63 EXTERNAL_HOST_PTR,
64 FILL_PATTERN,
65 GLOBAL_SURFACE,
66 IMAGE,
67 INDIRECT_OBJECT_HEAP,
68 INSTRUCTION_HEAP,
69 INTERNAL_HEAP,
70 INTERNAL_HOST_MEMORY,
71 KERNEL_ISA,
72 KERNEL_ISA_INTERNAL,
73 LINEAR_STREAM,
74 MAP_ALLOCATION,
75 MCS,
76 PIPE,
77 PREEMPTION,
78 PRINTF_SURFACE,
79 PRIVATE_SURFACE,
80 PROFILING_TAG_BUFFER,
81 SCRATCH_SURFACE,
82 SHARED_BUFFER,
83 SHARED_CONTEXT_IMAGE,
84 SHARED_IMAGE,
85 SHARED_RESOURCE_COPY,
86 SURFACE_STATE_HEAP,
87 SVM_CPU,
88 SVM_GPU,
89 SVM_ZERO_COPY,
90 TAG_BUFFER,
91 GLOBAL_FENCE,
92 TIMESTAMP_PACKET_TAG_BUFFER,
93 WRITE_COMBINED,
94 RING_BUFFER,
95 SEMAPHORE_BUFFER,
96 DEBUG_CONTEXT_SAVE_AREA,
97 DEBUG_SBA_TRACKING_BUFFER,
98 DEBUG_MODULE_AREA,
99 UNIFIED_SHARED_MEMORY,
100 WORK_PARTITION_SURFACE,
101 GPU_TIMESTAMP_DEVICE_BUFFER,
102 SW_TAG_BUFFER,
103 COUNT
104 };
105
106 ~GraphicsAllocation() override;
107 GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
108 GraphicsAllocation(const GraphicsAllocation &) = delete;
109
GraphicsAllocation(uint32_t rootDeviceIndex,AllocationType allocationType,void * cpuPtrIn,uint64_t gpuAddress,uint64_t baseAddress,size_t sizeIn,MemoryPool::Type pool,size_t maxOsContextCount)110 GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn,
111 uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool, size_t maxOsContextCount)
112 : GraphicsAllocation(rootDeviceIndex, 1, allocationType, cpuPtrIn, gpuAddress, baseAddress, sizeIn, pool, maxOsContextCount) {}
113
GraphicsAllocation(uint32_t rootDeviceIndex,AllocationType allocationType,void * cpuPtrIn,size_t sizeIn,osHandle sharedHandleIn,MemoryPool::Type pool,size_t maxOsContextCount)114 GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn,
115 size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool, size_t maxOsContextCount)
116 : GraphicsAllocation(rootDeviceIndex, 1, allocationType, cpuPtrIn, sizeIn, sharedHandleIn, pool, maxOsContextCount) {}
117
118 GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms, AllocationType allocationType, void *cpuPtrIn,
119 uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool, size_t maxOsContextCount);
120
121 GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms, AllocationType allocationType, void *cpuPtrIn,
122 size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool, size_t maxOsContextCount);
123
getRootDeviceIndex()124 uint32_t getRootDeviceIndex() const { return rootDeviceIndex; }
getUnderlyingBuffer()125 void *getUnderlyingBuffer() const { return cpuPtr; }
getDriverAllocatedCpuPtr()126 void *getDriverAllocatedCpuPtr() const { return driverAllocatedCpuPointer; }
setDriverAllocatedCpuPtr(void * allocatedCpuPtr)127 void setDriverAllocatedCpuPtr(void *allocatedCpuPtr) { driverAllocatedCpuPointer = allocatedCpuPtr; }
128
setCpuPtrAndGpuAddress(void * cpuPtr,uint64_t gpuAddress)129 void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
130 this->cpuPtr = cpuPtr;
131 this->gpuAddress = GmmHelper::canonize(gpuAddress);
132 }
getUnderlyingBufferSize()133 size_t getUnderlyingBufferSize() const { return size; }
setSize(size_t size)134 void setSize(size_t size) { this->size = size; }
135
getAllocationOffset()136 uint64_t getAllocationOffset() const {
137 return allocationOffset;
138 }
setAllocationOffset(uint64_t offset)139 void setAllocationOffset(uint64_t offset) {
140 allocationOffset = offset;
141 }
142
getGpuBaseAddress()143 uint64_t getGpuBaseAddress() const {
144 return gpuBaseAddress;
145 }
setGpuBaseAddress(uint64_t baseAddress)146 void setGpuBaseAddress(uint64_t baseAddress) {
147 gpuBaseAddress = baseAddress;
148 }
getGpuAddress()149 uint64_t getGpuAddress() const {
150 DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
151 return gpuAddress + allocationOffset;
152 }
getGpuAddressToPatch()153 uint64_t getGpuAddressToPatch() const {
154 DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
155 return gpuAddress + allocationOffset - gpuBaseAddress;
156 }
157
lock(void * ptr)158 void lock(void *ptr) { lockedPtr = ptr; }
unlock()159 void unlock() { lockedPtr = nullptr; }
isLocked()160 bool isLocked() const { return lockedPtr != nullptr; }
getLockedPtr()161 void *getLockedPtr() const { return lockedPtr; }
162
isCoherent()163 bool isCoherent() const { return allocationInfo.flags.coherent; }
setCoherent(bool coherentIn)164 void setCoherent(bool coherentIn) { allocationInfo.flags.coherent = coherentIn; }
setEvictable(bool evictable)165 void setEvictable(bool evictable) { allocationInfo.flags.evictable = evictable; }
peekEvictable()166 bool peekEvictable() const { return allocationInfo.flags.evictable; }
isFlushL3Required()167 bool isFlushL3Required() const { return allocationInfo.flags.flushL3Required; }
setFlushL3Required(bool flushL3Required)168 void setFlushL3Required(bool flushL3Required) { allocationInfo.flags.flushL3Required = flushL3Required; }
169
isUncacheable()170 bool isUncacheable() const { return allocationInfo.flags.uncacheable; }
setUncacheable(bool uncacheable)171 void setUncacheable(bool uncacheable) { allocationInfo.flags.uncacheable = uncacheable; }
is32BitAllocation()172 bool is32BitAllocation() const { return allocationInfo.flags.is32BitAllocation; }
set32BitAllocation(bool is32BitAllocation)173 void set32BitAllocation(bool is32BitAllocation) { allocationInfo.flags.is32BitAllocation = is32BitAllocation; }
174
175 void setAubWritable(bool writable, uint32_t banks);
176 bool isAubWritable(uint32_t banks) const;
177 void setTbxWritable(bool writable, uint32_t banks);
178 bool isTbxWritable(uint32_t banks) const;
setAllocDumpable(bool dumpable,bool bcsDumpOnly)179 void setAllocDumpable(bool dumpable, bool bcsDumpOnly) {
180 aubInfo.allocDumpable = dumpable;
181 aubInfo.bcsDumpOnly = bcsDumpOnly;
182 }
isAllocDumpable()183 bool isAllocDumpable() const { return aubInfo.allocDumpable; }
isMemObjectsAllocationWithWritableFlags()184 bool isMemObjectsAllocationWithWritableFlags() const { return aubInfo.memObjectsAllocationWithWritableFlags; }
setMemObjectsAllocationWithWritableFlags(bool newValue)185 void setMemObjectsAllocationWithWritableFlags(bool newValue) { aubInfo.memObjectsAllocationWithWritableFlags = newValue; }
186
incReuseCount()187 void incReuseCount() { sharingInfo.reuseCount++; }
decReuseCount()188 void decReuseCount() { sharingInfo.reuseCount--; }
peekReuseCount()189 uint32_t peekReuseCount() const { return sharingInfo.reuseCount; }
peekSharedHandle()190 osHandle peekSharedHandle() const { return sharingInfo.sharedHandle; }
setSharedHandle(osHandle handle)191 void setSharedHandle(osHandle handle) { sharingInfo.sharedHandle = handle; }
192
193 void setAllocationType(AllocationType allocationType);
getAllocationType()194 AllocationType getAllocationType() const { return allocationType; }
195
getMemoryPool()196 MemoryPool::Type getMemoryPool() const { return memoryPool; }
197
isUsed()198 bool isUsed() const { return registeredContextsNum > 0; }
isUsedByManyOsContexts()199 bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
isUsedByOsContext(uint32_t contextId)200 bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
201 void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
getTaskCount(uint32_t contextId)202 uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
releaseUsageInOsContext(uint32_t contextId)203 void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
getInspectionId(uint32_t contextId)204 uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
setInspectionId(uint32_t newInspectionId,uint32_t contextId)205 void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }
206
isResident(uint32_t contextId)207 bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
isAlwaysResident(uint32_t contextId)208 bool isAlwaysResident(uint32_t contextId) const { return GraphicsAllocation::objectAlwaysResident == getResidencyTaskCount(contextId); }
updateResidencyTaskCount(uint32_t newTaskCount,uint32_t contextId)209 void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) {
210 if (usageInfos[contextId].residencyTaskCount != GraphicsAllocation::objectAlwaysResident || newTaskCount == GraphicsAllocation::objectNotResident) {
211 usageInfos[contextId].residencyTaskCount = newTaskCount;
212 }
213 }
getResidencyTaskCount(uint32_t contextId)214 uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
releaseResidencyInOsContext(uint32_t contextId)215 void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); }
isResidencyTaskCountBelow(uint32_t taskCount,uint32_t contextId)216 bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
217
218 virtual std::string getAllocationInfoString() const;
peekInternalHandle(MemoryManager * memoryManager)219 virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; }
220
isCpuAccessRequired(AllocationType allocationType)221 static bool isCpuAccessRequired(AllocationType allocationType) {
222 return allocationType == AllocationType::COMMAND_BUFFER ||
223 allocationType == AllocationType::CONSTANT_SURFACE ||
224 allocationType == AllocationType::GLOBAL_SURFACE ||
225 allocationType == AllocationType::INTERNAL_HEAP ||
226 allocationType == AllocationType::LINEAR_STREAM ||
227 allocationType == AllocationType::PIPE ||
228 allocationType == AllocationType::PRINTF_SURFACE ||
229 allocationType == AllocationType::TIMESTAMP_PACKET_TAG_BUFFER ||
230 allocationType == AllocationType::RING_BUFFER ||
231 allocationType == AllocationType::SEMAPHORE_BUFFER ||
232 allocationType == AllocationType::DEBUG_CONTEXT_SAVE_AREA ||
233 allocationType == AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER ||
234 allocationType == AllocationType::DEBUG_MODULE_AREA;
235 }
isLockable(AllocationType allocationType)236 static bool isLockable(AllocationType allocationType) {
237 return isCpuAccessRequired(allocationType) ||
238 isIsaAllocationType(allocationType) ||
239 allocationType == AllocationType::BUFFER_HOST_MEMORY ||
240 allocationType == AllocationType::SHARED_RESOURCE_COPY;
241 }
242
isIsaAllocationType(GraphicsAllocation::AllocationType type)243 static bool isIsaAllocationType(GraphicsAllocation::AllocationType type) {
244 return type == GraphicsAllocation::AllocationType::KERNEL_ISA ||
245 type == GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL ||
246 type == AllocationType::DEBUG_MODULE_AREA;
247 }
248
getReservedAddressPtr()249 void *getReservedAddressPtr() const {
250 return this->reservedAddressRangeInfo.addressPtr;
251 }
getReservedAddressSize()252 size_t getReservedAddressSize() const {
253 return this->reservedAddressRangeInfo.rangeSize;
254 }
setReservedAddressRange(void * reserveAddress,size_t size)255 void setReservedAddressRange(void *reserveAddress, size_t size) {
256 this->reservedAddressRangeInfo.addressPtr = reserveAddress;
257 this->reservedAddressRangeInfo.rangeSize = size;
258 }
259
getDefaultGmm()260 Gmm *getDefaultGmm() const {
261 return getGmm(0u);
262 }
getGmm(uint32_t handleId)263 Gmm *getGmm(uint32_t handleId) const {
264 return gmms[handleId];
265 }
setDefaultGmm(Gmm * gmm)266 void setDefaultGmm(Gmm *gmm) {
267 setGmm(gmm, 0u);
268 }
setGmm(Gmm * gmm,uint32_t handleId)269 void setGmm(Gmm *gmm, uint32_t handleId) {
270 gmms[handleId] = gmm;
271 }
resizeGmms(uint32_t size)272 void resizeGmms(uint32_t size) {
273 gmms.resize(size);
274 }
275
getNumGmms()276 uint32_t getNumGmms() const {
277 return static_cast<uint32_t>(gmms.size());
278 }
279
280 uint32_t getUsedPageSize() const;
281
isAllocatedInLocalMemoryPool()282 bool isAllocatedInLocalMemoryPool() const { return (this->memoryPool == MemoryPool::LocalMemory); }
283 bool isAllocationLockable() const;
284
getAubInfo()285 const AubInfo &getAubInfo() const { return aubInfo; }
286
287 bool isCompressionEnabled() const;
288
289 OsHandleStorage fragmentsStorage;
290 StorageInfo storageInfo = {};
291
292 static constexpr uint32_t defaultBank = 0b1u;
293 static constexpr uint32_t allBanks = 0xffffffff;
294 constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
295 constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();
296 constexpr static uint32_t objectAlwaysResident = std::numeric_limits<uint32_t>::max() - 1;
297
298 protected:
299 struct UsageInfo {
300 uint32_t taskCount = objectNotUsed;
301 uint32_t residencyTaskCount = objectNotResident;
302 uint32_t inspectionId = 0u;
303 };
304
305 struct SharingInfo {
306 uint32_t reuseCount = 0;
307 osHandle sharedHandle = Sharing::nonSharedResource;
308 };
309 struct AllocationInfo {
310 union {
311 struct {
312 uint32_t coherent : 1;
313 uint32_t evictable : 1;
314 uint32_t flushL3Required : 1;
315 uint32_t uncacheable : 1;
316 uint32_t is32BitAllocation : 1;
317 uint32_t reserved : 27;
318 } flags;
319 uint32_t allFlags = 0u;
320 };
321 static_assert(sizeof(AllocationInfo::flags) == sizeof(AllocationInfo::allFlags), "");
AllocationInfoAllocationInfo322 AllocationInfo() {
323 flags.coherent = false;
324 flags.evictable = true;
325 flags.flushL3Required = true;
326 flags.is32BitAllocation = false;
327 }
328 };
329
330 struct ReservedAddressRange {
331 void *addressPtr = nullptr;
332 size_t rangeSize = 0;
333 };
334
335 friend class SubmissionAggregator;
336
337 const uint32_t rootDeviceIndex;
338 AllocationInfo allocationInfo;
339 AubInfo aubInfo;
340 SharingInfo sharingInfo;
341 ReservedAddressRange reservedAddressRangeInfo;
342
343 uint64_t allocationOffset = 0u;
344 uint64_t gpuBaseAddress = 0;
345 uint64_t gpuAddress = 0;
346 void *driverAllocatedCpuPointer = nullptr;
347 size_t size = 0;
348 void *cpuPtr = nullptr;
349 void *lockedPtr = nullptr;
350
351 MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
352 AllocationType allocationType = AllocationType::UNKNOWN;
353
354 StackVec<UsageInfo, 32> usageInfos;
355 std::atomic<uint32_t> registeredContextsNum{0};
356 StackVec<Gmm *, EngineLimits::maxHandleCount> gmms;
357 };
} // namespace NEO