1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 
10 #include "shared/source/gmm_helper/gmm_helper.h"
11 #include "shared/source/helpers/constants.h"
12 #include "shared/source/helpers/debug_helpers.h"
13 #include "shared/source/helpers/ptr_math.h"
14 #include "shared/source/memory_manager/definitions/engine_limits.h"
15 #include "shared/source/memory_manager/definitions/storage_info.h"
16 #include "shared/source/memory_manager/host_ptr_defines.h"
17 #include "shared/source/memory_manager/memory_pool.h"
18 #include "shared/source/utilities/idlist.h"
19 #include "shared/source/utilities/stackvec.h"
20 
21 #include <array>
22 #include <atomic>
23 #include <cstddef>
24 #include <cstdint>
25 #include <limits>
26 #include <mutex>
27 #include <vector>
28 
29 namespace NEO {
30 
31 using osHandle = unsigned int;
toOsHandle(const void * handle)32 inline osHandle toOsHandle(const void *handle) {
33 
34     return static_cast<osHandle>(castToUint64(handle));
35 }
36 
// Forward declaration; the full enumeration lives in the GPU heap headers.
enum class HeapIndex : uint32_t;

namespace Sharing {
// Sentinel handle value meaning the resource is not shared (see SharingInfo::sharedHandle).
constexpr auto nonSharedResource = 0u;
} // namespace Sharing

class Gmm;
class MemoryManager;
45 
// Per-allocation AUB/TBX dump state. The writable fields are per-bank
// bitmasks; every bank starts out writable (all bits set).
struct AubInfo {
    uint32_t aubWritable{std::numeric_limits<uint32_t>::max()};
    uint32_t tbxWritable{std::numeric_limits<uint32_t>::max()};
    bool allocDumpable{false};
    bool bcsDumpOnly{false};
    bool memObjectsAllocationWithWritableFlags{false};
};
53 
// A single device memory allocation tracked by NEO's memory manager.
// Couples CPU and GPU addressing information with per-OS-context usage and
// residency bookkeeping, cross-process sharing state, the GMM resource
// descriptors backing the allocation, and AUB/TBX dump state.
class GraphicsAllocation : public IDNode<GraphicsAllocation> {
  public:
    // What the allocation backs; drives placement, cacheability and
    // lockability decisions (see isCpuAccessRequired / isLockable below).
    enum class AllocationType {
        UNKNOWN = 0,
        BUFFER,
        BUFFER_HOST_MEMORY,
        COMMAND_BUFFER,
        CONSTANT_SURFACE,
        DEVICE_QUEUE_BUFFER,
        EXTERNAL_HOST_PTR,
        FILL_PATTERN,
        GLOBAL_SURFACE,
        IMAGE,
        INDIRECT_OBJECT_HEAP,
        INSTRUCTION_HEAP,
        INTERNAL_HEAP,
        INTERNAL_HOST_MEMORY,
        KERNEL_ISA,
        KERNEL_ISA_INTERNAL,
        LINEAR_STREAM,
        MAP_ALLOCATION,
        MCS,
        PIPE,
        PREEMPTION,
        PRINTF_SURFACE,
        PRIVATE_SURFACE,
        PROFILING_TAG_BUFFER,
        SCRATCH_SURFACE,
        SHARED_BUFFER,
        SHARED_CONTEXT_IMAGE,
        SHARED_IMAGE,
        SHARED_RESOURCE_COPY,
        SURFACE_STATE_HEAP,
        SVM_CPU,
        SVM_GPU,
        SVM_ZERO_COPY,
        TAG_BUFFER,
        GLOBAL_FENCE,
        TIMESTAMP_PACKET_TAG_BUFFER,
        WRITE_COMBINED,
        RING_BUFFER,
        SEMAPHORE_BUFFER,
        DEBUG_CONTEXT_SAVE_AREA,
        DEBUG_SBA_TRACKING_BUFFER,
        DEBUG_MODULE_AREA,
        UNIFIED_SHARED_MEMORY,
        WORK_PARTITION_SURFACE,
        GPU_TIMESTAMP_DEVICE_BUFFER,
        SW_TAG_BUFFER,
        COUNT // number of allocation types; keep last
    };

    ~GraphicsAllocation() override;
    // Non-copyable: each instance identifies a unique GPU resource.
    GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
    GraphicsAllocation(const GraphicsAllocation &) = delete;

    // Convenience constructor for a single-GMM allocation with an explicit
    // GPU address; delegates to the numGmms overload with numGmms == 1.
    GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn,
                       uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool, size_t maxOsContextCount)
        : GraphicsAllocation(rootDeviceIndex, 1, allocationType, cpuPtrIn, gpuAddress, baseAddress, sizeIn, pool, maxOsContextCount) {}

    // Convenience constructor for a single-GMM allocation created from a
    // shared OS handle; delegates to the numGmms overload with numGmms == 1.
    GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn,
                       size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool, size_t maxOsContextCount)
        : GraphicsAllocation(rootDeviceIndex, 1, allocationType, cpuPtrIn, sizeIn, sharedHandleIn, pool, maxOsContextCount) {}

    GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms, AllocationType allocationType, void *cpuPtrIn,
                       uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool, size_t maxOsContextCount);

    GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms, AllocationType allocationType, void *cpuPtrIn,
                       size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool, size_t maxOsContextCount);

    uint32_t getRootDeviceIndex() const { return rootDeviceIndex; }
    void *getUnderlyingBuffer() const { return cpuPtr; }
    void *getDriverAllocatedCpuPtr() const { return driverAllocatedCpuPointer; }
    void setDriverAllocatedCpuPtr(void *allocatedCpuPtr) { driverAllocatedCpuPointer = allocatedCpuPtr; }

    // Stores both mappings at once; the GPU address is canonized via
    // GmmHelper before being stored.
    void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
        this->cpuPtr = cpuPtr;
        this->gpuAddress = GmmHelper::canonize(gpuAddress);
    }
    size_t getUnderlyingBufferSize() const { return size; }
    void setSize(size_t size) { this->size = size; }

    // Extra offset applied on top of gpuAddress when the address is consumed.
    uint64_t getAllocationOffset() const {
        return allocationOffset;
    }
    void setAllocationOffset(uint64_t offset) {
        allocationOffset = offset;
    }

    uint64_t getGpuBaseAddress() const {
        return gpuBaseAddress;
    }
    void setGpuBaseAddress(uint64_t baseAddress) {
        gpuBaseAddress = baseAddress;
    }
    // Effective GPU VA, including the allocation offset.
    uint64_t getGpuAddress() const {
        DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
        return gpuAddress + allocationOffset;
    }
    // GPU VA relative to the heap base address, as patched into surfaces.
    uint64_t getGpuAddressToPatch() const {
        DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
        return gpuAddress + allocationOffset - gpuBaseAddress;
    }

    // CPU-side lock (map) bookkeeping; only stores the pointer, the mapping
    // itself is managed elsewhere.
    void lock(void *ptr) { lockedPtr = ptr; }
    void unlock() { lockedPtr = nullptr; }
    bool isLocked() const { return lockedPtr != nullptr; }
    void *getLockedPtr() const { return lockedPtr; }

    // Accessors for the packed AllocationInfo flag bits.
    bool isCoherent() const { return allocationInfo.flags.coherent; }
    void setCoherent(bool coherentIn) { allocationInfo.flags.coherent = coherentIn; }
    void setEvictable(bool evictable) { allocationInfo.flags.evictable = evictable; }
    bool peekEvictable() const { return allocationInfo.flags.evictable; }
    bool isFlushL3Required() const { return allocationInfo.flags.flushL3Required; }
    void setFlushL3Required(bool flushL3Required) { allocationInfo.flags.flushL3Required = flushL3Required; }

    bool isUncacheable() const { return allocationInfo.flags.uncacheable; }
    void setUncacheable(bool uncacheable) { allocationInfo.flags.uncacheable = uncacheable; }
    bool is32BitAllocation() const { return allocationInfo.flags.is32BitAllocation; }
    void set32BitAllocation(bool is32BitAllocation) { allocationInfo.flags.is32BitAllocation = is32BitAllocation; }

    // AUB/TBX writability is tracked per memory bank (bitmask); definitions
    // live in the implementation file.
    void setAubWritable(bool writable, uint32_t banks);
    bool isAubWritable(uint32_t banks) const;
    void setTbxWritable(bool writable, uint32_t banks);
    bool isTbxWritable(uint32_t banks) const;
    void setAllocDumpable(bool dumpable, bool bcsDumpOnly) {
        aubInfo.allocDumpable = dumpable;
        aubInfo.bcsDumpOnly = bcsDumpOnly;
    }
    bool isAllocDumpable() const { return aubInfo.allocDumpable; }
    bool isMemObjectsAllocationWithWritableFlags() const { return aubInfo.memObjectsAllocationWithWritableFlags; }
    void setMemObjectsAllocationWithWritableFlags(bool newValue) { aubInfo.memObjectsAllocationWithWritableFlags = newValue; }

    // Reuse counting for shared resources.
    // NOTE(review): reuseCount is a plain uint32_t, not atomic - callers
    // presumably serialize access; confirm before relying on concurrent use.
    void incReuseCount() { sharingInfo.reuseCount++; }
    void decReuseCount() { sharingInfo.reuseCount--; }
    uint32_t peekReuseCount() const { return sharingInfo.reuseCount; }
    osHandle peekSharedHandle() const { return sharingInfo.sharedHandle; }
    void setSharedHandle(osHandle handle) { sharingInfo.sharedHandle = handle; }

    void setAllocationType(AllocationType allocationType);
    AllocationType getAllocationType() const { return allocationType; }

    MemoryPool::Type getMemoryPool() const { return memoryPool; }

    // Usage tracking: per-OS-context task counts, indexed by context id.
    // objectNotUsed is the "never used in this context" sentinel.
    bool isUsed() const { return registeredContextsNum > 0; }
    bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
    bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
    void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
    uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
    void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
    uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
    void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }

    // Residency tracking per OS context. objectAlwaysResident pins the
    // allocation: once set, the residency task count can only be changed by
    // an explicit eviction (newTaskCount == objectNotResident).
    bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
    bool isAlwaysResident(uint32_t contextId) const { return GraphicsAllocation::objectAlwaysResident == getResidencyTaskCount(contextId); }
    void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) {
        if (usageInfos[contextId].residencyTaskCount != GraphicsAllocation::objectAlwaysResident || newTaskCount == GraphicsAllocation::objectNotResident) {
            usageInfos[contextId].residencyTaskCount = newTaskCount;
        }
    }
    uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
    void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); }
    bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }

    virtual std::string getAllocationInfoString() const;
    // Base implementation reports no internal handle; OS-specific subclasses
    // are expected to override this.
    virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; }

    // Allocation types that the driver reads/writes on the host side and
    // which therefore must remain CPU-accessible.
    static bool isCpuAccessRequired(AllocationType allocationType) {
        return allocationType == AllocationType::COMMAND_BUFFER ||
               allocationType == AllocationType::CONSTANT_SURFACE ||
               allocationType == AllocationType::GLOBAL_SURFACE ||
               allocationType == AllocationType::INTERNAL_HEAP ||
               allocationType == AllocationType::LINEAR_STREAM ||
               allocationType == AllocationType::PIPE ||
               allocationType == AllocationType::PRINTF_SURFACE ||
               allocationType == AllocationType::TIMESTAMP_PACKET_TAG_BUFFER ||
               allocationType == AllocationType::RING_BUFFER ||
               allocationType == AllocationType::SEMAPHORE_BUFFER ||
               allocationType == AllocationType::DEBUG_CONTEXT_SAVE_AREA ||
               allocationType == AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER ||
               allocationType == AllocationType::DEBUG_MODULE_AREA;
    }
    // Lockable = CPU-accessible types plus ISA and host-memory/copy types.
    static bool isLockable(AllocationType allocationType) {
        return isCpuAccessRequired(allocationType) ||
               isIsaAllocationType(allocationType) ||
               allocationType == AllocationType::BUFFER_HOST_MEMORY ||
               allocationType == AllocationType::SHARED_RESOURCE_COPY;
    }

    static bool isIsaAllocationType(GraphicsAllocation::AllocationType type) {
        return type == GraphicsAllocation::AllocationType::KERNEL_ISA ||
               type == GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL ||
               type == AllocationType::DEBUG_MODULE_AREA;
    }

    // Address range reserved for this allocation (if any), recorded so it can
    // be released together with the allocation.
    void *getReservedAddressPtr() const {
        return this->reservedAddressRangeInfo.addressPtr;
    }
    size_t getReservedAddressSize() const {
        return this->reservedAddressRangeInfo.rangeSize;
    }
    void setReservedAddressRange(void *reserveAddress, size_t size) {
        this->reservedAddressRangeInfo.addressPtr = reserveAddress;
        this->reservedAddressRangeInfo.rangeSize = size;
    }

    // GMM resource descriptors, one per handle; index 0 is the default.
    Gmm *getDefaultGmm() const {
        return getGmm(0u);
    }
    Gmm *getGmm(uint32_t handleId) const {
        return gmms[handleId];
    }
    void setDefaultGmm(Gmm *gmm) {
        setGmm(gmm, 0u);
    }
    void setGmm(Gmm *gmm, uint32_t handleId) {
        gmms[handleId] = gmm;
    }
    void resizeGmms(uint32_t size) {
        gmms.resize(size);
    }

    uint32_t getNumGmms() const {
        return static_cast<uint32_t>(gmms.size());
    }

    uint32_t getUsedPageSize() const;

    bool isAllocatedInLocalMemoryPool() const { return (this->memoryPool == MemoryPool::LocalMemory); }
    bool isAllocationLockable() const;

    const AubInfo &getAubInfo() const { return aubInfo; }

    bool isCompressionEnabled() const;

    OsHandleStorage fragmentsStorage;
    StorageInfo storageInfo = {};

    // Bank masks used with the AUB/TBX writability interfaces.
    static constexpr uint32_t defaultBank = 0b1u;
    static constexpr uint32_t allBanks = 0xffffffff;
    // Sentinel task-count values (see residency/usage tracking above).
    constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
    constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();
    constexpr static uint32_t objectAlwaysResident = std::numeric_limits<uint32_t>::max() - 1;

  protected:
    // Per-OS-context usage/residency bookkeeping, indexed by context id.
    struct UsageInfo {
        uint32_t taskCount = objectNotUsed;
        uint32_t residencyTaskCount = objectNotResident;
        uint32_t inspectionId = 0u;
    };

    struct SharingInfo {
        uint32_t reuseCount = 0;
        osHandle sharedHandle = Sharing::nonSharedResource;
    };
    // Allocation flag bits packed in a union so all flags can be accessed as
    // a single 32-bit word (allFlags); the union initializer zeroes them.
    struct AllocationInfo {
        union {
            struct {
                uint32_t coherent : 1;
                uint32_t evictable : 1;
                uint32_t flushL3Required : 1;
                uint32_t uncacheable : 1;
                uint32_t is32BitAllocation : 1;
                uint32_t reserved : 27;
            } flags;
            uint32_t allFlags = 0u;
        };
        static_assert(sizeof(AllocationInfo::flags) == sizeof(AllocationInfo::allFlags), "");
        // Defaults: not coherent, evictable, L3 flush required; uncacheable
        // stays 0 from the allFlags initializer.
        AllocationInfo() {
            flags.coherent = false;
            flags.evictable = true;
            flags.flushL3Required = true;
            flags.is32BitAllocation = false;
        }
    };

    struct ReservedAddressRange {
        void *addressPtr = nullptr;
        size_t rangeSize = 0;
    };

    friend class SubmissionAggregator;

    const uint32_t rootDeviceIndex;
    AllocationInfo allocationInfo;
    AubInfo aubInfo;
    SharingInfo sharingInfo;
    ReservedAddressRange reservedAddressRangeInfo;

    uint64_t allocationOffset = 0u;
    uint64_t gpuBaseAddress = 0;
    uint64_t gpuAddress = 0;
    void *driverAllocatedCpuPointer = nullptr;
    size_t size = 0;
    void *cpuPtr = nullptr;
    void *lockedPtr = nullptr;

    MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
    AllocationType allocationType = AllocationType::UNKNOWN;

    StackVec<UsageInfo, 32> usageInfos;
    std::atomic<uint32_t> registeredContextsNum{0};
    StackVec<Gmm *, EngineLimits::maxHandleCount> gmms;
};
358 } // namespace NEO
359