1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 
10 #include "shared/source/command_container/cmdcontainer.h"
11 #include "shared/source/command_stream/preemption_mode.h"
12 #include "shared/source/command_stream/stream_properties.h"
13 #include "shared/source/command_stream/thread_arbitration_policy.h"
14 
15 #include "level_zero/core/source/cmdqueue/cmdqueue.h"
16 #include "level_zero/core/source/device/device.h"
17 #include "level_zero/core/source/kernel/kernel.h"
18 #include <level_zero/ze_api.h>
19 #include <level_zero/zet_api.h>
20 
21 #include <vector>
22 
// Definition of the tag type that ze_command_list_handle_t points to. It has no
// members of its own; L0::CommandList derives from it so that a handle and a
// CommandList pointer can be converted with a static_cast.
struct _ze_command_list_handle_t {
};
24 
25 namespace L0 {
// Forward declarations of types that this header only names through pointers.
struct Event;
struct EventPool;
struct Kernel;
29 
30 struct CommandList : _ze_command_list_handle_t {
31     static constexpr uint32_t defaultNumIddsPerBlock = 64u;
32     static constexpr uint32_t commandListimmediateIddsPerBlock = 1u;
33 
34     CommandList() = delete;
CommandListCommandList35     CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {}
36 
37     template <typename Type>
38     struct Allocator {
allocateCommandList::Allocator39         static CommandList *allocate(uint32_t numIddsPerBlock) { return new Type(numIddsPerBlock); }
40     };
41 
42     struct CommandToPatch {
43         enum CommandType {
44             FrontEndState,
45             Invalid
46         };
47         void *pDestination = nullptr;
48         void *pCommand = nullptr;
49         CommandType type = Invalid;
50     };
51     using CommandsToPatch = StackVec<CommandToPatch, 16>;
52 
53     virtual ze_result_t close() = 0;
54     virtual ze_result_t destroy() = 0;
55     virtual ze_result_t appendEventReset(ze_event_handle_t hEvent) = 0;
56     virtual ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
57                                       ze_event_handle_t *phWaitEvents) = 0;
58     virtual ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
59                                                   const void **pRanges,
60                                                   ze_event_handle_t hSignalEvent,
61                                                   uint32_t numWaitEvents,
62                                                   ze_event_handle_t *phWaitEvents) = 0;
63     virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr,
64                                                   const ze_image_region_t *pDstRegion,
65                                                   ze_event_handle_t hEvent, uint32_t numWaitEvents,
66                                                   ze_event_handle_t *phWaitEvents) = 0;
67     virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
68                                                 const ze_image_region_t *pSrcRegion,
69                                                 ze_event_handle_t hEvent, uint32_t numWaitEvents,
70                                                 ze_event_handle_t *phWaitEvents) = 0;
71     virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
72                                               const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
73                                               ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
74                                               ze_event_handle_t *phWaitEvents) = 0;
75     virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
76                                         ze_event_handle_t hEvent, uint32_t numWaitEvents,
77                                         ze_event_handle_t *phWaitEvents) = 0;
78     virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions,
79                                            ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
80     virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
81                                                       const ze_group_count_t *pLaunchFuncArgs,
82                                                       ze_event_handle_t hSignalEvent,
83                                                       uint32_t numWaitEvents,
84                                                       ze_event_handle_t *phWaitEvents) = 0;
85     virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
86                                                    const ze_group_count_t *pDispatchArgumentsBuffer,
87                                                    ze_event_handle_t hEvent, uint32_t numWaitEvents,
88                                                    ze_event_handle_t *phWaitEvents) = 0;
89     virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels,
90                                                             const uint32_t *pNumLaunchArguments,
91                                                             const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
92                                                             uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
93     virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size,
94                                         ze_memory_advice_t advice) = 0;
95     virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
96                                          ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
97                                          ze_event_handle_t *phWaitEvents) = 0;
98     virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0;
99     virtual ze_result_t appendMemoryCopyRegion(void *dstPtr,
100                                                const ze_copy_region_t *dstRegion,
101                                                uint32_t dstPitch,
102                                                uint32_t dstSlicePitch,
103                                                const void *srcPtr,
104                                                const ze_copy_region_t *srcRegion,
105                                                uint32_t srcPitch,
106                                                uint32_t srcSlicePitch,
107                                                ze_event_handle_t hSignalEvent,
108                                                uint32_t numWaitEvents,
109                                                ze_event_handle_t *phWaitEvents) = 0;
110     virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern,
111                                          size_t patternSize, size_t size, ze_event_handle_t hSignalEvent,
112                                          uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
113     virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
114     virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
115     virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0;
116     virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
117                                                    uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
118     virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
119                                                     const void *srcptr, size_t size, ze_event_handle_t hSignalEvent,
120                                                     uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
121 
122     virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0;
123     virtual ze_result_t reset() = 0;
124 
125     virtual ze_result_t appendMetricMemoryBarrier() = 0;
126     virtual ze_result_t appendMetricStreamerMarker(zet_metric_streamer_handle_t hMetricStreamer,
127                                                    uint32_t value) = 0;
128     virtual ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) = 0;
129     virtual ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent,
130                                              uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
131 
132     virtual ze_result_t appendQueryKernelTimestamps(uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr,
133                                                     const size_t *pOffsets, ze_event_handle_t hSignalEvent,
134                                                     uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
135 
136     virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0;
137     virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0;
138     virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0;
139     virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0;
140     virtual ze_result_t appendMIMath(void *aluArray, size_t aluCount) = 0;
141     virtual ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) = 0;
142     virtual ze_result_t appendMIBBEnd() = 0;
143     virtual ze_result_t appendMINoop() = 0;
144     virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0;
145     virtual ze_result_t appendWaitOnMemory(void *desc, void *ptr,
146                                            uint32_t data, ze_event_handle_t hSignalEvent) = 0;
147     virtual ze_result_t appendWriteToMemory(void *desc, void *ptr,
148                                             uint64_t data) = 0;
149 
150     static CommandList *create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType,
151                                ze_command_list_flags_t flags, ze_result_t &resultValue);
152     static CommandList *createImmediate(uint32_t productFamily, Device *device,
153                                         const ze_command_queue_desc_t *desc,
154                                         bool internalUsage, NEO::EngineGroupType engineGroupType,
155                                         ze_result_t &resultValue);
156 
fromHandleCommandList157     static CommandList *fromHandle(ze_command_list_handle_t handle) {
158         return static_cast<CommandList *>(handle);
159     }
160 
toHandleCommandList161     inline ze_command_list_handle_t toHandle() { return this; }
162 
getCommandListPerThreadScratchSizeCommandList163     uint32_t getCommandListPerThreadScratchSize() const {
164         return commandListPerThreadScratchSize;
165     }
166 
setCommandListPerThreadScratchSizeCommandList167     void setCommandListPerThreadScratchSize(uint32_t size) {
168         commandListPerThreadScratchSize = size;
169     }
170 
getCommandListPerThreadPrivateScratchSizeCommandList171     uint32_t getCommandListPerThreadPrivateScratchSize() const {
172         return commandListPerThreadPrivateScratchSize;
173     }
174 
setCommandListPerThreadPrivateScratchSizeCommandList175     void setCommandListPerThreadPrivateScratchSize(uint32_t size) {
176         commandListPerThreadPrivateScratchSize = size;
177     }
178 
getCommandListSLMEnableCommandList179     uint32_t getCommandListSLMEnable() const {
180         return commandListSLMEnabled;
181     }
182 
setCommandListSLMEnableCommandList183     void setCommandListSLMEnable(bool isSLMEnabled) {
184         commandListSLMEnabled = isSLMEnabled;
185     }
186 
getCommandListPreemptionModeCommandList187     NEO::PreemptionMode getCommandListPreemptionMode() const {
188         return commandListPreemptionMode;
189     }
190 
getThreadArbitrationPolicyCommandList191     uint32_t getThreadArbitrationPolicy() const {
192         return threadArbitrationPolicy;
193     }
194 
getUnifiedMemoryControlsCommandList195     UnifiedMemoryControls getUnifiedMemoryControls() const {
196         return unifiedMemoryControls;
197     }
198 
hasIndirectAllocationsAllowedCommandList199     bool hasIndirectAllocationsAllowed() const {
200         return indirectAllocationsAllowed;
201     }
202 
203     NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel);
204 
getPrintfFunctionContainerCommandList205     std::vector<Kernel *> &getPrintfFunctionContainer() {
206         return this->printfFunctionContainer;
207     }
208 
209     void storePrintfFunction(Kernel *kernel);
210     void removeDeallocationContainerData();
211     void removeHostPtrAllocations();
212     void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
213     void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
214     bool isCopyOnly() const;
isInternalCommandList215     bool isInternal() const {
216         return internalUsage;
217     }
containsCooperativeKernelsCommandList218     bool containsCooperativeKernels() const {
219         return containsCooperativeKernelsFlag;
220     }
221 
222     enum CommandListType : uint32_t {
223         TYPE_REGULAR = 0u,
224         TYPE_IMMEDIATE = 1u
225     };
226 
227     virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
228     virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) = 0;
229     virtual ~CommandList();
230     NEO::CommandContainer commandContainer;
getContainsStatelessUncachedResourceCommandList231     bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; }
getHostPtrMapCommandList232     std::map<const void *, NEO::GraphicsAllocation *> &getHostPtrMap() {
233         return hostPtrMap;
234     };
235 
getRequiredStreamStateCommandList236     const NEO::StreamProperties &getRequiredStreamState() {
237         return requiredStreamState;
238     }
getFinalStreamStateCommandList239     const NEO::StreamProperties &getFinalStreamState() {
240         return finalStreamState;
241     }
getCommandsToPatchCommandList242     const CommandsToPatch &getCommandsToPatch() {
243         return commandsToPatch;
244     }
245 
246     void makeResidentAndMigrate(bool);
247     void migrateSharedAllocations();
248 
249     std::vector<Kernel *> printfFunctionContainer;
250     CommandQueue *cmdQImmediate = nullptr;
251     NEO::CommandStreamReceiver *csr = nullptr;
252     Device *device = nullptr;
253     NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
254     uint32_t cmdListType = CommandListType::TYPE_REGULAR;
255     uint32_t commandListPerThreadScratchSize = 0u;
256     uint32_t commandListPerThreadPrivateScratchSize = 0u;
257     uint32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
258     uint32_t partitionCount = 1;
259     bool isFlushTaskSubmissionEnabled = false;
260     bool isSyncModeQueue = false;
261     bool commandListSLMEnabled = false;
262     bool requiresQueueUncachedMocs = false;
263 
264   protected:
265     NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
266     NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
267 
268     std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
269     std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
270     std::vector<NEO::GraphicsAllocation *> patternAllocations;
271 
272     NEO::StreamProperties requiredStreamState{};
273     NEO::StreamProperties finalStreamState{};
274     CommandsToPatch commandsToPatch{};
275 
276     ze_command_list_flags_t flags = 0u;
277     UnifiedMemoryControls unifiedMemoryControls;
278 
279     NEO::EngineGroupType engineGroupType;
280     bool indirectAllocationsAllowed = false;
281     bool internalUsage = false;
282     bool containsCooperativeKernelsFlag = false;
283     bool containsStatelessUncachedResource = false;
284 };
285 
286 using CommandListAllocatorFn = CommandList *(*)(uint32_t);
287 extern CommandListAllocatorFn commandListFactory[];
288 extern CommandListAllocatorFn commandListFactoryImmediate[];
289 
290 template <uint32_t productFamily, typename CommandListType>
291 struct CommandListPopulateFactory {
CommandListPopulateFactoryCommandListPopulateFactory292     CommandListPopulateFactory() {
293         commandListFactory[productFamily] = CommandList::Allocator<CommandListType>::allocate;
294     }
295 };
296 
297 template <uint32_t productFamily, typename CommandListType>
298 struct CommandListImmediatePopulateFactory {
CommandListImmediatePopulateFactoryCommandListImmediatePopulateFactory299     CommandListImmediatePopulateFactory() {
300         commandListFactoryImmediate[productFamily] = CommandList::Allocator<CommandListType>::allocate;
301     }
302 };
303 
304 } // namespace L0
305