1 /* 2 * Copyright (C) 2020-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 10 #include "shared/source/command_container/cmdcontainer.h" 11 #include "shared/source/command_stream/preemption_mode.h" 12 #include "shared/source/command_stream/stream_properties.h" 13 #include "shared/source/command_stream/thread_arbitration_policy.h" 14 15 #include "level_zero/core/source/cmdqueue/cmdqueue.h" 16 #include "level_zero/core/source/device/device.h" 17 #include "level_zero/core/source/kernel/kernel.h" 18 #include <level_zero/ze_api.h> 19 #include <level_zero/zet_api.h> 20 21 #include <vector> 22 23 struct _ze_command_list_handle_t {}; 24 25 namespace L0 { 26 struct EventPool; 27 struct Event; 28 struct Kernel; 29 30 struct CommandList : _ze_command_list_handle_t { 31 static constexpr uint32_t defaultNumIddsPerBlock = 64u; 32 static constexpr uint32_t commandListimmediateIddsPerBlock = 1u; 33 34 CommandList() = delete; CommandListCommandList35 CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {} 36 37 template <typename Type> 38 struct Allocator { allocateCommandList::Allocator39 static CommandList *allocate(uint32_t numIddsPerBlock) { return new Type(numIddsPerBlock); } 40 }; 41 42 struct CommandToPatch { 43 enum CommandType { 44 FrontEndState, 45 Invalid 46 }; 47 void *pDestination = nullptr; 48 void *pCommand = nullptr; 49 CommandType type = Invalid; 50 }; 51 using CommandsToPatch = StackVec<CommandToPatch, 16>; 52 53 virtual ze_result_t close() = 0; 54 virtual ze_result_t destroy() = 0; 55 virtual ze_result_t appendEventReset(ze_event_handle_t hEvent) = 0; 56 virtual ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, 57 ze_event_handle_t *phWaitEvents) = 0; 58 virtual ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, 59 const void **pRanges, 60 ze_event_handle_t hSignalEvent, 61 uint32_t numWaitEvents, 62 ze_event_handle_t *phWaitEvents) = 0; 63 virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr, 64 const ze_image_region_t *pDstRegion, 65 ze_event_handle_t hEvent, uint32_t numWaitEvents, 66 ze_event_handle_t *phWaitEvents) = 0; 67 virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage, 68 const ze_image_region_t *pSrcRegion, 69 ze_event_handle_t hEvent, uint32_t numWaitEvents, 70 ze_event_handle_t *phWaitEvents) = 0; 71 virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, 72 const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, 73 ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, 74 ze_event_handle_t *phWaitEvents) = 0; 75 virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, 76 ze_event_handle_t hEvent, uint32_t numWaitEvents, 77 ze_event_handle_t *phWaitEvents) = 0; 78 virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, 79 ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 80 virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, 81 const ze_group_count_t *pLaunchFuncArgs, 82 ze_event_handle_t hSignalEvent, 83 uint32_t numWaitEvents, 84 ze_event_handle_t *phWaitEvents) = 0; 85 virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, 86 const ze_group_count_t *pDispatchArgumentsBuffer, 87 ze_event_handle_t hEvent, uint32_t numWaitEvents, 88 ze_event_handle_t *phWaitEvents) = 0; 89 virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels, 90 const uint32_t *pNumLaunchArguments, 91 const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, 92 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 93 virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, 94 ze_memory_advice_t advice) = 0; 95 virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, 96 ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, 97 ze_event_handle_t *phWaitEvents) = 0; 98 virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0; 99 virtual ze_result_t appendMemoryCopyRegion(void *dstPtr, 100 const ze_copy_region_t *dstRegion, 101 uint32_t dstPitch, 102 uint32_t dstSlicePitch, 103 const void *srcPtr, 104 const ze_copy_region_t *srcRegion, 105 uint32_t srcPitch, 106 uint32_t srcSlicePitch, 107 ze_event_handle_t hSignalEvent, 108 uint32_t numWaitEvents, 109 ze_event_handle_t *phWaitEvents) = 0; 110 virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern, 111 size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, 112 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 113 virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; 114 virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; 115 virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0; 116 virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, 117 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 118 virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, 119 const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, 120 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 121 122 virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0; 123 virtual ze_result_t reset() = 0; 124 125 virtual ze_result_t appendMetricMemoryBarrier() = 0; 126 virtual ze_result_t appendMetricStreamerMarker(zet_metric_streamer_handle_t hMetricStreamer, 127 uint32_t value) = 0; 128 virtual ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) = 0; 129 virtual ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, 130 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 131 132 virtual ze_result_t appendQueryKernelTimestamps(uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, 133 const size_t *pOffsets, ze_event_handle_t hSignalEvent, 134 uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 135 136 virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0; 137 virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0; 138 virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0; 139 virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0; 140 virtual ze_result_t appendMIMath(void *aluArray, size_t aluCount) = 0; 141 virtual ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) = 0; 142 virtual ze_result_t appendMIBBEnd() = 0; 143 virtual ze_result_t appendMINoop() = 0; 144 virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0; 145 virtual ze_result_t appendWaitOnMemory(void *desc, void *ptr, 146 uint32_t data, ze_event_handle_t hSignalEvent) = 0; 147 virtual ze_result_t appendWriteToMemory(void *desc, void *ptr, 148 uint64_t data) = 0; 149 150 static CommandList *create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType, 151 ze_command_list_flags_t flags, ze_result_t &resultValue); 152 static CommandList *createImmediate(uint32_t productFamily, Device *device, 153 const ze_command_queue_desc_t *desc, 154 bool internalUsage, NEO::EngineGroupType engineGroupType, 155 ze_result_t &resultValue); 156 fromHandleCommandList157 static CommandList *fromHandle(ze_command_list_handle_t handle) { 158 return static_cast<CommandList *>(handle); 159 } 160 toHandleCommandList161 inline ze_command_list_handle_t toHandle() { return this; } 162 getCommandListPerThreadScratchSizeCommandList163 uint32_t getCommandListPerThreadScratchSize() const { 164 return commandListPerThreadScratchSize; 165 } 166 setCommandListPerThreadScratchSizeCommandList167 void setCommandListPerThreadScratchSize(uint32_t size) { 168 commandListPerThreadScratchSize = size; 169 } 170 getCommandListPerThreadPrivateScratchSizeCommandList171 uint32_t getCommandListPerThreadPrivateScratchSize() const { 172 return commandListPerThreadPrivateScratchSize; 173 } 174 setCommandListPerThreadPrivateScratchSizeCommandList175 void setCommandListPerThreadPrivateScratchSize(uint32_t size) { 176 commandListPerThreadPrivateScratchSize = size; 177 } 178 getCommandListSLMEnableCommandList179 uint32_t getCommandListSLMEnable() const { 180 return commandListSLMEnabled; 181 } 182 setCommandListSLMEnableCommandList183 void setCommandListSLMEnable(bool isSLMEnabled) { 184 commandListSLMEnabled = isSLMEnabled; 185 } 186 getCommandListPreemptionModeCommandList187 NEO::PreemptionMode getCommandListPreemptionMode() const { 188 return commandListPreemptionMode; 189 } 190 getThreadArbitrationPolicyCommandList191 uint32_t getThreadArbitrationPolicy() const { 192 return threadArbitrationPolicy; 193 } 194 getUnifiedMemoryControlsCommandList195 UnifiedMemoryControls getUnifiedMemoryControls() const { 196 return unifiedMemoryControls; 197 } 198 hasIndirectAllocationsAllowedCommandList199 bool hasIndirectAllocationsAllowed() const { 200 return indirectAllocationsAllowed; 201 } 202 203 NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel); 204 getPrintfFunctionContainerCommandList205 std::vector<Kernel *> &getPrintfFunctionContainer() { 206 return this->printfFunctionContainer; 207 } 208 209 void storePrintfFunction(Kernel *kernel); 210 void removeDeallocationContainerData(); 211 void removeHostPtrAllocations(); 212 void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); 213 void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation); 214 bool isCopyOnly() const; isInternalCommandList215 bool isInternal() const { 216 return internalUsage; 217 } containsCooperativeKernelsCommandList218 bool containsCooperativeKernels() const { 219 return containsCooperativeKernelsFlag; 220 } 221 222 enum CommandListType : uint32_t { 223 TYPE_REGULAR = 0u, 224 TYPE_IMMEDIATE = 1u 225 }; 226 227 virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0; 228 virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) = 0; 229 virtual ~CommandList(); 230 NEO::CommandContainer commandContainer; getContainsStatelessUncachedResourceCommandList231 bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; } getHostPtrMapCommandList232 std::map<const void *, NEO::GraphicsAllocation *> &getHostPtrMap() { 233 return hostPtrMap; 234 }; 235 getRequiredStreamStateCommandList236 const NEO::StreamProperties &getRequiredStreamState() { 237 return requiredStreamState; 238 } getFinalStreamStateCommandList239 const NEO::StreamProperties &getFinalStreamState() { 240 return finalStreamState; 241 } getCommandsToPatchCommandList242 const CommandsToPatch &getCommandsToPatch() { 243 return commandsToPatch; 244 } 245 246 void makeResidentAndMigrate(bool); 247 void migrateSharedAllocations(); 248 249 std::vector<Kernel *> printfFunctionContainer; 250 CommandQueue *cmdQImmediate = nullptr; 251 NEO::CommandStreamReceiver *csr = nullptr; 252 Device *device = nullptr; 253 NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; 254 uint32_t cmdListType = CommandListType::TYPE_REGULAR; 255 uint32_t commandListPerThreadScratchSize = 0u; 256 uint32_t commandListPerThreadPrivateScratchSize = 0u; 257 uint32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin; 258 uint32_t partitionCount = 1; 259 bool isFlushTaskSubmissionEnabled = false; 260 bool isSyncModeQueue = false; 261 bool commandListSLMEnabled = false; 262 bool requiresQueueUncachedMocs = false; 263 264 protected: 265 NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); 266 NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); 267 268 std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap; 269 std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations; 270 std::vector<NEO::GraphicsAllocation *> patternAllocations; 271 272 NEO::StreamProperties requiredStreamState{}; 273 NEO::StreamProperties finalStreamState{}; 274 CommandsToPatch commandsToPatch{}; 275 276 ze_command_list_flags_t flags = 0u; 277 UnifiedMemoryControls unifiedMemoryControls; 278 279 NEO::EngineGroupType engineGroupType; 280 bool indirectAllocationsAllowed = false; 281 bool internalUsage = false; 282 bool containsCooperativeKernelsFlag = false; 283 bool containsStatelessUncachedResource = false; 284 }; 285 286 using CommandListAllocatorFn = CommandList *(*)(uint32_t); 287 extern CommandListAllocatorFn commandListFactory[]; 288 extern CommandListAllocatorFn commandListFactoryImmediate[]; 289 290 template <uint32_t productFamily, typename CommandListType> 291 struct CommandListPopulateFactory { CommandListPopulateFactoryCommandListPopulateFactory292 CommandListPopulateFactory() { 293 commandListFactory[productFamily] = CommandList::Allocator<CommandListType>::allocate; 294 } 295 }; 296 297 template <uint32_t productFamily, typename CommandListType> 298 struct CommandListImmediatePopulateFactory { CommandListImmediatePopulateFactoryCommandListImmediatePopulateFactory299 CommandListImmediatePopulateFactory() { 300 commandListFactoryImmediate[productFamily] = CommandList::Allocator<CommandListType>::allocate; 301 } 302 }; 303 304 } // namespace L0 305