1 /* 2 * Copyright (C) 2020-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 10 #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" 11 #include "shared/source/unified_memory/unified_memory.h" 12 13 #include "level_zero/core/source/kernel/kernel.h" 14 15 #include <memory> 16 17 namespace L0 { 18 19 struct KernelImp : Kernel { 20 KernelImp(Module *module); 21 22 ~KernelImp() override; 23 destroyKernelImp24 ze_result_t destroy() override { 25 delete this; 26 return ZE_RESULT_SUCCESS; 27 } 28 29 ze_result_t setIndirectAccess(ze_kernel_indirect_access_flags_t flags) override; 30 ze_result_t getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) override; 31 ze_result_t getSourceAttributes(uint32_t *pSize, char **pString) override; 32 33 ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) override; 34 35 ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override; 36 37 void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override; 38 39 ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, 40 uint32_t groupSizeZ) override; 41 42 ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, 43 uint32_t *groupSizeX, uint32_t *groupSizeY, 44 uint32_t *groupSizeZ) override; 45 46 ze_result_t getKernelName(size_t *pSize, char *pName) override; 47 48 ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType, 49 bool isEngineInstanced) override; 50 getCrossThreadDataKernelImp51 const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); } getCrossThreadDataSizeKernelImp52 uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; } 53 getResidencyContainerKernelImp54 const std::vector<NEO::GraphicsAllocation *> &getResidencyContainer() const override { 55 return residencyContainer; 56 } 57 58 ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); 59 60 ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); 61 62 ze_result_t setArgUnknown(uint32_t argIndex, size_t argSize, const void *argVal); 63 64 ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override; 65 66 ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override; 67 68 ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); 69 70 ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); 71 72 virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0; 73 74 ze_result_t initialize(const ze_kernel_desc_t *desc); 75 getPerThreadDataKernelImp76 const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; } getPerThreadDataSizeForWholeThreadGroupKernelImp77 uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; } 78 getPerThreadDataSizeKernelImp79 uint32_t getPerThreadDataSize() const override { return perThreadDataSize; } getNumThreadsPerThreadGroupKernelImp80 uint32_t getNumThreadsPerThreadGroup() const override { return numThreadsPerThreadGroup; } getThreadExecutionMaskKernelImp81 uint32_t getThreadExecutionMask() const override { return threadExecutionMask; } 82 getPrintfBufferAllocationKernelImp83 NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; } 84 void printPrintfOutput() override; 85 86 bool usesSyncBuffer() override; 87 void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override; 88 getSurfaceStateHeapDataKernelImp89 const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); } getSurfaceStateHeapDataSizeKernelImp90 uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; } 91 getDynamicStateHeapDataKernelImp92 const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); } 93 getImmutableDataKernelImp94 const KernelImmutableData *getImmutableData() const override { return kernelImmData; } 95 getUnifiedMemoryControlsKernelImp96 UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; } 97 bool hasIndirectAllocationsAllowed() const override; 98 getKernelDescriptorKernelImp99 const NEO::KernelDescriptor &getKernelDescriptor() const override { 100 return kernelImmData->getDescriptor(); 101 } getGroupSizeKernelImp102 const uint32_t *getGroupSize() const override { 103 return groupSize; 104 } 105 uint32_t getSlmTotalSize() const override; 106 getSlmPolicyKernelImp107 NEO::SlmPolicy getSlmPolicy() const override { 108 if (cacheConfigFlags & ZE_CACHE_CONFIG_FLAG_LARGE_SLM) { 109 return NEO::SlmPolicy::SlmPolicyLargeSlm; 110 } else if (cacheConfigFlags & ZE_CACHE_CONFIG_FLAG_LARGE_DATA) { 111 return NEO::SlmPolicy::SlmPolicyLargeData; 112 } else { 113 return NEO::SlmPolicy::SlmPolicyNone; 114 } 115 } 116 117 NEO::GraphicsAllocation *getIsaAllocation() const override; 118 getRequiredWorkgroupOrderKernelImp119 uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; } requiresGenerationOfLocalIdsByRuntimeKernelImp120 bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; } getKernelRequiresUncachedMocsKernelImp121 bool getKernelRequiresUncachedMocs() { return (kernelRequiresUncachedMocsCount > 0); } getKernelRequiresQueueUncachedMocsKernelImp122 bool getKernelRequiresQueueUncachedMocs() { return (kernelRequiresQueueUncachedMocsCount > 0); } setKernelArgUncachedKernelImp123 void setKernelArgUncached(uint32_t index, bool val) { isArgUncached[index] = val; } 124 getGlobalOffsetsKernelImp125 uint32_t *getGlobalOffsets() override { 126 return this->globalOffsets; 127 } 128 ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) override; 129 void patchGlobalOffset() override; 130 131 ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override; usesRayTracingKernelImp132 bool usesRayTracing() { 133 return kernelImmData->getDescriptor().hasRTCalls(); 134 } 135 getProfileInfoKernelImp136 ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) override { 137 pProfileProperties->flags = 0; 138 pProfileProperties->numTokens = 0; 139 return ZE_RESULT_SUCCESS; 140 } 141 hasIndirectAccessKernelImp142 bool hasIndirectAccess() { 143 return kernelHasIndirectAccess; 144 } 145 146 NEO::GraphicsAllocation *allocatePrivateMemoryGraphicsAllocation() override; 147 void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override; 148 getPrivateMemoryGraphicsAllocationKernelImp149 NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override { 150 return privateMemoryGraphicsAllocation; 151 } 152 153 ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override; 154 uint32_t getSchedulingHintExp(); 155 getImplicitArgsKernelImp156 NEO::ImplicitArgs *getImplicitArgs() const override { return pImplicitArgs.get(); } 157 uint32_t getSizeForImplicitArgsPatching() const override; 158 void patchImplicitArgs(void *&pOut) const override; 159 160 protected: 161 KernelImp() = default; 162 163 void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z); 164 165 NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr; 166 167 void createPrintfBuffer(); 168 void setDebugSurface(); 169 virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0; 170 void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless); 171 172 const KernelImmutableData *kernelImmData = nullptr; 173 Module *module = nullptr; 174 175 typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); 176 std::vector<KernelImp::KernelArgHandler> kernelArgHandlers; 177 std::vector<NEO::GraphicsAllocation *> residencyContainer; 178 179 NEO::GraphicsAllocation *printfBuffer = nullptr; 180 181 uint32_t groupSize[3] = {0u, 0u, 0u}; 182 uint32_t numThreadsPerThreadGroup = 1u; 183 uint32_t threadExecutionMask = 0u; 184 185 std::unique_ptr<uint8_t[]> crossThreadData = nullptr; 186 uint32_t crossThreadDataSize = 0; 187 188 std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr; 189 uint32_t surfaceStateHeapDataSize = 0; 190 191 std::unique_ptr<uint8_t[]> dynamicStateHeapData = nullptr; 192 uint32_t dynamicStateHeapDataSize = 0; 193 194 uint8_t *perThreadDataForWholeThreadGroup = nullptr; 195 uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0; 196 uint32_t perThreadDataSizeForWholeThreadGroup = 0u; 197 uint32_t perThreadDataSize = 0u; 198 199 UnifiedMemoryControls unifiedMemoryControls; 200 std::vector<uint32_t> slmArgSizes; 201 uint32_t slmArgsTotalSize = 0U; 202 uint32_t requiredWorkgroupOrder = 0u; 203 204 bool kernelRequiresGenerationOfLocalIdsByRuntime = true; 205 uint32_t kernelRequiresUncachedMocsCount = false; 206 uint32_t kernelRequiresQueueUncachedMocsCount = false; 207 std::vector<bool> isArgUncached; 208 209 uint32_t globalOffsets[3] = {}; 210 211 ze_cache_config_flags_t cacheConfigFlags = 0u; 212 213 bool kernelHasIndirectAccess = true; 214 215 uint32_t schedulingHintExpFlag = 0u; 216 std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs; 217 }; 218 219 } // namespace L0 220