1 /* 2 * Copyright (C) 2019-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 #include "shared/source/built_ins/built_ins.h" 10 #include "shared/source/helpers/per_thread_data.h" 11 12 #include "opencl/source/kernel/kernel.h" 13 14 #include <algorithm> 15 #include <cstddef> 16 #include <cstdint> 17 18 namespace NEO { 19 class CommandQueue; 20 class LinearStream; 21 class IndirectHeap; 22 struct CrossThreadInfo; 23 struct MultiDispatchInfo; 24 25 template <typename GfxFamily> 26 struct HardwareCommandsHelper : public PerThreadDataHelper { 27 using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; 28 using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; 29 using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; 30 using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; 31 using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; 32 using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; 33 34 static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor( 35 const IndirectHeap &indirectHeap, 36 uint64_t offsetInterfaceDescriptor, 37 INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor); 38 39 static void setGrfInfo( 40 INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, 41 const Kernel &kernel, 42 const size_t &sizeCrossThreadData, 43 const size_t &sizePerThreadData); 44 45 inline static uint32_t additionalSizeRequiredDsh(); 46 47 static size_t sendInterfaceDescriptorData( 48 const IndirectHeap &indirectHeap, 49 uint64_t offsetInterfaceDescriptor, 50 uint64_t kernelStartOffset, 51 size_t sizeCrossThreadData, 52 size_t sizePerThreadData, 53 size_t bindingTablePointer, 54 size_t offsetSamplerState, 55 uint32_t numSamplers, 56 uint32_t numThreadsPerThreadGroup, 57 const Kernel &kernel, 58 uint32_t bindingTablePrefetchSize, 59 PreemptionMode preemptionMode, 60 INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, 61 const Device &device); 62 63 static void sendMediaStateFlush( 64 LinearStream &commandStream, 65 size_t offsetInterfaceDescriptorData); 66 67 static void sendMediaInterfaceDescriptorLoad( 68 LinearStream &commandStream, 69 size_t offsetInterfaceDescriptorData, 70 size_t sizeInterfaceDescriptorData); 71 72 static size_t sendCrossThreadData( 73 IndirectHeap &indirectHeap, 74 Kernel &kernel, 75 bool inlineDataProgrammingRequired, 76 WALKER_TYPE *walkerCmd, 77 uint32_t &sizeCrossThreadData); 78 79 static size_t sendIndirectState( 80 LinearStream &commandStream, 81 IndirectHeap &dsh, 82 IndirectHeap &ioh, 83 IndirectHeap &ssh, 84 Kernel &kernel, 85 uint64_t kernelStartOffset, 86 uint32_t simd, 87 const size_t localWorkSize[3], 88 const uint64_t offsetInterfaceDescriptorTable, 89 uint32_t &interfaceDescriptorIndex, 90 PreemptionMode preemptionMode, 91 WALKER_TYPE *walkerCmd, 92 INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, 93 bool localIdsGenerationByRuntime, 94 const Device &device); 95 96 static void programPerThreadData( 97 size_t &sizePerThreadData, 98 const bool &localIdsGenerationByRuntime, 99 LinearStream &ioh, 100 uint32_t &simd, 101 uint32_t &numChannels, 102 const size_t localWorkSize[3], 103 Kernel &kernel, 104 size_t &sizePerThreadDataTotal, 105 size_t &localWorkItems, 106 uint32_t rootDeviceIndex); 107 108 static void updatePerThreadDataTotal( 109 size_t &sizePerThreadData, 110 uint32_t &simd, 111 uint32_t &numChannels, 112 size_t &sizePerThreadDataTotal, 113 size_t &localWorkItems); 114 115 inline static bool resetBindingTablePrefetch(Kernel &kernel); 116 117 static size_t getSizeRequiredCS(); 118 static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); 119 120 static size_t getSizeRequiredDSH( 121 const Kernel &kernel); 122 static size_t getSizeRequiredIOH( 123 const Kernel &kernel, 124 size_t localWorkSize = 256); 125 static size_t getSizeRequiredSSH( 126 const Kernel &kernel); 127 128 static size_t getTotalSizeRequiredDSH( 129 const MultiDispatchInfo &multiDispatchInfo); 130 static size_t getTotalSizeRequiredIOH( 131 const MultiDispatchInfo &multiDispatchInfo); 132 static size_t getTotalSizeRequiredSSH( 133 const MultiDispatchInfo &multiDispatchInfo); 134 135 static size_t getSshSizeForExecutionModel(const Kernel &kernel); 136 static void setInterfaceDescriptorOffset( 137 WALKER_TYPE *walkerCmd, 138 uint32_t &interfaceDescriptorIndex); 139 140 static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); 141 142 static bool inlineDataProgrammingRequired(const Kernel &kernel); 143 static bool kernelUsesLocalIds(const Kernel &kernel); 144 }; 145 } // namespace NEO 146