1 /*
2  * Copyright (C) 2019-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 #include "shared/source/built_ins/built_ins.h"
10 #include "shared/source/helpers/per_thread_data.h"
11 
12 #include "opencl/source/kernel/kernel.h"
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 
18 namespace NEO {
// Forward declarations of NEO types; their complete definitions are not
// part of this header.
struct CrossThreadInfo;
struct MultiDispatchInfo;
class CommandQueue;
class IndirectHeap;
class LinearStream;
24 
25 template <typename GfxFamily>
26 struct HardwareCommandsHelper : public PerThreadDataHelper {
27     using WALKER_TYPE = typename GfxFamily::WALKER_TYPE;
28     using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
29     using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
30     using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
31     using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
32     using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
33 
34     static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor(
35         const IndirectHeap &indirectHeap,
36         uint64_t offsetInterfaceDescriptor,
37         INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor);
38 
39     static void setGrfInfo(
40         INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor,
41         const Kernel &kernel,
42         const size_t &sizeCrossThreadData,
43         const size_t &sizePerThreadData);
44 
45     inline static uint32_t additionalSizeRequiredDsh();
46 
47     static size_t sendInterfaceDescriptorData(
48         const IndirectHeap &indirectHeap,
49         uint64_t offsetInterfaceDescriptor,
50         uint64_t kernelStartOffset,
51         size_t sizeCrossThreadData,
52         size_t sizePerThreadData,
53         size_t bindingTablePointer,
54         size_t offsetSamplerState,
55         uint32_t numSamplers,
56         uint32_t numThreadsPerThreadGroup,
57         const Kernel &kernel,
58         uint32_t bindingTablePrefetchSize,
59         PreemptionMode preemptionMode,
60         INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
61         const Device &device);
62 
63     static void sendMediaStateFlush(
64         LinearStream &commandStream,
65         size_t offsetInterfaceDescriptorData);
66 
67     static void sendMediaInterfaceDescriptorLoad(
68         LinearStream &commandStream,
69         size_t offsetInterfaceDescriptorData,
70         size_t sizeInterfaceDescriptorData);
71 
72     static size_t sendCrossThreadData(
73         IndirectHeap &indirectHeap,
74         Kernel &kernel,
75         bool inlineDataProgrammingRequired,
76         WALKER_TYPE *walkerCmd,
77         uint32_t &sizeCrossThreadData);
78 
79     static size_t sendIndirectState(
80         LinearStream &commandStream,
81         IndirectHeap &dsh,
82         IndirectHeap &ioh,
83         IndirectHeap &ssh,
84         Kernel &kernel,
85         uint64_t kernelStartOffset,
86         uint32_t simd,
87         const size_t localWorkSize[3],
88         const uint64_t offsetInterfaceDescriptorTable,
89         uint32_t &interfaceDescriptorIndex,
90         PreemptionMode preemptionMode,
91         WALKER_TYPE *walkerCmd,
92         INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
93         bool localIdsGenerationByRuntime,
94         const Device &device);
95 
96     static void programPerThreadData(
97         size_t &sizePerThreadData,
98         const bool &localIdsGenerationByRuntime,
99         LinearStream &ioh,
100         uint32_t &simd,
101         uint32_t &numChannels,
102         const size_t localWorkSize[3],
103         Kernel &kernel,
104         size_t &sizePerThreadDataTotal,
105         size_t &localWorkItems,
106         uint32_t rootDeviceIndex);
107 
108     static void updatePerThreadDataTotal(
109         size_t &sizePerThreadData,
110         uint32_t &simd,
111         uint32_t &numChannels,
112         size_t &sizePerThreadDataTotal,
113         size_t &localWorkItems);
114 
115     inline static bool resetBindingTablePrefetch(Kernel &kernel);
116 
117     static size_t getSizeRequiredCS();
118     static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
119 
120     static size_t getSizeRequiredDSH(
121         const Kernel &kernel);
122     static size_t getSizeRequiredIOH(
123         const Kernel &kernel,
124         size_t localWorkSize = 256);
125     static size_t getSizeRequiredSSH(
126         const Kernel &kernel);
127 
128     static size_t getTotalSizeRequiredDSH(
129         const MultiDispatchInfo &multiDispatchInfo);
130     static size_t getTotalSizeRequiredIOH(
131         const MultiDispatchInfo &multiDispatchInfo);
132     static size_t getTotalSizeRequiredSSH(
133         const MultiDispatchInfo &multiDispatchInfo);
134 
135     static size_t getSshSizeForExecutionModel(const Kernel &kernel);
136     static void setInterfaceDescriptorOffset(
137         WALKER_TYPE *walkerCmd,
138         uint32_t &interfaceDescriptorIndex);
139 
140     static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
141 
142     static bool inlineDataProgrammingRequired(const Kernel &kernel);
143     static bool kernelUsesLocalIds(const Kernel &kernel);
144 };
145 } // namespace NEO
146