1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 #include "shared/source/helpers/hw_info.h"
10 #include "shared/source/kernel/kernel_descriptor.h"
11 #include "shared/source/program/heap_info.h"
12 #include "shared/source/utilities/arrayref.h"
13 #include "shared/source/utilities/const_stringref.h"
14 
15 #include <algorithm>
16 #include <array>
17 #include <cmath>
18 #include <cstdint>
19 #include <map>
20 #include <string>
21 #include <unordered_map>
22 #include <vector>
23 
namespace gtpin {
// Only the names are introduced here; igc_info_s stays an incomplete type in
// this header and is referred to through pointers.
struct igc_info_s;
using igc_info_t = igc_info_s;
} // namespace gtpin
27 
28 namespace NEO {
// Forward declarations: this header only needs the names below, not the
// complete types. KernelInfo itself is defined further down in this header.
class BuiltinDispatchInfoBuilder;
class Device;
class DispatchInfo;
class GraphicsAllocation;
class Kernel;
class MemoryManager;
struct KernelArgumentType;
struct KernelInfo;
37 
// Numerically equal to ln(4) (= 2 * ln 2), rounded to float precision.
// NOTE(review): presumably the ratio targeted for Y-tiled surfaces (compare
// WorkSizeInfo::targetRatio) - confirm against the users of this constant.
// constexpr (rather than plain const) makes the value usable in constant
// expressions; `static` keeps the per-translation-unit internal linkage.
static constexpr float YTilingRatioValue = 1.3862943611198906188344642429164f;
39 
40 struct WorkSizeInfo {
41     uint32_t maxWorkGroupSize;
42     uint32_t minWorkGroupSize;
43     bool hasBarriers;
44     uint32_t simdSize;
45     uint32_t slmTotalSize;
46     GFXCORE_FAMILY coreFamily;
47     uint32_t numThreadsPerSubSlice;
48     uint32_t localMemSize;
49     bool imgUsed = false;
50     bool yTiledSurfaces = false;
51     bool useRatio = false;
52     bool useStrictRatio = false;
53     float targetRatio = 0;
54 
55     WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
56 
57     void setIfUseImg(const KernelInfo &kernelInfo);
58     void setMinWorkGroupSize(const HardwareInfo *hwInfo);
59     void checkRatio(const size_t workItems[3]);
60 };
61 
// Plain bundle of device-level values; it is handed by const reference to
// KernelInfo::apply(). Every field defaults to null / zero.
struct DeviceInfoKernelPayloadConstants {
    void *slmWindow{nullptr};
    uint32_t slmWindowSize{0U};
    uint32_t computeUnitsUsedForScratch{0U};
    uint32_t maxWorkGroupSize{0U};
};
68 
69 struct KernelInfo {
70   public:
71     KernelInfo() = default;
72     KernelInfo(const KernelInfo &) = delete;
73     KernelInfo &operator=(const KernelInfo &) = delete;
74     ~KernelInfo();
75 
getGraphicsAllocationKernelInfo76     GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }
77 
getArgDescriptorAtKernelInfo78     const ArgDescriptor &getArgDescriptorAt(uint32_t index) const {
79         DEBUG_BREAK_IF(index >= kernelDescriptor.payloadMappings.explicitArgs.size());
80         return kernelDescriptor.payloadMappings.explicitArgs[index];
81     }
getExplicitArgsKernelInfo82     const StackVec<ArgDescriptor, 16> &getExplicitArgs() const {
83         return kernelDescriptor.payloadMappings.explicitArgs;
84     }
getExtendedMetadataKernelInfo85     const ArgTypeMetadataExtended &getExtendedMetadata(uint32_t index) const {
86         DEBUG_BREAK_IF(index >= kernelDescriptor.explicitArgsExtendedMetadata.size());
87         return kernelDescriptor.explicitArgsExtendedMetadata[index];
88     }
89     size_t getSamplerStateArrayCount() const;
90     size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
91     size_t getBorderColorStateSize() const;
92     size_t getBorderColorOffset() const;
getMaxSimdSizeKernelInfo93     unsigned int getMaxSimdSize() const {
94         return kernelDescriptor.kernelAttributes.simdSize;
95     }
hasDeviceEnqueueKernelInfo96     bool hasDeviceEnqueue() const {
97         return kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue;
98     }
requiresSubgroupIndependentForwardProgressKernelInfo99     bool requiresSubgroupIndependentForwardProgress() const {
100         return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
101     }
getMaxRequiredWorkGroupSizeKernelInfo102     size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
103         auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
104         auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
105         auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
106         size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
107         if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
108             maxRequiredWorkGroupSize = maxWorkGroupSize;
109         }
110         return maxRequiredWorkGroupSize;
111     }
112 
113     uint32_t getConstantBufferSize() const;
114     int32_t getArgNumByName(const char *name) const;
115 
116     bool createKernelAllocation(const Device &device, bool internalIsa);
117     void apply(const DeviceInfoKernelPayloadConstants &constants);
118 
119     HeapInfo heapInfo = {};
120     std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
121     char *crossThreadData = nullptr;
122     const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
123     uint32_t systemKernelOffset = 0;
124     uint64_t kernelId = 0;
125     bool hasIndirectStatelessAccess = false;
126     bool isKernelHeapSubstituted = false;
127     GraphicsAllocation *kernelAllocation = nullptr;
128     DebugData debugData;
129     bool computeMode = false;
130     const gtpin::igc_info_t *igcInfoForGtpin = nullptr;
131 
132     uint64_t shaderHashCode;
133     KernelDescriptor kernelDescriptor;
134 };
135 
// Declaration only; the definition is outside this header.
// NOTE(review): presumably builds one string out of the names of the given
// kernels - separator and ordering are not visible here, confirm in the definition.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);
137 
138 } // namespace NEO
139