1 /* 2 * Copyright (C) 2018-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 #include "shared/source/helpers/hw_info.h" 10 #include "shared/source/kernel/kernel_descriptor.h" 11 #include "shared/source/program/heap_info.h" 12 #include "shared/source/utilities/arrayref.h" 13 #include "shared/source/utilities/const_stringref.h" 14 15 #include <algorithm> 16 #include <array> 17 #include <cmath> 18 #include <cstdint> 19 #include <map> 20 #include <string> 21 #include <unordered_map> 22 #include <vector> 23 24 namespace gtpin { 25 typedef struct igc_info_s igc_info_t; 26 } 27 28 namespace NEO { 29 class BuiltinDispatchInfoBuilder; 30 class Device; 31 class Kernel; 32 struct KernelInfo; 33 class DispatchInfo; 34 struct KernelArgumentType; 35 class GraphicsAllocation; 36 class MemoryManager; 37 38 static const float YTilingRatioValue = 1.3862943611198906188344642429164f; 39 40 struct WorkSizeInfo { 41 uint32_t maxWorkGroupSize; 42 uint32_t minWorkGroupSize; 43 bool hasBarriers; 44 uint32_t simdSize; 45 uint32_t slmTotalSize; 46 GFXCORE_FAMILY coreFamily; 47 uint32_t numThreadsPerSubSlice; 48 uint32_t localMemSize; 49 bool imgUsed = false; 50 bool yTiledSurfaces = false; 51 bool useRatio = false; 52 bool useStrictRatio = false; 53 float targetRatio = 0; 54 55 WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface); 56 57 void setIfUseImg(const KernelInfo &kernelInfo); 58 void setMinWorkGroupSize(const HardwareInfo *hwInfo); 59 void checkRatio(const size_t workItems[3]); 60 }; 61 62 struct DeviceInfoKernelPayloadConstants { 63 void *slmWindow = nullptr; 64 uint32_t slmWindowSize = 0U; 65 uint32_t computeUnitsUsedForScratch = 0U; 66 uint32_t maxWorkGroupSize = 0U; 67 }; 68 69 struct KernelInfo { 70 public: 71 KernelInfo() = default; 72 KernelInfo(const KernelInfo &) = delete; 73 KernelInfo &operator=(const KernelInfo &) = delete; 74 ~KernelInfo(); 75 getGraphicsAllocationKernelInfo76 GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; } 77 getArgDescriptorAtKernelInfo78 const ArgDescriptor &getArgDescriptorAt(uint32_t index) const { 79 DEBUG_BREAK_IF(index >= kernelDescriptor.payloadMappings.explicitArgs.size()); 80 return kernelDescriptor.payloadMappings.explicitArgs[index]; 81 } getExplicitArgsKernelInfo82 const StackVec<ArgDescriptor, 16> &getExplicitArgs() const { 83 return kernelDescriptor.payloadMappings.explicitArgs; 84 } getExtendedMetadataKernelInfo85 const ArgTypeMetadataExtended &getExtendedMetadata(uint32_t index) const { 86 DEBUG_BREAK_IF(index >= kernelDescriptor.explicitArgsExtendedMetadata.size()); 87 return kernelDescriptor.explicitArgsExtendedMetadata[index]; 88 } 89 size_t getSamplerStateArrayCount() const; 90 size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const; 91 size_t getBorderColorStateSize() const; 92 size_t getBorderColorOffset() const; getMaxSimdSizeKernelInfo93 unsigned int getMaxSimdSize() const { 94 return kernelDescriptor.kernelAttributes.simdSize; 95 } hasDeviceEnqueueKernelInfo96 bool hasDeviceEnqueue() const { 97 return kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue; 98 } requiresSubgroupIndependentForwardProgressKernelInfo99 bool requiresSubgroupIndependentForwardProgress() const { 100 return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress; 101 } getMaxRequiredWorkGroupSizeKernelInfo102 size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const { 103 auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; 104 auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; 105 auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; 106 size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; 107 if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) { 108 maxRequiredWorkGroupSize = maxWorkGroupSize; 109 } 110 return maxRequiredWorkGroupSize; 111 } 112 113 uint32_t getConstantBufferSize() const; 114 int32_t getArgNumByName(const char *name) const; 115 116 bool createKernelAllocation(const Device &device, bool internalIsa); 117 void apply(const DeviceInfoKernelPayloadConstants &constants); 118 119 HeapInfo heapInfo = {}; 120 std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset; 121 char *crossThreadData = nullptr; 122 const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr; 123 uint32_t systemKernelOffset = 0; 124 uint64_t kernelId = 0; 125 bool hasIndirectStatelessAccess = false; 126 bool isKernelHeapSubstituted = false; 127 GraphicsAllocation *kernelAllocation = nullptr; 128 DebugData debugData; 129 bool computeMode = false; 130 const gtpin::igc_info_t *igcInfoForGtpin = nullptr; 131 132 uint64_t shaderHashCode; 133 KernelDescriptor kernelDescriptor; 134 }; 135 136 std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos); 137 138 } // namespace NEO 139