1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "Compiler/CodeGenPublicEnums.h"
12 
13 #include <string>
14 #include <map>
15 #include <vector>
16 #include <array>
17 #include <optional>
18 #include <climits>
19 #include "common/LLVMWarningsPush.hpp"
20 #include <llvm/ADT/MapVector.h>
21 #include "common/LLVMWarningsPop.hpp"
22 
// Forward declarations of LLVM types. Within this header, Module, Function
// and GlobalVariable are used only through pointers, so complete definitions
// are not required here. Value and StructType are declared but not referenced
// by the declarations in this header.
namespace llvm
{
    class Module;
    class Function;
    class Value;
    class GlobalVariable;
    class StructType;
}
31 
// Number of input resource slots.
const unsigned int INPUT_RESOURCE_SLOT_COUNT = 128;
// Length of the uint64_t array ModuleMetaData::m_ShaderResourceViewMcsMask.
// Computed as a ceiling division by 64 so that, assuming one mask bit per
// slot, the array stays large enough even when the slot count is not a
// multiple of 64. (The value is 2 for the current count of 128.)
const unsigned int NUM_SHADER_RESOURCE_VIEW_SIZE = (INPUT_RESOURCE_SLOT_COUNT + 63) / 64;
34 
// Fixed element count of PushInfo::simplePushInfoArr.
constexpr unsigned int g_c_maxNumberOfBufferPushed = 4;
// Size limit for vectors printed in shader dumps (as the name states);
// not referenced elsewhere in this header.
static constexpr int MAX_VECTOR_SIZE_TO_PRINT_IN_SHADER_DUMPS = 1000;
37 
38 namespace IGC
39 {
    // Sentinel value (all 32 bits set). Used in this header as the default of
    // PushInfo::inlineConstantBufferSlot / Offset / GRFOffset.
    const unsigned int INVALID_CONSTANT_BUFFER_INVALID_ADDR = 0xFFFFFFFF;
41 
42     static const char* NAMED_METADATA_COARSE_PHASE = "coarse_phase";
43     static const char* NAMED_METADATA_PIXEL_PHASE  = "pixel_phase";
44 
    // Kind of function described by a FunctionMetaData entry
    // (FunctionMetaData::functionType defaults to KernelFunction).
    // NumberOfFunctionType comes after the real kinds, so its value (3)
    // equals their count; keep it last when adding enumerators.
    enum FunctionTypeMD
    {
        KernelFunction,
        CallableShader,
        UserFunction,
        NumberOfFunctionType,
    };
52 
    // Index values for indirect accesses.
    // ModuleMetaData::CurUniqueIndirectIdx is initialized to DefaultIndirectIdx.
    enum UniqueIndirectAS
    {
        // The convention is to use a '0' index for indirect accesses if
        // you don't need to distinguish between accesses.
        DefaultIndirectIdx = 0,
    };
59 
60 
    // Resource type tags; enumerators take consecutive values starting at 0.
    // NOTE(review): presumably the values stored in ArgAllocMD::type (an int
    // defaulting to -1) - confirm against the users of that field.
    enum ResourceTypeEnum
    {
        OtherResourceType,
        UAVResourceType,
        SRVResourceType,
        SamplerResourceType,
        BindlessUAVResourceType,
        BindlessSamplerResourceType,
        DefaultResourceType,
    };
71 
    // Extension type tags for media resources and samplers, grouped below
    // into VME and VA entries. Enumerators take consecutive values from 0.
    // NOTE(review): presumably the values stored in ArgAllocMD::extensionType
    // (an int defaulting to -1) - confirm against the users of that field.
    enum ResourceExtensionTypeEnum
    {
        NonExtensionType,

        // VME
        MediaResourceType,
        MediaResourceBlockType,
        MediaSamplerType,

        // VA
        MediaSamplerTypeConvolve,
        MediaSamplerTypeErode,
        MediaSamplerTypeDilate,
        MediaSamplerTypeMinMaxFilter,
        MediaSamplerTypeMinMax,
        MediaSamplerTypeCentroid,
        MediaSamplerTypeBoolCentroid,
        MediaSamplerTypeBoolSum,
        MediaSamplerTypeLbp,
        MediaSamplerTypeFloodFill,
        MediaSamplerTypeCorrelation,
        DefaultResourceExtensionType,
    };
95 
    // Surface description record; ModuleMetaData::inlineResInfoData holds a
    // list of these. Every field defaults to 0 except SurfaceType, which
    // defaults to 0x7.
    struct InlineResInfo
    {
        unsigned int textureID = 0;
        unsigned int SurfaceType = 0x7;
        unsigned int WidthOrBufferSize = 0;
        unsigned int Height = 0;
        unsigned int Depth = 0;
        unsigned int SurfaceArray = 0;
        unsigned int QWidth = 0;
        unsigned int QHeight = 0;
        unsigned int MipCount = 0;
    };
108 
    // Wrapper around a single integer dependency value (default 0).
    // PushInfo::pushAnalysisWIInfos stores a list of these.
    struct ArgDependencyInfoMD
    {
        int argDependency = 0;
    };
113 
    // Allocation record stored in ResourceAllocMD::argAllocMDList.
    // All three fields default to -1.
    // NOTE(review): type / extensionType presumably carry ResourceTypeEnum /
    // ResourceExtensionTypeEnum values - confirm against the code that fills them.
    struct ArgAllocMD
    {
        int type = -1;
        int extensionType = -1;
        int indexType = -1;
    };
120 
    // Description of one inline sampler, stored in
    // ResourceAllocMD::inlineSamplersMD: address modes, filter types, compare
    // function, normalized-coordinates setting and an RGBA border color.
    // All fields default to zero.
    struct InlineSamplersMD
    {
        int m_Value = 0;
        int addressMode = 0;
        int index = 0;
        int TCXAddressMode = 0;
        int TCYAddressMode = 0;
        int TCZAddressMode = 0;
        int MagFilterType = 0;
        int MinFilterType = 0;
        int MipFilterType = 0;
        int CompareFunc = 0;
        int NormalizedCoords = 0;
        float BorderColorR = 0.0f;
        float BorderColorG = 0.0f;
        float BorderColorB = 0.0f;
        float BorderColorA = 0.0f;
    };
139 
    // Resource allocation metadata of a function (FunctionMetaData::resAllocMD):
    // three integer fields for UAVs, SRVs and samplers (default 0) plus the
    // lists of argument allocation records and inline sampler descriptions.
    struct ResourceAllocMD
    {
        int uavsNumType = 0;
        int srvsNumType = 0;
        int samplersNumType = 0;
        std::vector<ArgAllocMD> argAllocMDList;
        std::vector<InlineSamplersMD> inlineSamplersMD;
    };
148 
    // Input record for a compute shader second compile; entries are stored in
    // ComputeShaderInfo::ComputeShaderSecondCompile. All fields default to
    // 0 / false.
    // NOTE(review): isRowMajor is declared int although the name reads as a
    // flag - confirm the expected value range with its users.
    struct ComputeShaderSecondCompileInputInfoMD
    {
        int runtimeVal_ResWidthHeight = 0;
        int runtimeVal_LoopCount = 0;
        int runtimeVal_ConstantBufferSize = 0;
        bool isSecondCompile = false;
        int isRowMajor = 0;
        int numChannelsUsed = 0;
    };
158 
159     struct LocalOffsetMD
160     {
161         int m_Offset;
162         llvm::GlobalVariable* m_Var;
163     };
164 
    // Three per-dimension integers, all defaulting to 0.
    // Stored as FunctionMetaData::workGroupWalkOrder.
    struct WorkGroupWalkOrderMD
    {
        int dim0 = 0;
        int dim1 = 0;
        int dim2 = 0;
    };
171 
172     struct FuncArgMD
173     {
174         int bufferLocationIndex = -1;
175         int bufferLocationCount = -1;
176         bool isEmulationArg = 0;
177     };
178 
179 
    // Key made of a buffer id, an element id and a size (all default 0).
    // Used as the key type of the std::map members
    // FunctionMetaData::inlineDynConstants and PushInfo::constants.
    struct ConstantAddress
    {
        unsigned int bufId = 0;
        unsigned int eltId = 0;
        unsigned int size = 0;
    };

    // Ordering for ConstantAddress, needed because it is used as a std::map
    // key. Only declared here; the definition lives outside this header.
    bool operator < (const ConstantAddress &a, const ConstantAddress &b);
188 
    // Holds the metadata of one function; ModuleMetaData::FuncMD maps each
    // llvm::Function* to an instance. All scalar members declared here have
    // in-class defaults (false / 0 / KernelFunction) and all containers
    // start out empty.
    struct FunctionMetaData
    {
        std::vector<LocalOffsetMD> localOffsets;
        WorkGroupWalkOrderMD workGroupWalkOrder;
        std::vector<FuncArgMD> funcArgs;
        FunctionTypeMD functionType = KernelFunction;
        std::map<ConstantAddress, uint32_t> inlineDynConstants;
        ResourceAllocMD resAllocMD;
        std::vector<unsigned> maxByteOffsets;
        bool IsInitializer = false;
        bool IsFinalizer = false;
        unsigned CompiledSubGroupsNumber = 0;
        bool hasInlineVmeSamplers = false;
        int localSize = 0;
        bool localIDPresent = false;
        bool groupIDPresent = false;
        int privateMemoryPerWI = 0;
        bool globalIDPresent = false;
        bool isUniqueEntry = false;

        // Analysis result: whether the kernel contains loads, stores or
        // atomics that do not go through a kernel argument.
        bool hasNonKernelArgLoad = false;
        bool hasNonKernelArgStore = false;
        bool hasNonKernelArgAtomic = false;

        std::vector<std::string> UserAnnotations;

        // OpenCL kernel argument descriptions, one vector per attribute.
        // NOTE(review): presumably indexed by argument position - confirm.
        std::vector<int32_t> m_OpenCLArgAddressSpaces;
        std::vector<std::string> m_OpenCLArgAccessQualifiers;
        std::vector<std::string> m_OpenCLArgTypes;
        std::vector<std::string> m_OpenCLArgBaseTypes;
        std::vector<std::string> m_OpenCLArgTypeQualifiers;
        std::vector<std::string> m_OpenCLArgNames;
    };
224 
    // The isCloned member was added to mark whether a function is a clone
    // of another one. If two kernels from a compilation unit invoke
    // the same callee, IGC ends up creating a clone of the callee
    // to separate the call graphs. However, it doesn't create metadata nodes,
    // so debug info for the cloned function would be empty. Marking the
    // function as a clone, and later iterating over the original function
    // instead of the clone when emitting debug info, helps emit correct
    // debug info.
    // NOTE(review): no isCloned member is declared in this header; this
    // comment may be stale.
233 
    // New structure that replaces the old Metadata framework's CompilerOptions.
    // Stored as ModuleMetaData::compOpt. Most switches default to false; the
    // ones that default to true are UseScratchSpacePrivateMemory,
    // SubgroupIndependentForwardProgressRequired, GreaterThan2GBBufferRequired,
    // GreaterThan4GBBufferRequired, PushConstantsEnable,
    // BufferOffsetArgOptional and UseLegacyBindlessMode.
    struct CompOptions
    {
        bool DenormsAreZero                             = false;
        bool CorrectlyRoundedDivSqrt                    = false;
        bool OptDisable                                 = false;
        bool MadEnable                                  = false;
        bool NoSignedZeros                              = false;
        bool NoNaNs                                     = false;

        // default rounding modes
        unsigned FloatRoundingMode                      = IGC::ROUND_TO_NEAREST_EVEN;
        unsigned FloatCvtIntRoundingMode                = IGC::ROUND_TO_ZERO;

        unsigned VISAPreSchedRPThreshold           = 0;
        unsigned SetLoopUnrollThreshold            = 0;
        bool UnsafeMathOptimizations                    = false;
        bool FiniteMathOnly                             = false;
        bool FastRelaxedMath                            = false;
        bool DashGSpecified                             = false;
        bool FastCompilation                            = false;
        bool UseScratchSpacePrivateMemory               = true;
        bool RelaxedBuiltins                            = false;
        bool SubgroupIndependentForwardProgressRequired = true;
        bool GreaterThan2GBBufferRequired               = true;
        bool GreaterThan4GBBufferRequired               = true;
        bool DisableA64WA                               = false;
        bool ForceEnableA64WA                           = false;
        bool PushConstantsEnable                        = true;
        bool HasPositivePointerOffset                   = false;
        bool HasBufferOffsetArg                         = false;
        bool BufferOffsetArgOptional                    = true;
        bool HasSubDWAlignedPtrArg                      = false;
        bool replaceGlobalOffsetsByZero                 = false;
        unsigned forcePixelShaderSIMDMode               = 0;
        bool pixelShaderDoNotAbortOnSpill               = false;
        bool UniformWGS                                 = false;
        bool disableVertexComponentPacking              = false;
        bool disablePartialVertexComponentPacking       = false;
        bool PreferBindlessImages                       = false;
        bool UseBindlessMode                            = false;
        bool UseLegacyBindlessMode                      = true;
        bool disableMathRefactoring                     = false;
        // If PTSS is enabled and the private data is too large (>256k in
        // XeHP_SDV+), stateless memory might be used to hold the private data
        // instead of PTSS. This flag covers that scenario.
        bool UseStatelessforPrivateMemory               = false;
        bool EnableTakeGlobalAddress                    = false;
        bool IsLibraryCompilation                       = false;
        bool FastVISACompile                            = false;
        bool MatchSinCosPi                              = false;
        bool CaptureCompilerStats                       = false;
        // Suggests enabling ZEBinary. IGC can still fall back to the legacy
        // patch-token based binary if the input contains features that
        // are not supported by ZEBinary.
        bool EnableZEBinary                             = false;
    };
291 
    // Thread ID layouts that can be requested through
    // ComputeShaderInfo::neededThreadIdLayout.
    enum class ThreadIDLayout
    {
        // layout IDs along X,Y,Z
        X,
        // Tile along just the y-dimension
        TileY,
        // tile IDs in 2x2 groups as expected by derivative calculations
        QuadTile
    };
301 
    // Compute shader settings stored as ModuleMetaData::csInfo.
    // For the "force"/threshold fields a value of 0 means "not forced" or
    // "use the default", as noted per field.
    struct ComputeShaderInfo
    {
        unsigned int maxWorkGroupSize = 0;
        unsigned int waveSize = 0; // force a wave size
        std::vector<ComputeShaderSecondCompileInputInfoMD> ComputeShaderSecondCompile;
        unsigned char forcedSIMDSize = 0;  // 0 means not forced
        unsigned int forceTotalGRFNum = 0; // 0 means not forced
        unsigned int VISAPreSchedRPThreshold = 0; // 0 means use the default
        unsigned int SetLoopUnrollThreshold = 0; // 0 means use the default
        bool forcedVISAPreRAScheduler = false;
        // disables dispatch along y and tiled order optimizations
        bool disableLocalIdOrderOptimizations = false;
        // force disables dispatch along y optimization
        bool disableDispatchAlongY = false;
        // If nullopt, then there is no requirement
        std::optional<ThreadIDLayout> neededThreadIdLayout;
        // force enable tile y optimization
        bool forceTileYWalk = false;
    };
321 
322 
    // Pixel shader settings stored as ModuleMetaData::psInfo.
    // All flags default to false, the masks/counters to 0, and the two
    // vectors start out empty.
    struct PixelShaderInfo
    {
        unsigned char BlendStateDisabledMask = 0;
        bool SkipSrc0Alpha                   = false;
        bool DualSourceBlendingDisabled      = false;
        bool ForceEnableSimd32               = false; // forces compilation of simd32; bypass heuristics
        bool outputDepth                     = false;
        bool outputStencil                   = false;
        bool outputMask                      = false;
        bool blendToFillEnabled              = false;
        bool forceEarlyZ                     = false;   // force earlyz test
        bool hasVersionedLoop                = false;   // if versioned by customloopversioning
        // Number of samples for this pixel shader if known.
        // Valid values 0, 1, 2, 4, 8 and 16.
        // 0 means unknown or not set.
        unsigned char NumSamples             = 0;
        std::vector<int> blendOptimizationMode;
        std::vector<int> colorOutputMask;
    };
342 
343 
    // Input descriptor; the mapped value type of PushInfo::inputs.
    // Holds an index, an argument index and an interpolation mode,
    // all defaulting to 0.
    struct SInputDesc
    {
        unsigned int index = 0;
        int argIndex = 0;
        int interpolationMode = 0;
    };
350 
    // SimplePushInfo holds information about the promoted constant buffer
    // region (see member descriptions in SSimplePushInfo). It also holds
    // mappings between the byte offsets in the promoted region and the
    // corresponding argument index.
    // PushInfo::simplePushInfoArr stores a fixed-size array of these.
    struct SimplePushInfo
    {
        unsigned int cbIdx = 0;
        // The two GRF offsets default to -1.
        int pushableAddressGrfOffset = -1;
        int pushableOffsetGrfOffset = -1;
        unsigned int offset = 0;
        unsigned int size = 0;
        bool isStateless = false;
        bool isBindless = false;
        // std::map<offset, argumentIndex>
        std::map<unsigned int, int> simplePushLoads;
    };
367 
    // Element type of PushInfo::pushableAddresses: an address offset
    // (default 0) and an isStatic flag (default false).
    struct StatelessPushInfo
    {
        unsigned int addressOffset = 0;
        bool isStatic = false;
    };
373 
    // Location of the dynamic buffer offsets among the runtime values;
    // stored as PushInfo::dynamicBufferInfo. Both fields default to 0,
    // i.e. no dynamic buffer offsets.
    struct DynamicBufferInfo
    {
        // If numOffsets > 0, dynamic buffer offsets occupy a contiguous region
        // of runtime values with indices in [firstIndex, firstIndex + numOffsets).
        unsigned int firstIndex = 0;
        unsigned int numOffsets = 0;
    };
381 
    // Push-related metadata, stored as ModuleMetaData::pushInfo.
    // simplePushInfoArr is a std::array with g_c_maxNumberOfBufferPushed
    // elements, so it always has that fixed size.
    // All the push info below maps to an argument number (int) so that the
    // relevant Argument can be retrieved as a value pointer from the Function.
    struct PushInfo
    {
        std::vector<StatelessPushInfo> pushableAddresses;

        // Indices of RuntimeValues that can be used to compute surface state
        // offsets for the bindless push.
        std::vector<unsigned int> bindlessPushInfo;

        // Dynamic buffer offsets info.
        // Used only with clients that support dynamic buffers.
        DynamicBufferInfo dynamicBufferInfo;
        unsigned int MaxNumberOfPushedBuffers = 0; ///< specifies the maximum number of buffers available for the simple push mechanism for current shader.

        // The three inline constant buffer fields default to the
        // INVALID_CONSTANT_BUFFER_INVALID_ADDR sentinel.
        unsigned int inlineConstantBufferSlot = INVALID_CONSTANT_BUFFER_INVALID_ADDR; // slot of the inlined constant buffer
        unsigned int inlineConstantBufferOffset = INVALID_CONSTANT_BUFFER_INVALID_ADDR;    // offset of the inlined constant buffer
        unsigned int inlineConstantBufferGRFOffset = INVALID_CONSTANT_BUFFER_INVALID_ADDR;

        std::map<ConstantAddress, int> constants;
        std::map<unsigned int, SInputDesc> inputs;
        std::map<unsigned int, int> constantReg;
        std::array<SimplePushInfo, g_c_maxNumberOfBufferPushed> simplePushInfoArr;
        unsigned int simplePushBufferUsed = 0;

        std::vector<ArgDependencyInfoMD> pushAnalysisWIInfos;
    };
409 
410     struct InlineProgramScopeBuffer
411     {
412         int alignment;
413         unsigned allocSize;
414         std::vector<unsigned char> Buffer;
415     };
416 
    // Byte storage held as ModuleMetaData::immConstant; empty by default.
    struct ImmConstantInfo
    {
        std::vector<char> data;
    };
421 
422     struct PointerProgramBinaryInfo
423     {
424         int PointerBufferIndex;
425         int PointerOffset;
426         int PointeeAddressSpace;
427         int PointeeBufferIndex;
428     };
429 
430     struct PointerAddressRelocInfo
431     {
432         unsigned BufferOffset;
433         unsigned PointerSize;
434         std::string Symbol;
435     };
436 
    // Stored as ModuleMetaData::shaderData; carries only a replica count
    // (default 0).
    struct ShaderData
    {
        unsigned int numReplicas = 0;
    };
441 
    // URB layout flags stored as ModuleMetaData::URBInfo.
    // Note that hasVertexHeader defaults to true, unlike the two 64B flags.
    struct URBLayoutInfo
    {
        bool has64BVertexHeaderInput = false;
        bool has64BVertexHeaderOutput = false;
        bool hasVertexHeader = true;
    };
448 
    // Metadata for the entire module. This is the type passed to the
    // serialize() / deserialize() functions declared in this header.
    // All scalar members default to false / 0 and the containers start empty.
    struct ModuleMetaData
    {
        bool isPrecise = false;
        CompOptions compOpt;
        // Per-function metadata, keyed by function.
        llvm::MapVector<llvm::Function*, IGC::FunctionMetaData> FuncMD;
        PushInfo pushInfo;
        PixelShaderInfo psInfo;
        ComputeShaderInfo csInfo;
        uint32_t CurUniqueIndirectIdx = DefaultIndirectIdx;
        std::map<uint32_t, std::array<uint32_t, 4>> inlineDynTextures;
        std::vector<InlineResInfo> inlineResInfoData;
        ImmConstantInfo immConstant;
        std::vector<InlineProgramScopeBuffer> inlineConstantBuffers;
        std::vector<InlineProgramScopeBuffer> inlineGlobalBuffers;
        std::vector<PointerProgramBinaryInfo> GlobalPointerProgramBinaryInfos;
        std::vector<PointerProgramBinaryInfo> ConstantPointerProgramBinaryInfos;
        std::vector<PointerAddressRelocInfo> GlobalBufferAddressRelocInfo;
        std::vector<PointerAddressRelocInfo> ConstantBufferAddressRelocInfo;
        unsigned int MinNOSPushConstantSize = 0;
        llvm::MapVector<llvm::GlobalVariable*, int> inlineProgramScopeOffsets;
        ShaderData shaderData;
        URBLayoutInfo URBInfo;
        bool UseBindlessImage = false;
        bool enableRangeReduce = false;

        // When true, the compiler enables the MatchMad optimization for VS.
        bool allowMatchMadOptimizationforVS = false;

        bool disableMemOptforNegativeOffsetLoads = false;

        // When true, the compiler can assume that resources bound to two
        // different bindings do not alias.
        bool statefullResourcesNotAliased = false;
        bool disableMixMode = false;

        unsigned int privateMemoryPerWI = 0;
        // The trailing {} zero-initializes every word of the mask.
        std::array<uint64_t, NUM_SHADER_RESOURCE_VIEW_SIZE> m_ShaderResourceViewMcsMask{};
        unsigned int computedDepthMode = 0; // Defaults to 0, meaning depth mode is off
        // set by LowerGPCallArg pass
        bool hasNoLocalToGenericCast = false;
        bool hasNoPrivateToGenericCast = false;
    };
    // Declarations only; the definitions live outside this header.
    // serialize takes the metadata read-only together with a mutable module;
    // deserialize takes a read-only module and fills the given metadata object.
    // NOTE(review): presumably the data is stored in / loaded from the
    // module's LLVM metadata - confirm in the implementation.
    void serialize(const IGC::ModuleMetaData &moduleMD, llvm::Module* module);
    void deserialize(IGC::ModuleMetaData &deserializedMD, const llvm::Module* module);
494 
495 }
496