1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "Compiler/CodeGenPublicEnums.h" 12 13 #include <string> 14 #include <map> 15 #include <vector> 16 #include <array> 17 #include <optional> 18 #include <climits> 19 #include "common/LLVMWarningsPush.hpp" 20 #include <llvm/ADT/MapVector.h> 21 #include "common/LLVMWarningsPop.hpp" 22 23 namespace llvm 24 { 25 class Module; 26 class Function; 27 class Value; 28 class GlobalVariable; 29 class StructType; 30 } 31 32 const unsigned int INPUT_RESOURCE_SLOT_COUNT = 128; 33 const unsigned int NUM_SHADER_RESOURCE_VIEW_SIZE = (INPUT_RESOURCE_SLOT_COUNT + 1) / 64; 34 35 const unsigned int g_c_maxNumberOfBufferPushed = 4; 36 static const int MAX_VECTOR_SIZE_TO_PRINT_IN_SHADER_DUMPS = 1000; 37 38 namespace IGC 39 { 40 const unsigned int INVALID_CONSTANT_BUFFER_INVALID_ADDR = 0xFFFFFFFF; 41 42 static const char* NAMED_METADATA_COARSE_PHASE = "coarse_phase"; 43 static const char* NAMED_METADATA_PIXEL_PHASE = "pixel_phase"; 44 45 enum FunctionTypeMD 46 { 47 KernelFunction, 48 CallableShader, 49 UserFunction, 50 NumberOfFunctionType, 51 }; 52 53 enum UniqueIndirectAS 54 { 55 // The convention is to use a '0' index for indirect accesses if 56 // you don't need to distinguish between accesses. 57 DefaultIndirectIdx = 0, 58 }; 59 60 61 enum ResourceTypeEnum 62 { 63 OtherResourceType, 64 UAVResourceType, 65 SRVResourceType, 66 SamplerResourceType, 67 BindlessUAVResourceType, 68 BindlessSamplerResourceType, 69 DefaultResourceType, 70 }; 71 72 enum ResourceExtensionTypeEnum 73 { 74 NonExtensionType, 75 76 // VME 77 MediaResourceType, 78 MediaResourceBlockType, 79 MediaSamplerType, 80 81 // VA 82 MediaSamplerTypeConvolve, 83 MediaSamplerTypeErode, 84 MediaSamplerTypeDilate, 85 MediaSamplerTypeMinMaxFilter, 86 MediaSamplerTypeMinMax, 87 MediaSamplerTypeCentroid, 88 MediaSamplerTypeBoolCentroid, 89 MediaSamplerTypeBoolSum, 90 MediaSamplerTypeLbp, 91 MediaSamplerTypeFloodFill, 92 MediaSamplerTypeCorrelation, 93 DefaultResourceExtensionType, 94 }; 95 96 struct InlineResInfo 97 { 98 unsigned int textureID = 0; 99 unsigned int SurfaceType = 0x7; 100 unsigned int WidthOrBufferSize = 0; 101 unsigned int Height = 0; 102 unsigned int Depth = 0; 103 unsigned int SurfaceArray = 0; 104 unsigned int QWidth = 0; 105 unsigned int QHeight = 0; 106 unsigned int MipCount = 0; 107 }; 108 109 struct ArgDependencyInfoMD 110 { 111 int argDependency = 0; 112 }; 113 114 struct ArgAllocMD 115 { 116 int type = -1; 117 int extensionType = -1; 118 int indexType = -1; 119 }; 120 121 struct InlineSamplersMD 122 { 123 int m_Value = 0; 124 int addressMode = 0; 125 int index = 0; 126 int TCXAddressMode = 0; 127 int TCYAddressMode = 0; 128 int TCZAddressMode = 0; 129 int MagFilterType = 0; 130 int MinFilterType = 0; 131 int MipFilterType = 0; 132 int CompareFunc = 0; 133 int NormalizedCoords = 0; 134 float BorderColorR = 0.0f; 135 float BorderColorG = 0.0f; 136 float BorderColorB = 0.0f; 137 float BorderColorA = 0.0f; 138 }; 139 140 struct ResourceAllocMD 141 { 142 int uavsNumType = 0; 143 int srvsNumType = 0; 144 int samplersNumType = 0; 145 std::vector<ArgAllocMD> argAllocMDList; 146 std::vector<InlineSamplersMD> inlineSamplersMD; 147 }; 148 149 struct ComputeShaderSecondCompileInputInfoMD 150 { 151 int runtimeVal_ResWidthHeight = 0; 152 int runtimeVal_LoopCount = 0; 153 int runtimeVal_ConstantBufferSize = 0; 154 bool isSecondCompile = false; 155 int isRowMajor = 0; 156 int numChannelsUsed = 0; 157 }; 158 159 struct LocalOffsetMD 160 { 161 int m_Offset; 162 llvm::GlobalVariable* m_Var; 163 }; 164 165 struct WorkGroupWalkOrderMD 166 { 167 int dim0 = 0; 168 int dim1 = 0; 169 int dim2 = 0; 170 }; 171 172 struct FuncArgMD 173 { 174 int bufferLocationIndex = -1; 175 int bufferLocationCount = -1; 176 bool isEmulationArg = 0; 177 }; 178 179 180 struct ConstantAddress 181 { 182 unsigned int bufId = 0; 183 unsigned int eltId = 0; 184 unsigned int size = 0; 185 }; 186 187 bool operator < (const ConstantAddress &a, const ConstantAddress &b); 188 189 //to hold metadata of every function 190 struct FunctionMetaData 191 { 192 std::vector<LocalOffsetMD> localOffsets; 193 WorkGroupWalkOrderMD workGroupWalkOrder; 194 std::vector<FuncArgMD> funcArgs; 195 FunctionTypeMD functionType = KernelFunction; 196 std::map<ConstantAddress, uint32_t> inlineDynConstants; 197 ResourceAllocMD resAllocMD; 198 std::vector<unsigned> maxByteOffsets; 199 bool IsInitializer = false; 200 bool IsFinalizer = false; 201 unsigned CompiledSubGroupsNumber = 0; 202 bool hasInlineVmeSamplers = false; 203 int localSize = 0; 204 bool localIDPresent = false; 205 bool groupIDPresent = false; 206 int privateMemoryPerWI = 0; 207 bool globalIDPresent = false; 208 bool isUniqueEntry = false; 209 210 // Analysis result of if there are non-kernel-argument ld/st in the kernel 211 bool hasNonKernelArgLoad = false; 212 bool hasNonKernelArgStore = false; 213 bool hasNonKernelArgAtomic = false; 214 215 std::vector<std::string> UserAnnotations; 216 217 std::vector<int32_t> m_OpenCLArgAddressSpaces; 218 std::vector<std::string> m_OpenCLArgAccessQualifiers; 219 std::vector<std::string> m_OpenCLArgTypes; 220 std::vector<std::string> m_OpenCLArgBaseTypes; 221 std::vector<std::string> m_OpenCLArgTypeQualifiers; 222 std::vector<std::string> m_OpenCLArgNames; 223 }; 224 225 // isCloned member is added to mark whether a function is clone 226 // of another one. If two kernels from a compilation unit invoke 227 // the same callee, IGC ends up creating clone of the callee 228 // to separate call graphs. But it doesnt create metadata nodes 229 // so debug info for cloned function will be empty. Marking 230 // function as clone and later in debug info iterating over 231 // original function instead of clone helps emit out correct debug 232 // info. 233 234 //new structure to replace old Metatdata framework's CompilerOptions 235 struct CompOptions 236 { 237 bool DenormsAreZero = false; 238 bool CorrectlyRoundedDivSqrt = false; 239 bool OptDisable = false; 240 bool MadEnable = false; 241 bool NoSignedZeros = false; 242 bool NoNaNs = false; 243 244 // default rounding modes 245 unsigned FloatRoundingMode = IGC::ROUND_TO_NEAREST_EVEN; 246 unsigned FloatCvtIntRoundingMode = IGC::ROUND_TO_ZERO; 247 248 unsigned VISAPreSchedRPThreshold = 0; 249 unsigned SetLoopUnrollThreshold = 0; 250 bool UnsafeMathOptimizations = false; 251 bool FiniteMathOnly = false; 252 bool FastRelaxedMath = false; 253 bool DashGSpecified = false; 254 bool FastCompilation = false; 255 bool UseScratchSpacePrivateMemory = true; 256 bool RelaxedBuiltins = false; 257 bool SubgroupIndependentForwardProgressRequired = true; 258 bool GreaterThan2GBBufferRequired = true; 259 bool GreaterThan4GBBufferRequired = true; 260 bool DisableA64WA = false; 261 bool ForceEnableA64WA = false; 262 bool PushConstantsEnable = true; 263 bool HasPositivePointerOffset = false; 264 bool HasBufferOffsetArg = false; 265 bool BufferOffsetArgOptional = true; 266 bool HasSubDWAlignedPtrArg = false; 267 bool replaceGlobalOffsetsByZero = false; 268 unsigned forcePixelShaderSIMDMode = 0; 269 bool pixelShaderDoNotAbortOnSpill = false; 270 bool UniformWGS = false; 271 bool disableVertexComponentPacking = false; 272 bool disablePartialVertexComponentPacking = false; 273 bool PreferBindlessImages = false; 274 bool UseBindlessMode = false; 275 bool UseLegacyBindlessMode = true; 276 bool disableMathRefactoring = false; 277 //if PTSS is enabled and if PrivateData is too large (>256k in XeHP_SDV+), 278 //we might use stateless memory to hold privatedata instead of using PTSS. 279 //this flag is for this scenario. 280 bool UseStatelessforPrivateMemory = false; 281 bool EnableTakeGlobalAddress = false; 282 bool IsLibraryCompilation = false; 283 bool FastVISACompile = false; 284 bool MatchSinCosPi = false; 285 bool CaptureCompilerStats = false; 286 // Suggest to enableZEBinary. IGC could still fall-back to legacy 287 // patch-token based binary if the input contains features those 288 // are not supported by ZEBinary 289 bool EnableZEBinary = false; 290 }; 291 292 enum class ThreadIDLayout 293 { 294 // layout IDs along X,Y,Z 295 X, 296 // Tile along just the y-dimension 297 TileY, 298 // tile IDs in 2x2 groups as expected by derivative calculations 299 QuadTile 300 }; 301 302 struct ComputeShaderInfo 303 { 304 unsigned int maxWorkGroupSize = 0; 305 unsigned int waveSize = 0; // force a wave size 306 std::vector<ComputeShaderSecondCompileInputInfoMD> ComputeShaderSecondCompile; 307 unsigned char forcedSIMDSize = 0; // 0 means not forced 308 unsigned int forceTotalGRFNum = 0; // 0 means not forced 309 unsigned int VISAPreSchedRPThreshold = 0; // 0 means use the default 310 unsigned int SetLoopUnrollThreshold = 0; // 0 means use the default 311 bool forcedVISAPreRAScheduler = false; 312 // disables dispatch along y and tiled order optimizations 313 bool disableLocalIdOrderOptimizations = false; 314 // force disables dispatch along y optimization 315 bool disableDispatchAlongY = false; 316 // If nullopt, then there is no requirement 317 std::optional<ThreadIDLayout> neededThreadIdLayout; 318 // force enable tile y optimization 319 bool forceTileYWalk = false; 320 }; 321 322 323 struct PixelShaderInfo 324 { 325 unsigned char BlendStateDisabledMask = 0; 326 bool SkipSrc0Alpha = false; 327 bool DualSourceBlendingDisabled = false; 328 bool ForceEnableSimd32 = false; // forces compilation of simd32; bypass heuristics 329 bool outputDepth = false; 330 bool outputStencil = false; 331 bool outputMask = false; 332 bool blendToFillEnabled = false; 333 bool forceEarlyZ = false; // force earlyz test 334 bool hasVersionedLoop = false; // if versioned by customloopversioning 335 // Number of samples for this pixel shader if known. 336 // Valid values 0, 1, 2, 4, 8 and 16. 337 // 0 means unknown or not set. 338 unsigned char NumSamples = 0; 339 std::vector<int> blendOptimizationMode; 340 std::vector<int> colorOutputMask; 341 }; 342 343 344 struct SInputDesc 345 { 346 unsigned int index = 0; 347 int argIndex = 0; 348 int interpolationMode = 0; 349 }; 350 351 // SimplePushInfo holds information about the promoted constant buffer 352 // region (see member descriptions in SSimplePushInfo). It also holds 353 // mappings between the byte offsets in the promoted region and 354 // corresponding argument index. 355 struct SimplePushInfo 356 { 357 unsigned int cbIdx = 0; 358 int pushableAddressGrfOffset = -1; 359 int pushableOffsetGrfOffset = -1; 360 unsigned int offset = 0; 361 unsigned int size = 0; 362 bool isStateless = false; 363 bool isBindless = false; 364 // std::map<offset, argumentIndex> 365 std::map<unsigned int, int> simplePushLoads; 366 }; 367 368 struct StatelessPushInfo 369 { 370 unsigned int addressOffset = 0; 371 bool isStatic = false; 372 }; 373 374 struct DynamicBufferInfo 375 { 376 // If numOffsets > 0, dynamic buffer offsets occupy a contiguous region 377 // of runtime values with indices in [firstIndex, firstIndex + numOffsets). 378 unsigned int firstIndex = 0; 379 unsigned int numOffsets = 0; 380 }; 381 382 // simplePushInfoArr needs to be initialized to a vector of size g_c_maxNumberOfBufferPushed, which we are doing in module MD initialization done in code gen context 383 // All the pushinfo below is mapping to an argument number (int) so that we can retrieve relevant Argument as a value pointer from Function 384 struct PushInfo 385 { 386 std::vector<StatelessPushInfo> pushableAddresses; 387 388 // Indices of RuntimeValues that can be used to compute surface state 389 // offsets for the bindless push. 390 std::vector<unsigned int> bindlessPushInfo; 391 392 // Dynamic buffer offsets info. 393 // Used only on with clients that support dynamic buffers. 394 DynamicBufferInfo dynamicBufferInfo; 395 unsigned int MaxNumberOfPushedBuffers = 0; ///> specifies the maximum number of buffers available for the simple push mechanism for current shader. 396 397 unsigned int inlineConstantBufferSlot = INVALID_CONSTANT_BUFFER_INVALID_ADDR; // slot of the inlined constant buffer 398 unsigned int inlineConstantBufferOffset = INVALID_CONSTANT_BUFFER_INVALID_ADDR; // offset of the inlined constant buffer 399 unsigned int inlineConstantBufferGRFOffset = INVALID_CONSTANT_BUFFER_INVALID_ADDR; 400 401 std::map<ConstantAddress, int> constants; 402 std::map<unsigned int, SInputDesc> inputs; 403 std::map<unsigned int, int> constantReg; 404 std::array<SimplePushInfo, g_c_maxNumberOfBufferPushed> simplePushInfoArr; 405 unsigned int simplePushBufferUsed = 0; 406 407 std::vector<ArgDependencyInfoMD> pushAnalysisWIInfos; 408 }; 409 410 struct InlineProgramScopeBuffer 411 { 412 int alignment; 413 unsigned allocSize; 414 std::vector<unsigned char> Buffer; 415 }; 416 417 struct ImmConstantInfo 418 { 419 std::vector<char> data; 420 }; 421 422 struct PointerProgramBinaryInfo 423 { 424 int PointerBufferIndex; 425 int PointerOffset; 426 int PointeeAddressSpace; 427 int PointeeBufferIndex; 428 }; 429 430 struct PointerAddressRelocInfo 431 { 432 unsigned BufferOffset; 433 unsigned PointerSize; 434 std::string Symbol; 435 }; 436 437 struct ShaderData 438 { 439 unsigned int numReplicas = 0; 440 }; 441 442 struct URBLayoutInfo 443 { 444 bool has64BVertexHeaderInput = false; 445 bool has64BVertexHeaderOutput = false; 446 bool hasVertexHeader = true; 447 }; 448 449 //metadata for the entire module 450 struct ModuleMetaData 451 { 452 bool isPrecise = false; 453 CompOptions compOpt; 454 llvm::MapVector<llvm::Function*, IGC::FunctionMetaData> FuncMD; 455 PushInfo pushInfo; 456 PixelShaderInfo psInfo; 457 ComputeShaderInfo csInfo; 458 uint32_t CurUniqueIndirectIdx = DefaultIndirectIdx; 459 std::map<uint32_t, std::array<uint32_t, 4>> inlineDynTextures; 460 std::vector<InlineResInfo> inlineResInfoData; 461 ImmConstantInfo immConstant; 462 std::vector<InlineProgramScopeBuffer> inlineConstantBuffers; 463 std::vector<InlineProgramScopeBuffer> inlineGlobalBuffers; 464 std::vector<PointerProgramBinaryInfo> GlobalPointerProgramBinaryInfos; 465 std::vector<PointerProgramBinaryInfo> ConstantPointerProgramBinaryInfos; 466 std::vector<PointerAddressRelocInfo> GlobalBufferAddressRelocInfo; 467 std::vector<PointerAddressRelocInfo> ConstantBufferAddressRelocInfo; 468 unsigned int MinNOSPushConstantSize = 0; 469 llvm::MapVector<llvm::GlobalVariable*, int> inlineProgramScopeOffsets; 470 ShaderData shaderData; 471 URBLayoutInfo URBInfo; 472 bool UseBindlessImage = false; 473 bool enableRangeReduce = false; 474 475 //when true, compiler enables MatchMad optimization for VS 476 bool allowMatchMadOptimizationforVS = false; 477 478 bool disableMemOptforNegativeOffsetLoads = false; 479 480 // When true compiler can assume that resources bound to two different 481 // bindings do not alias. 482 bool statefullResourcesNotAliased = false; 483 bool disableMixMode = false; 484 485 unsigned int privateMemoryPerWI = 0; 486 std::array<uint64_t, NUM_SHADER_RESOURCE_VIEW_SIZE> m_ShaderResourceViewMcsMask{}; 487 unsigned int computedDepthMode = 0; //Defaults to 0 meaning depth mode is off 488 // set by LowerGPCallArg pass 489 bool hasNoLocalToGenericCast = false; 490 bool hasNoPrivateToGenericCast = false; 491 }; 492 void serialize(const IGC::ModuleMetaData &moduleMD, llvm::Module* module); 493 void deserialize(IGC::ModuleMetaData &deserializedMD, const llvm::Module* module); 494 495 } 496