1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef STRUCTURE_ALINGMENT_VERIFICATION 10 #pragma once 11 #endif 12 13 // reuse GFX enumeration types common to gen6 and gen7 14 #include <sstream> 15 #include "usc.h" 16 #include "SurfaceFormats.h" 17 #include "ShaderTypesConst.h" 18 19 20 /******************************************************************************\ 21 Serialization of CompilerOutput structures is affected by structure padding 22 added by C++ compiler. Each member of a structure and the structure itself 23 maybe padded if its size does not meet specific criteria. 24 For instance: if the structure contains two members: char and int, 3 bytes 25 will be added after the char so int starts at the address that is a multiple 26 of int size. Similar rule applies the entire structure. 27 The problem is that sometimes a slight difference occurs between C++ 28 compilers. It happens when the structure inherits from another structure and 29 the base one needs padding. MS compiler adds padding between base and 30 derived one, while GCC does at the end of derived one. As the result sizes 31 on both compilers are the same, but internally, on byte basis, they are 32 different. If such construction is serialized on build from one compiler, 33 it cannot be properly deserialized on build form the second one. 34 Theoretically #pragma pack(1) could be used to prevent compilers from adding 35 padding, but some compilers have only partial support for it. 36 37 Therefore manual padding is added to prevent compilers from adding it 38 automatically. It is added between members as well as at the end of the 39 structures. 
40 41 Padding is added by unnamed bitfields of size equal to the size of type of 42 bitfield, e.g.: 43 44 One byte padding is: 45 46 char : sizeof(char) * 8 47 48 To make it less obscure, three base paddings are defined: 49 PADDING_1_BYTE 50 PADDING_2_BYTES 51 PADDING_4_BYTES 52 53 And two additional, based on architecture: 54 PADDING_4_BYTES_x64_ONLY 55 PADDING_4_BYTES_x32_ONLY 56 57 The first one will add four bytes on 64bit builds only, while the second one 58 will do the same on 32bit builds. 59 60 There is a separate verification project "glsl_compile_time_verification", 61 which verifies in compile time whether manual padding is required. 62 \******************************************************************************/ 63 #define PADDING_1_BYTE char : sizeof(char) * 8; 64 #define PADDING_2_BYTES short int : sizeof(short int) * 8; 65 #define PADDING_4_BYTES int : sizeof(int) * 8; 66 67 #ifdef _AMD64_ 68 #define PADDING_4_BYTES_x64_ONLY PADDING_4_BYTES; 69 #define PADDING_4_BYTES_x32_ONLY 70 #else 71 #define PADDING_4_BYTES_x32_ONLY PADDING_4_BYTES; 72 #define PADDING_4_BYTES_x64_ONLY 73 #endif 74 75 namespace IGC 76 { 77 enum class PushConstantMode : unsigned int 78 { 79 DEFAULT = 0, 80 SIMPLE = 1, 81 GATHER = 2, 82 NONE = 3, 83 }; 84 } 85 86 namespace USC 87 { 88 89 /*****************************************************************************\ 90 ENUM: TESSELLATOR_DOMAIN_TYPE 91 \*****************************************************************************/ 92 enum TESSELLATOR_DOMAIN_TYPE 93 { 94 TESSELLATOR_DOMAIN_QUAD, 95 TESSELLATOR_DOMAIN_TRI, 96 TESSELLATOR_DOMAIN_ISOLINE, 97 NUM_TESSELLATOR_DOMAIN_TYPES 98 }; 99 100 /*****************************************************************************\ 101 ENUM: TESSELLATOR_PARTITIONING_TYPE 102 \*****************************************************************************/ 103 enum TESSELLATOR_PARTITIONING_TYPE 104 { 105 TESSELLATOR_PARTITIONING_INTEGER, 106 TESSELLATOR_PARTITIONING_POW2, 107 
TESSELLATOR_PARTITIONING_FRACTIONAL_ODD, 108 TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN, 109 NUM_TESSELLATOR_PARTITIONING_TYPES 110 }; 111 112 /*****************************************************************************\ 113 ENUM: TESSELLATOR_OUTPUT_PRIMITIVE_TYPE 114 \*****************************************************************************/ 115 enum TESSELLATOR_OUTPUT_PRIMITIVE_TYPE 116 { 117 TESSELLATOR_OUTPUT_PRIMITIVE_POINT, 118 TESSELLATOR_OUTPUT_PRIMITIVE_LINE, 119 TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW, 120 TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW, 121 NUM_TESSELLATOR_OUTPUT_PRIMITIVE_TYPES 122 }; 123 124 /*****************************************************************************\ 125 ENUM: GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE 126 \*****************************************************************************/ 127 enum GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE 128 { 129 GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_SINGLE = 0x0, 130 GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_DUAL_INSTANCE = 0x1, 131 GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_DUAL_OBJECT = 0x2, 132 GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_SIMD8 = 0x3 133 }; 134 135 /*****************************************************************************\ 136 ENUM: GFX3DSTATE_CONTROL_DATA_FORMAT 137 \*****************************************************************************/ 138 enum GFX3DSTATE_CONTROL_DATA_FORMAT 139 { 140 GFX3DSTATE_CONTROL_DATA_FORMAT_CUT = 0x0, 141 GFX3DSTATE_CONTROL_DATA_FORMAT_SID = 0x1 142 }; 143 144 /*****************************************************************************\ 145 ENUM: GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL 146 \*****************************************************************************/ 147 enum GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL 148 { 149 GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_NORMAL = 0x0, 150 GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_PSEXEC = 0x1, 151 GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_PREPS = 0x2 152 // Reserved = 0x3 153 }; 154 155 
/*****************************************************************************\ 156 ENUM: GFX3DSTATE_COMPUTED_DEPTH_MODE 157 \*****************************************************************************/ 158 enum GFX3DSTATE_COMPUTED_DEPTH_MODE 159 { 160 GFX3DSTATE_COMPUTED_DEPTH_MODE_OFF = 0x0, 161 GFX3DSTATE_COMPUTED_DEPTH_MODE_ON = 0x1, 162 GFX3DSTATE_COMPUTED_DEPTH_MODE_ON_GE_SRC = 0x2, 163 GFX3DSTATE_COMPUTED_DEPTH_MODE_ON_LE_SRC = 0x3 164 }; 165 166 /*****************************************************************************\ 167 ENUM: GFX3DSTATE_ROUNDING_MODE 168 \*****************************************************************************/ 169 enum GFX3DSTATE_ROUNDING_MODE 170 { 171 GFX3DSTATE_ROUNDING_MODE_ROUND_TO_NEAREST_EVEN = 0x0, 172 GFX3DSTATE_ROUNDING_MODE_ROUND_TO_POS_INF = 0x1, 173 GFX3DSTATE_ROUNDING_MODE_ROUND_TO_NEG_INF = 0x2, 174 GFX3DSTATE_ROUNDING_MODE_ROUND_TO_ZERO = 0x3 175 }; 176 177 /*****************************************************************************\ 178 ENUM: GFXMEDIA_GPGPU_MODE 179 \*****************************************************************************/ 180 enum GFXMEDIA_GPGPU_MODE 181 { 182 GFXMEDIA_GPGPU_MODE_MEDIA = 0x0, 183 GFXMEDIA_GPGPU_MODE_GPGPU = 0x1 184 }; 185 186 /*****************************************************************************\ 187 ENUM: GFXMEDIA_MMIO_ACCESS_CONTROL 188 \*****************************************************************************/ 189 enum GFXMEDIA_MMIO_ACCESS_CONTROL 190 { 191 GFXMEDIA_MMIO_ACCESS_CONTROL_NO_READWRITE = 0x0, 192 GFXMEDIA_MMIO_ACCESS_CONTROL_OA_READWRITE = 0x1, 193 GFXMEDIA_MMIO_ACCESS_CONTROL_ANY_READWRITE = 0x2 194 }; 195 196 /*****************************************************************************\ 197 ENUM: GFXMEDIA_GPUWALKER_SIMDSIZE 198 \*****************************************************************************/ 199 enum GFXMEDIA_GPUWALKER_SIMD 200 { 201 GFXMEDIA_GPUWALKER_SIMD8 = 0x0, 202 GFXMEDIA_GPUWALKER_SIMD16 = 0x1, 203 
GFXMEDIA_GPUWALKER_SIMD32 = 0x2 204 }; 205 206 /*****************************************************************************\ 207 Enum: GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT 208 \*****************************************************************************/ 209 enum GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT 210 { 211 GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED = 0x0, // All components disabled 212 GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XY = 0x1, // 2D attribute, z and w components disabled 213 GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZ = 0x2, // 3D attribute, w components disabled 214 GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZW = 0x3, // 4D attribute, no disabled components 215 }; 216 217 enum GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE 218 { 219 GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_NONE, // No Coverage 220 GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_NORMAL, // OUTERCONSERVATIVE when conservative rasterization is enabled. 221 // Normal otherwise. 222 GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_INNERCONSERVATIVE, // INNER conservative rasterization 223 GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_DEPTH_COVERAGE // Depth coverage 224 }; 225 226 /*****************************************************************************\ 227 STRUCT: STypedUAVReadEmulationEntry 228 \*****************************************************************************/ 229 struct STypedUAVReadEmulationEntry 230 { 231 bool m_Valid; 232 bool m_DoubleHorizontalSize; 233 IGC::SURFACE_FORMAT m_SubstituteFormat; 234 unsigned int m_UAVEmulationIndex; 235 }; 236 237 /*****************************************************************************\ 238 STRUCT: SInterfaceThisData 239 \*****************************************************************************/ 240 struct STypedUAVReadEmulBTLayout 241 { 242 STypedUAVReadEmulationEntry* pTypedUAVReadEmulBTEntries; 243 unsigned int TypedUAVReadEmulBTEntriesSize; 244 }; 245 246 struct ConstantAddress 247 { 248 unsigned int bufId = 0; 
249 unsigned int eltId = 0; 250 int size = 0; 251 SerializeConstantAddress252 void Serialize( std::stringstream& stringStream ) const 253 { 254 stringStream << bufId << eltId << size; 255 } 256 }; 257 258 bool operator < (const ConstantAddress &a, const ConstantAddress &b); 259 260 struct ConstantAddrValue 261 { 262 ConstantAddress ca; 263 bool anyValue; 264 uint32_t value; 265 }; 266 267 struct InlineDynConstants 268 { 269 ConstantAddress ca; 270 uint32_t value; 271 SerializeInlineDynConstants272 void Serialize( std::stringstream& stringStream ) const 273 { 274 ca.Serialize( stringStream ); 275 276 stringStream << value; 277 } 278 }; 279 280 // Dynamic Constant Folding 281 struct DynamicConstFoldingInputs 282 { 283 const InlineDynConstants* pInlineDynConstants = nullptr; 284 unsigned int m_inlineDynConstantsSize = 0; 285 SerializeDynamicConstFoldingInputs286 void Serialize( std::stringstream& stringStream ) const 287 { 288 for( unsigned int i = 0; i < m_inlineDynConstantsSize; i++ ) 289 { 290 pInlineDynConstants[ i ].Serialize( stringStream ); 291 } 292 } 293 }; 294 295 // Constant Buffer to Constant Register gather entry 296 struct SConstantGatherEntry 297 { 298 // ### DW3 3DSTATE_GATHER_CONSTANT_* ### 299 union _GatherEntry 300 { 301 struct _Fields 302 { 303 unsigned short constantBufferIndex : 4; // bits 3:0 304 unsigned short channelMask : 4; // bits 7:4 305 unsigned short constantBufferOffset : 8; // bits 15:8 306 } Fields; 307 unsigned short Value; 308 } GatherEntry; 309 }; 310 311 /*****************************************************************************\ 312 STRUCT: SComputeShaderNOSLayout 313 \*****************************************************************************/ 314 struct SComputeShaderNOSLayout 315 { 316 unsigned int runtimeVal_LoopCount; 317 unsigned int runtimeVal_ResWidthOrHeight; 318 unsigned int runtimeVal_ConstBufferSize; 319 }; 320 321 struct SCompilerInputCommon 322 { 323 DynamicConstFoldingInputs m_DcfInputs; 324 void* 
m_pGTPinInput; 325 const unsigned int* m_pShaderDebugInfo; 326 327 IGC::PushConstantMode m_PushConstantMode; 328 SerializeSCompilerInputCommon329 void Serialize( std::stringstream& shaderCacheBlob ) const 330 { 331 m_DcfInputs.Serialize( shaderCacheBlob ); 332 shaderCacheBlob << (unsigned int)m_PushConstantMode; 333 334 // Assume that m_pGTPinInput and m_pShaderDebugInfo are not valid when caching 335 } 336 }; 337 338 /*****************************************************************************\ 339 STRUCT: SCompilerInputCommon_Gen7 340 \*****************************************************************************/ 341 struct SCompilerInputCommon_Gen7 : public SCompilerInputCommon 342 { 343 bool secondCompile; // Set this flag to indicate to the compiler that this is the 2nd compilation of the kernel 344 bool isRowMajor; 345 int numChannelsUsed; 346 unsigned int shaderHash; 347 SerializeSCompilerInputCommon_Gen7348 void Serialize( std::stringstream& shaderCacheBlob ) const 349 { 350 SCompilerInputCommon::Serialize( shaderCacheBlob ); 351 shaderCacheBlob << secondCompile << isRowMajor << numChannelsUsed << shaderHash; 352 } 353 }; 354 355 static const SComputeShaderNOSLayout g_nosLayout = { 0, 1, 2 }; 356 357 struct SCompilerOutputCommon_Gen7 358 { 359 // DX10+ immediate constants defined in shader code; expected driver behavior: 360 // a) allocate an internal CB of size 'm_ImmediateConstantsSize' 361 // b) copy 'm_pImmediateConstants' data to this CBbuffer 362 // c) bind internal CB to 'SBindingTableLayout.immediateConstantBufferIndex' 363 void* m_pImmediateConstants; 364 unsigned int m_ImmediateConstantsSize; // if 0, immediate constants not used 365 366 // DX11+ shader interface binding table; expected driver behavior: 367 // a) allocate an internal CB of size 'm_InterfaceConstantsSize' 368 // b) lock buffer for writting on SetShaderWithInterfaces() 369 // c) call Populate*ShaderInterfaceData11() passing interface bind data 370 // d) unmap buffer and bind to 
'SBindingTableLayout.interfaceConstantBufferIndex' 371 unsigned int m_InterfaceConstantsSize; // if 0, interface buffer not used 372 void* VFuncOffsets; // call offsets to virtual functions used in kernel program 373 374 // Helper field containing a pointer to the compiled shader object. 375 // IVB-specific. Should not be used on HSW+ platforms. 376 void* m_pShaderHandle; 377 378 // Constant Buffer to Constant Register gather map 379 SConstantGatherEntry* m_pGatherConstants; 380 // Number of entries in gather constants map. The number of entries is always even 381 // which makes the gather constants map size a multiple of unsigned int. 382 unsigned int m_GatherConstantsSize; // if 0, gather map not used 383 // Bitmap of valid constant buffers in the push constants gather. 384 // Specifies which of the 16 constant buffers are used in the push constants gather. 385 // If a bit is set it indicates the corresponding constant buffer is used. 386 // If a bit is clear it indicates the corresponding constant buffer is not used. 387 // ### DW1 3DSTATE_GATHER_CONSTANT_* ### 388 unsigned short m_GatherConstantsBufferValid; // if 0, gather buffer not used 389 390 bool m_IsMessageTargetDataCacheDataPort; 391 392 // USC enables this to indicate that it expects that the VE component packing 393 // has been applied to the delivered thread's payload 394 // ### Gen9+: (DW0, bit 9) 3DSTATE_VF ### 395 bool ComponentPackingEnable; 396 397 // ### DW1 3DSTATE_CONSTANT_* ### 398 unsigned int m_ConstantBuffer1ReadLength; // Constant Buffer 1 Read Length (DW1, bit 31..16) 399 // In 256-bit units. If 0, gather map not used. 400 401 // Mask of constant buffers accessed by kernel (if BIT#n==0, kernel does not access CB#n) 402 // CB usage can change after applying ConstantBuffersToConstantRegisters optimization. 
403 unsigned int m_ConstantBufferAccessed; 404 405 // Bitmask that indicates which MSAA level is used for UAV load/store 406 unsigned int m_MsaaUAVMask; 407 408 // Additional UAV Binding Table Entries to be used for emulation 409 // of Typed UAV loads from surface formats unsupported by hardware. 410 unsigned int m_TypedUAVReadEmulationEntriesSize; 411 STypedUAVReadEmulationEntry* m_pTypedUAVReadEmulationEntries; 412 413 unsigned int m_ShaderHash; 414 unsigned int m_ShaderOrdinal; 415 unsigned int m_CompileNum; 416 417 // ISA to IL map. 418 unsigned int m_ISA2ILMapSize[3]; 419 void* m_pISA2ILMap[3]; 420 421 // Bitmask of shader resources accessed by gather4 instructions 422 // with green channel select and not accessed by any other then gather4 423 // instruction type. This bitmask is a part of 424 // the WaGather4WithGreenChannelSelectOnR32G32Float workaround. 425 // DW0 - bitmask of resource indexes 0 - 31 426 // DW1 - bitmask of resource indexes 32 - 63 427 // DW2 - bitmask of resource indexes 64 - 95 428 // DW3 - bitmask of resource indexes 96 - 127 429 unsigned int m_WaGather4WithGreenResourceMask[4]; 430 431 // Bitmask of shader resources accessed by sample_c instructions. This 432 // field is only used when shader compiler was created with the 433 // EnableWaCheckResourceFormatForNFSRivals bit set. 434 unsigned int m_SampleWithComparisonResourceMask[4]; 435 436 int m_UAVSlotsDeclared; // true if one or more UAVs declared 437 unsigned int m_ResourceSlotMask[4]; 438 439 // Component(channel) mask provided in the least significant nibble of each table element 440 // ### Gen9+: DW1-DW4 3DSTATE_VF_COMPONENT_PACKING ### 441 unsigned int ElementComponentDeliverMask[ NUM_VSHADER_INPUT_REGISTERS_PACKAGEABLE ]; 442 443 // Same as above mask but HW-agnostic. Used for cross-shader optimizations. 444 unsigned int ElementComponentUseMask[ NUM_VSHADER_INPUT_REGISTERS_PACKAGEABLE ]; 445 446 // Bitmask of input registers that are *used* by the shader. 
447 // The field ElementComponentDeliverMask contains 4-bit nibbles. Subsequent 448 // nibbles are referring to subsequent bits set in this mask. In other words, 449 // for bits cleared in this mask, nibbles are omitted from the field 450 // ElementComponentDeliverMask (only nibbles for bits set here are present). 451 unsigned int ElementDeliverMask; 452 453 unsigned long long m_UAVSlotsWAppendConsume; // used as bitfield, each bit 454 // represent UAV slot that is 455 // referenced with Append/Consume. 456 457 // Planar YUV formats NOS data. 458 // For each texture with index 'i' declared as planar YUV by 459 // SGen6PixelShaderKernelProgramCacheKey.SetPlanarYUVFormat(i, ...) 460 // this table keeps resource numbers of the additional planes used by sampling. 461 // If three separate planes are defined, Y is at the original texture index 462 // while indices of U and V are given in this table. When Y, V channels are 463 // packed in one plane, both indices are set to the same resource number. 464 // E.g.: 465 // For YV12: 466 // m_planarTextureResourceIndex[i][0] -- index of V plane resource, 467 // m_planarTextureResourceIndex[i][1] -- index of U plane resource. 468 // for NV12: 469 // m_planarTextureResourceIndex[i][0] -- index of interleaved U+V plane resource, 470 // m_planarTextureResourceIndex[i][1] -- unused, same as [i][0] 471 // 472 // If no planar YUV format is defined, both table entries are set to i. 
473 unsigned int m_planarTextureResourceIndex[NUM_TEXTURE_SLOTS][NUM_EXTRA_PLANES]; 474 475 // Max binding table index used for stateful, non-TGSM resources 476 unsigned int m_MaxBindingTableIndex; 477 478 // If we have indirect sampling and >16 samplers we need to use even slots only, so double the amount 479 bool m_IsUsingDoubleSamplerSlots; 480 481 // Indicates if the shader has any control flow 482 bool m_hasControlFlow; 483 484 bool m_UsesTextureFences; 485 PADDING_1_BYTE 486 487 // used by GenUpdateCB 488 void* m_ConstantBufferReplaceShaderPatterns; 489 unsigned int m_ConstantBufferReplaceShaderPatternsSize; 490 unsigned int m_ConstantBufferUsageMask; 491 unsigned int m_ConstantBufferReplaceSize; 492 PADDING_4_BYTES_x64_ONLY 493 }; 494 495 /*****************************************************************************\ 496 \*****************************************************************************/ 497 USC_PARAM() 498 struct SCompilerOutputVertexShader_Gen7 : public SCompilerOutputCommon_Gen7 499 { 500 // ### DW1 3DSTATE_VS ### 501 void* m_pKernelProgram; // Kernel Start Pointer (DW1, bit 31..6) 502 unsigned int m_KernelProgramSize; 503 504 // ### DW2 3DSTATE_VS ### 505 int m_SingleProgramFlow; // Single Program Flow (DW2, bit 31) 506 unsigned int m_SamplerCount; // Sampler Count (DW2, bit 29..27) 507 unsigned int m_BindingTableEntryCount; // Binding Table Entry Count (DW2, bit 25..18) 508 // Gen7 and Gen7.5+ with HW binding table generation disabled. 509 unsigned int m_BindingTableEntryBitmap; // Binding Table Entry Count (DW2, bit 25..18) 510 // Gen7.5+ with HW binding table generation enabled. 
511 GFX3DSTATE_FLOATING_POINT_MODE m_FloatingPointMode; //Floating Point Mode (DW2, bit 16) 512 513 // ### DW3 3DSTATE_VS ### 514 unsigned int m_PerThreadScratchSpace; // Per-Thread Scratch Space (DW3, bit 3..0) 515 516 // ### DW4 3DSTATE_VS ### 517 unsigned int m_DispatchGRFStartRegister; // Dispatch GRF Start Register (DW4, bit 24..20) 518 unsigned int m_VertexURBEntryReadLength; // Vertex URB Entry Read Length (DW4, bit 16..11) 519 unsigned int m_VertexURBEntryReadOffset; // Vertex URB Entry Read Offset (DW4, bit 9..4) 520 521 // ### DW5 3DSTATE_VS ### 522 unsigned int m_MaxNumberThreads; // Maximum Number Of Threads (DW5, bit 31..25) 523 524 // ### DW1 3DSTATE_SBE ### 525 unsigned int m_SBEVertexURBEntryReadOffset; // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4) 526 527 // Other 528 unsigned int m_URBAllocationSize; 529 unsigned int m_URBEntryWriteLength; 530 unsigned int m_URBEntriesPerHandle; 531 532 int m_HasInstanceID; 533 int m_HasVertexID; 534 535 unsigned int m_InstanceIDIndex; 536 unsigned int m_VertexIDIndex; 537 538 unsigned int m_InstanceIDMask; 539 unsigned int m_VertexIDMask; 540 541 unsigned int m_UserClipDistancesMask; 542 unsigned int m_UserCullDistancesMask; 543 unsigned int m_AntiAliasTextureCoordinateId; 544 unsigned int m_VsMaxNumInputRegisters; 545 546 int m_DeclaresVPAIndex; 547 int m_DeclaresRTAIndex; 548 549 unsigned int m_InstructionCount; 550 551 PADDING_4_BYTES_x32_ONLY 552 }; 553 554 /*****************************************************************************\ 555 \*****************************************************************************/ 556 USC_PARAM() 557 struct SCompilerOutputHullShader_Gen7 : public SCompilerOutputCommon_Gen7 558 { 559 // ### DW1 3DSTATE_HS ### 560 unsigned int m_SamplerCount; //Sampler Count (DW1, bit 29..27) 561 unsigned int m_BindingTableEntryCount; //Binding Table Entry Count (DW1, bit 25..18) 562 //Gen7 and Gen7.5+ with HW binding table generation disabled. 
563 unsigned int m_BindingTableEntryBitmap; //Binding Table Entry Count (DW2, bit 25..18) 564 //Gen7.5+ with HW binding table generation enabled. 565 unsigned int m_MaxNumberThreads; //Maximum Number Of Threads (DW1, bit 6..0) 566 567 // ### DW2 3DSTATE_HS ### 568 int m_HSEnable; //HS Enable (DW2, bit 31) 569 //Statistics Enable (DW2, bit 29) 570 unsigned int m_InstanceCount; //InstanceCount (DW2, bit 7..0) 571 572 // ### DW3 3DSTATE_HS ### 573 void* m_pKernelProgram; //Kernel Start Pointer (DW3, bit 31..6) 574 unsigned int m_KernelProgramSize; 575 576 // ### DW4 3DSTATE_HS ### 577 unsigned int m_PerThreadScratchSpace; //Per-Thread Scratch Space (DW4, bit 3..0) 578 579 // ### DW5 3DSTATE_HS ### 580 int m_SingleProgramFlow; //Single Program Flow (DW5, bit 27) 581 int m_IncludeVertexHandles; //Include Vertex Handles (DW5, bit 24) 582 unsigned int m_DispatchGRFStartRegister; //Dispatch GRF Start Register (DW5, bit 23..19) 583 unsigned int m_VertexURBEntryReadLength; //Vertex URB Entry Read Length (DW5, bit 16..11) 584 unsigned int m_VertexURBEntryReadOffset; //Vertex URB Entry Read Offset (DW5, bit 9..4) 585 586 // Other 587 int m_HasNOSDefaultTesselationFactors; 588 unsigned int m_URBAllocationSize; 589 unsigned int m_URBEntryWriteLength; 590 unsigned int m_URBEntriesPerHandle; 591 592 int m_AttributePullModelUsed; 593 unsigned int m_PatchConstantURBSize; 594 unsigned int m_NumberOutputControlPoints; 595 unsigned int m_NumberInputControlPoints; 596 597 // Only used when VS is skipped and VF used directly in HS: 598 int m_HasInstanceID; 599 unsigned int m_InstanceIDIndex; 600 unsigned int m_InstanceIDMask; 601 602 // ### 3DSTATE_TE related fields ### 603 TESSELLATOR_PARTITIONING_TYPE m_Partitioning; 604 TESSELLATOR_OUTPUT_PRIMITIVE_TYPE m_OutputPrimitive; 605 TESSELLATOR_DOMAIN_TYPE m_Domain; 606 float m_MaxTessFactor; 607 unsigned int m_InstructionCount; 608 609 PADDING_4_BYTES_x64_ONLY 610 }; 611 612 
/*****************************************************************************\ 613 \*****************************************************************************/ 614 USC_PARAM() 615 struct SCompilerOutputDomainShader_Gen7 : public SCompilerOutputCommon_Gen7 616 { 617 // ### DW1 3DSTATE_DS ### 618 void* m_pKernelProgram; //Kernel Start Pointer (DW1, bit 31..6) 619 unsigned int m_KernelProgramSize; 620 621 // ### DW2 3DSTATE_DS ### 622 int m_SingleProgramFlow; //Single Program Flow (DW2, bit 31) 623 unsigned int m_SamplerCount; //Sampler Count (DW2, bit 29..27) 624 unsigned int m_BindingTableEntryCount; //Binding Table Entry Count (DW2, bit 25..18) 625 //Gen7 and Gen7.5+ with HW binding table generation disabled. 626 unsigned int m_BindingTableEntryBitmap; //Binding Table Entry Count (DW2, bit 25..18) 627 //Gen7.5+ with HW binding table generation enabled. 628 629 // ### DW3 3DSTATE_DS ### 630 unsigned int m_PerThreadScratchSpace; //Per-Thread Scratch Space (DW3, bit 3..0) 631 632 // ### DW4 3DSTATE_DS ### 633 unsigned int m_DispatchGRFStartRegister; //Dispatch GRF Start Register (DW4, bit 24..20) 634 unsigned int m_PatchURBEntryReadLength; //Patch URB Entry Read Length (DW4, bit 17..11) 635 unsigned int m_PatchURBEntryReadOffset; //Patch URB Entry Read Offset (DW4, bit 9..4) 636 637 // ### DW5 3DSTATE_DS ### 638 unsigned int m_MaxNumberThreads; //Maximum Number Of Threads (DW5, bit 31..25) 639 //Statistics Enable (DW5, bit 10) 640 int m_ComputeWAttribute; //Compute W Coordinate Enable (DW5, bit 2) 641 int m_DSCacheDisable; //DS Cache Disable (DW5, bit 1) 642 int m_DSEnable; //DS Enable (DW5, bit 0) 643 644 // ### DW1 3DSTATE_SBE ### 645 unsigned int m_SBEVertexURBEntryReadOffset; // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4) 646 647 // Other 648 unsigned int m_URBAllocationSize; 649 unsigned int m_URBEntryWriteLength; 650 unsigned int m_URBEntriesPerHandle; 651 652 unsigned int m_UserClipDistancesMask; 653 unsigned int m_UserCullDistancesMask; 
// ### 3DSTATE_TE related fields ###
    TESSELLATOR_DOMAIN_TYPE              m_Domain;
    TESSELLATOR_PARTITIONING_TYPE        m_Partitioning;
    TESSELLATOR_OUTPUT_PRIMITIVE_TYPE    m_OutputPrimitive;
    unsigned int    m_InstructionCount;

    bool            m_DeclaresVPAIndex;
    bool            m_DeclaresRTAIndex;

    PADDING_2_BYTES
    PADDING_4_BYTES_x32_ONLY
};

/*****************************************************************************\
STRUCT: SCompilerOutputGeometryShader_Gen7

Description:
    Geometry shader compiler output. Adds the members below to
    SCompilerOutputCommon_Gen7. The "### DWn <COMMAND> ###" headers name the
    hardware command DWORD that each group of members is annotated against.

    Layout note: this structure is serialized. Member order, member types and
    the explicit padding at the end must not be changed without re-checking
    the layout (see the padding notes at the top of this file).
\*****************************************************************************/
USC_PARAM()
struct SCompilerOutputGeometryShader_Gen7 : public SCompilerOutputCommon_Gen7
{
    // ### DW1 3DSTATE_GS ###
    void*           m_pKernelProgram[2];            //Kernel Start Pointer (DW1, bit 31..6)
                                                    //[Rendering Disabled|Enabled]
    unsigned int    m_KernelProgramSize[2];         //[Rendering Disabled|Enabled]

    // ### DW2 3DSTATE_GS ###
    int             m_SingleProgramFlow;            //Single Program Flow (DW2, bit 31)
    unsigned int    m_SamplerCount;                 //Sampler Count (DW2, bit 29..27)
    unsigned int    m_BindingTableEntryCount;       //Binding Table Entry Count (DW2, bit 25..18)
                                                    //Gen7 and Gen7.5+ with HW binding table generation disabled.
    unsigned int    m_BindingTableEntryBitmap;      //Binding Table Entry Bitmap (DW2, bit 25..18)
                                                    //Gen7.5+ with HW binding table generation enabled.
                                                    //NOTE(review): this comment previously also read
                                                    //"Binding Table Entry Count"; confirm the HW field name.

    // ### DW3 3DSTATE_GS ###
    unsigned int    m_PerThreadScratchSpace;        //Per-Thread Scratch Space (DW3, bit 3..0)

    // ### DW4 3DSTATE_GS ###
    unsigned int    m_OutputVertexSize[2];          //Output Vertex Size (DW4, bit 28..23)
                                                    //[Rendering Disabled|Enabled]
    GFX3DPRIMITIVE_TOPOLOGY_TYPE    m_OutputTopology;   //Output Topology (DW4, bit 22..17)
    unsigned int    m_VertexEntryReadLength;        //Vertex URB Entry Read Length (DW4, bit 16..11)
    int             m_IncludeVertexHandles;         //Include Vertex Handles (DW4, bit 10)
    unsigned int    m_VertexEntryReadOffset;        //Vertex URB Entry Read Offset (DW4, bit 9..4)
    unsigned int    m_DispatchGRFStartRegister;     //Dispatch GRF Start Register (DW4, bit 3..0)

    // ### DW5 3DSTATE_GS ###
    unsigned int    m_MaxNumberThreads;             //Maximum Number Of Threads (DW5, bit 31..25)
    GFX3DSTATE_CONTROL_DATA_FORMAT  m_ControlDataFormat;    //Control Data Format (DW5, bit 24)
    unsigned int    m_ControlDataHeaderSize;        //Control Data Header Size (DW5, bit 23..20)
    unsigned int    m_InstanceControl;              //Instance Control (DW5, bit 19..15)
    unsigned int    m_DefaultStreamId;              //Default Stream ID (DW5, bit 14..13)
    GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE    m_DispatchMode; //Dispatch Mode (DW5, bit 12..11)
                                                    //Statistics Enable (DW5, bit 10)
                                                    //GS Invocations Increment Value(DW5, bit 9..5)
    int             m_IncludePrimitiveIdEnable;     //Include PrimitiveId Enable (DW5, bit 4)
                                                    //Rendering Enable Hint (DW5, bit 3)
    int             m_ReorderEnable;                //Reorder Enable (DW5, bit 2)
    int             m_DiscardAdjacencyEnable;       //Discard Adjacency Enable (DW5, bit 1)
    int             m_GSEnable;                     //GS Enable (DW5, bit 0)

    // ### DW1 3DSTATE_SBE ###
    unsigned int    m_SBEVertexURBEntryReadOffset;  // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4)

    // Other
    unsigned int    m_URBAllocationSize;
    unsigned int    m_URBEntryWriteLength;
    unsigned int    m_URBEntriesPerHandle;

    unsigned int    m_UserClipDistancesMask;
    unsigned int    m_UserCullDistancesMask;
    unsigned int    m_MaxOutputVertexCount;

    unsigned int    m_InstructionCount;

    bool            m_DeclaresVPAIndex;
    bool            m_DeclaresRTAIndex;

    // Explicit tail padding after the two bools, so the compiler adds none of
    // its own (see the padding notes at the top of this file).
    PADDING_2_BYTES
    PADDING_4_BYTES
};

/*****************************************************************************\
STRUCT: SCompilerOutputPixelShader_Gen7

Description:
    Pixel shader compiler output. Adds the members below to
    SCompilerOutputCommon_Gen7. Members annotated with a DWORD/bit position
    are grouped under the 3DSTATE_PS / 3DSTATE_WM header they are annotated
    against. Arrays sized NUM_PS_DISPATCH_TYPES hold one entry per pixel
    shader dispatch type.

    Layout note: this structure is serialized. Member order, member types and
    the explicit padding at the end must not be changed without re-checking
    the layout (see the padding notes at the top of this file).
\*****************************************************************************/
USC_PARAM()
struct SCompilerOutputPixelShader_Gen7 : public SCompilerOutputCommon_Gen7
{
    // ### DW1, DW6, DW7 3DSTATE_PS ###

    void*           m_pKernelProgram[NUM_PS_DISPATCH_TYPES];
    unsigned int    m_KernelProgramSize[NUM_PS_DISPATCH_TYPES];
    int             m_EnablePixelDispatch[NUM_PS_DISPATCH_TYPES];

    unsigned int    m_NumberOfSFOutputAttributes;

    // ### DW2 3DSTATE_PS ###
    GFX3DSTATE_FLOATING_POINT_MODE  m_FloatingPointMode;    // (DW2, bit 16)
    unsigned int    m_BindingTableEntryCount;       // Binding Table Entry Count (DW2, bit 25..18)
                                                    // Gen7 and Gen7.5+ with HW binding table generation disabled.
    unsigned int    m_BindingTableEntryBitmap;      // Binding Table Entry Bitmap (DW2, bit 25..18)
                                                    // Gen7.5+ with HW binding table generation enabled.
                                                    // NOTE(review): this comment previously also read
                                                    // "Binding Table Entry Count"; confirm the HW field name.
    unsigned int    m_SamplerCount;                 // Sampler Count (DW2, bit 29..27)

    int             m_VectorMaskEnable;             // Gen8+ need to know if we use VMASK or DMASK for pixel dispatch

    GFX3DSTATE_PROGRAM_FLOW         m_SingleProgramFlow;    // (DW2, bit 31)

    // ### DW3 3DSTATE_PS ###
    unsigned int    m_PerThreadScratchSpace;        // Per-Thread Scratch Space (DW3, bit 3..0)

    // ### DW4 3DSTATE_PS ###
    GFX3DSTATE_POSITIONXY_OFFSET    m_PositionXYOffset;     // (DW4, bit 4..3)
    int             m_HasOMaskOutput;               // OMask Present to RT (DW4, bit 9)
    int             m_AttributeEnable;              // Attribute Enable (DW4, bit 10)
    int             m_PushConstantEnable;           // Push Constant Enable (DW4, bit 11)
    unsigned int    m_SampleMask;                   // Sample Mask, for Gen7.5 only (DW4, bit 19..12)
    unsigned int    m_MaxNumberThreads;             // Maximum Number Of Threads (DW4, bit 31..23)

    // ### DW5 3DSTATE_PS ###
    // Dispatch GRF Start Registers For Constant/Setup Data
    unsigned int    m_DispatchGRFStartRegForConstSetupData[NUM_PS_DISPATCH_TYPES];  // (DW5, bit 22..16, 14..8, 6..0)

    // ### DW1 3DSTATE_WM ###
    int             m_UsesInputCoverageMask;                            // (DW1, bit 10)
    int             m_RequiresBarycentricPerspectivePixelLocation;      // (DW1, bit 11)
    int             m_RequiresBarycentricPerspectiveCentroid;           // (DW1, bit 12)
    int             m_RequiresBarycentricPerspectiveSample;             // (DW1, bit 13)
    int             m_RequiresBarycentricNonPerspectivePixelLocation;   // (DW1, bit 14)
    int             m_RequiresBarycentricNonPerspectiveCentroid;        // (DW1, bit 15)
    int             m_RequiresBarycentricNonPerspectiveSample;          // (DW1, bit 16)

    GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE    m_PositionZWInterpolationMode;  // (DW1, bit 18..17)
    int             m_UsesSourceW;                  // PS Uses Source W (DW1, bit 19)
    int             m_UsesSourceDepth;              // PS Uses Source Depth (DW1, bit 20)
    GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL      m_EarlyDepthStencilControl;     // (DW1, bit 22..21)
    GFX3DSTATE_COMPUTED_DEPTH_MODE              m_ComputedDepthMode;            // (DW1, bit 24..23)
    int             m_KillsPixel;                   // PS Kill Pixel (DW1, bit 25)

    int             m_HasStoreOrAtomicInstructions;
    unsigned int    m_RenderTargetMask;

    // ### DW2 3DSTATE_WM ###
    int             m_UAVOnly;                      // (DW2, bit 30)

    // Other
    int             m_KernelIsPerSample;
    int             m_HasNOSInputSampleIndex;
    int             m_HasNOSUnlitCentroidInterpolation;
    int             m_HasPrimitiveIdInput;
    unsigned int    m_PrimitiveIdIndex;
    int             m_OverrideX;
    int             m_OverrideY;
    int             m_OverrideZ;
    int             m_OverrideW;
    unsigned int    m_SamplersUsageMask;

    unsigned int    m_ConstantInterpolationEnableMask;

    // Fields used by the sample_c workaround.
    // IVB-specific
    int             m_HasSampleCmpWaCandidates;                 // shader kernel has sample_c instructions eligible for the wa
    int             m_SampleCmpWaRequiresSingleLODResources;    // sampled resources must have only 1 LOD or have MIP filter disabled
    int             m_SampleCmpWaSampler;                       // index of the sampler used by sample_c instructions
    unsigned int    m_SampleCmpWaResourcesMask[4];              // bitmap of resources sampled by sample_c instructions

    unsigned int    m_InstructionCount[NUM_PS_DISPATCH_TYPES];

    bool            m_HigherSIMDRecommended;        // True if a compilation in higher SIMD can be beneficial.
    bool            m_HasSampleInfoInstruction;     // True if pixel shader uses samplepos instruction.

    // Used by SWStencil
    bool            m_IsSWStencilPossible;
    bool            m_NeedMSAARate;

    // Explicit tail padding: expands to a 4-byte unnamed bitfield only when
    // _AMD64_ is defined, and to nothing otherwise (see the macro definitions
    // at the top of this file).
    // NOTE(review): the trailing ';' is redundant, since the macro already
    // supplies its own terminator. The other structures in this file use the
    // padding macros without it.
    PADDING_4_BYTES_x64_ONLY;
};

/*****************************************************************************\
STRUCT: SCompilerOutputComputeShader_Gen7

Description:
    Compute shader compiler output. Adds the members below to
    SCompilerOutputCommon_Gen7. Members are grouped under the
    INTERFACE_DESCRIPTOR_DATA / MEDIA_VFE_STATE / MEDIA_CURBE_LOAD /
    GPGPU_WALKER header they are annotated against. Members under "Other" are
    marked as values the driver has to interpret rather than copy to a HW
    command.

    Layout note: this structure is serialized. Member order, member types and
    the explicit padding at the end must not be changed without re-checking
    the layout (see the padding notes at the top of this file).
\*****************************************************************************/
USC_PARAM()
struct SCompilerOutputComputeShader_Gen7 : public SCompilerOutputCommon_Gen7
{
    // ### DW0 INTERFACE_DESCRIPTOR_DATA ###
    void*           m_pKernelProgram;               // Kernel Start Pointer (DW0, bit 31..6)
    unsigned int    m_KernelProgramSize;

    // ### DW1 INTERFACE_DESCRIPTOR_DATA ###
    GFX3DSTATE_FLOATING_POINT_MODE  m_FloatingPointMode;    // (DW1, bit 16)
    GFX3DSTATE_PROGRAM_FLOW         m_SingleProgramFlow;    // (DW1, bit 18)

    // ### DW2 INTERFACE_DESCRIPTOR_DATA ###
    unsigned int    m_SamplerCount;                 // Sampler Count (DW2, bit 4..2)

    // ### DW3 INTERFACE_DESCRIPTOR_DATA ###
    unsigned int    m_BindingTableEntryCount;       // Binding Table Entry Count (DW3, bit 4..0)

    // ### DW4 INTERFACE_DESCRIPTOR_DATA ###
    unsigned int    m_CurbeReadOffset;              // Constant URB Entry Read Offset (DW4, bit 15..0)
    unsigned int    m_CurbeReadLength;              // Constant URB Entry Read Length (DW4, bit 31..16)

    // ### DW5 INTERFACE_DESCRIPTOR_DATA ###
    unsigned int    m_PhysicalThreadsInGroup;       // Number of Threads in GPGPU Thread Group
                                                    // (DW5, bit 7..0)
    unsigned int    m_BarrierReturnByte;            // Barrier Return Byte (DW5 bit 15..8)
    int             m_BarrierUsed;                  // Barrier Enable (DW5, bit 21)
    GFX3DSTATE_ROUNDING_MODE        m_RoundingMode; // Rounding Mode (DW5, bit 23..22)
    unsigned int    m_BarrierReturnGrfOffset;       // Barrier Return GRF Offset (DW5, bit 31..24)

    // ### DW6 INTERFACE_DESCRIPTOR_DATA [DevHSW] ###
    unsigned int    m_ThreadConstantDataReadLength; // [DevHSW] Cross-Thread Constant Data Read Length
                                                    // (DW6 bit 7..0)
                                                    // [PreDevHSW] Per Thread Constant Data in 256bit units

    // ### DW1 MEDIA_VFE_STATE ###
    unsigned int    m_PerThreadScratchSpace;        // Per-Thread Scratch Space (DW1, bit 3..0)

    // ### DW2 MEDIA_VFE_STATE ###
    GFXMEDIA_GPGPU_MODE             m_GPGPUMode;    // GPGPU Mode (DW2, bit 2)
    GFXMEDIA_MMIO_ACCESS_CONTROL    m_GtwMMIOAccess;    // (DW2, bit 4..3)
    int             m_FastPreempt;                  // Fast Preempt (DW2, bit 5)
    int             m_GtwBypass;                    // Bypass Gateway Control (DW2, bit 6)
    int             m_GtwResetTimer;                // Reset Gateway Timer (DW2, bit 7)
    unsigned int    m_URBEntriesNum;                // Number of URB Entries (DW2, bit 15..8)
    unsigned int    m_MaxNumberThreads;             // Maximum Number Of Threads (DW2, bit 31..16)

    // ### DW3 MEDIA_VFE_STATE ###
    unsigned int    m_URBEntryAllocationSize;       // URB Entry Allocation Size (DW3, bit 31..16)

    // ### DW2 MEDIA_CURBE_LOAD ###
    unsigned int    m_CurbeTotalDataLength;         // CURBE Total Data Length (DW2, bit 16..0)

    // ### DW3 MEDIA_CURBE_LOAD ###
    unsigned int    m_CurbeDataOffset;              // CURBE Data Start Address (DW3, bit 31..0)

    // ### DW2 GPGPU_WALKER ###
    GFXMEDIA_GPUWALKER_SIMD         m_SimdWidth;    // SIMD size (DW2, bit 31..30)

    // Other (driver has to interpret fields listed below, not for direct copy to HW command).
    unsigned int    m_TgsmTotalByteCount;
    unsigned int    m_ThreadGroupSize;              // Number of threads in declared thread group

    // This member indicates which channel do we use
    // for threads' dispatch in Compute Shaders.
    unsigned int    m_CSHThreadDispatchChannel;

    void*           m_pThreadPayloadData;           // Thread payload data to be sent in a CURBE. Size = m_CurbeTotalDataLength

    // Is set if compiled for Indirect thread payload
    bool            m_CompiledForIndirectPayload;

    bool            m_DispatchAlongY;

    bool            m_performSecondCompile;         // Indicate to the driver if a second compilation is needed for CS.
    bool            m_rowMajor;                     // Indicate whether this is a row major or column major optimization

    unsigned int    m_InstructionCount;

    int             m_numChannelsUsed;              // Indicate the number of channels loaded from each resource.

    // Explicit tail padding (see the padding notes at the top of this file).
    PADDING_4_BYTES
};

} // namespace USC
