1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef STRUCTURE_ALINGMENT_VERIFICATION
10 #pragma once
11 #endif
12 
13 // reuse GFX enumeration types common to gen6 and gen7
14 #include <sstream>
15 #include "usc.h"
16 #include "SurfaceFormats.h"
17 #include "ShaderTypesConst.h"
18 
19 
/******************************************************************************\
Serialization of CompilerOutput structures is affected by the structure padding
added by the C++ compiler. Each member of a structure, and the structure
itself, may be padded if its size does not meet specific criteria.
For instance: if a structure contains two members, a char and an int, 3 bytes
are added after the char so that the int starts at an address that is a
multiple of the int size. A similar rule applies to the entire structure.
The problem is that a slight difference sometimes occurs between C++
compilers. It happens when a structure inherits from another structure and
the base one needs padding. The MS compiler adds the padding between the base
and the derived structure, while GCC adds it at the end of the derived one.
As a result the sizes on both compilers are the same, but internally, on a
byte basis, the layouts are different. If such a construct is serialized by a
build from one compiler, it cannot be properly deserialized by a build from
the other one.
Theoretically #pragma pack(1) could be used to prevent compilers from adding
padding, but some compilers have only partial support for it.

Therefore manual padding is added to prevent compilers from adding it
automatically. It is added between members as well as at the end of the
structures.

Padding is added with unnamed bit-fields whose width equals the size of the
bit-field's type, e.g.:

One byte of padding is:

char : sizeof(char) * 8

To make it less obscure, three base paddings are defined:
PADDING_1_BYTE
PADDING_2_BYTES
PADDING_4_BYTES

And two additional ones, based on architecture:
PADDING_4_BYTES_x64_ONLY
PADDING_4_BYTES_x32_ONLY

The first one adds four bytes on 64-bit builds only, while the second one
does the same on 32-bit builds only.

All of these macros already contain their terminating ';', so they are used
without a trailing semicolon inside a structure body.

There is a separate verification project "glsl_compile_time_verification",
which verifies at compile time whether manual padding is required.
\******************************************************************************/
#define PADDING_1_BYTE char : sizeof(char) * 8;
#define PADDING_2_BYTES short int : sizeof(short int) * 8;
#define PADDING_4_BYTES int : sizeof(int) * 8;

// PADDING_4_BYTES carries its own ';'. The architecture-specific aliases must
// not add a second one, otherwise every use expands to a stray empty
// declaration ("int : 32;;") that pedantic builds diagnose.
// NOTE(review): 64-bit detection relies solely on _AMD64_ being defined by the
// build; confirm every 64-bit configuration defines it.
#ifdef _AMD64_
#define PADDING_4_BYTES_x64_ONLY    PADDING_4_BYTES
#define PADDING_4_BYTES_x32_ONLY
#else
#define PADDING_4_BYTES_x32_ONLY    PADDING_4_BYTES
#define PADDING_4_BYTES_x64_ONLY
#endif
74 
namespace IGC
{
    // Push-constant mode selector. The underlying type is fixed to
    // unsigned int and every enumerator has an explicit value (0..3).
    enum class PushConstantMode : unsigned int
    {
        DEFAULT = 0u,
        SIMPLE  = 1u,
        GATHER  = 2u,
        NONE    = 3u
    };
} // namespace IGC
85 
86 namespace USC
87 {
88 
/*****************************************************************************\
ENUM: TESSELLATOR_DOMAIN_TYPE
    Tessellator domain selector. NUM_TESSELLATOR_DOMAIN_TYPES is the number of
    domains listed before it, not a domain itself.
\*****************************************************************************/
enum TESSELLATOR_DOMAIN_TYPE
{
    TESSELLATOR_DOMAIN_QUAD      = 0,
    TESSELLATOR_DOMAIN_TRI       = 1,
    TESSELLATOR_DOMAIN_ISOLINE   = 2,
    NUM_TESSELLATOR_DOMAIN_TYPES = 3
};
99 
/*****************************************************************************\
ENUM: TESSELLATOR_PARTITIONING_TYPE
    Tessellator partitioning selector. NUM_TESSELLATOR_PARTITIONING_TYPES is
    the number of partitioning kinds listed before it.
\*****************************************************************************/
enum TESSELLATOR_PARTITIONING_TYPE
{
    TESSELLATOR_PARTITIONING_INTEGER         = 0,
    TESSELLATOR_PARTITIONING_POW2            = 1,
    TESSELLATOR_PARTITIONING_FRACTIONAL_ODD  = 2,
    TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 3,
    NUM_TESSELLATOR_PARTITIONING_TYPES       = 4
};
111 
/*****************************************************************************\
ENUM: TESSELLATOR_OUTPUT_PRIMITIVE_TYPE
    Tessellator output primitive selector.
    NUM_TESSELLATOR_OUTPUT_PRIMITIVE_TYPES is the number of primitive kinds
    listed before it.
\*****************************************************************************/
enum TESSELLATOR_OUTPUT_PRIMITIVE_TYPE
{
    TESSELLATOR_OUTPUT_PRIMITIVE_POINT        = 0,
    TESSELLATOR_OUTPUT_PRIMITIVE_LINE         = 1,
    TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW  = 2,
    TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW = 3,
    NUM_TESSELLATOR_OUTPUT_PRIMITIVE_TYPES    = 4
};
123 
/*****************************************************************************\
ENUM: GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE
    Geometry shader dispatch mode. Values are explicit and span 0..3.
\*****************************************************************************/
enum GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE
{
    GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_SINGLE = 0,
    GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_DUAL_INSTANCE = 1,
    GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_DUAL_OBJECT = 2,
    GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE_SIMD8 = 3
};
134 
/*****************************************************************************\
ENUM: GFX3DSTATE_CONTROL_DATA_FORMAT
    Control data format selector with two explicit values (0 and 1).
\*****************************************************************************/
enum GFX3DSTATE_CONTROL_DATA_FORMAT
{
    GFX3DSTATE_CONTROL_DATA_FORMAT_CUT = 0,
    GFX3DSTATE_CONTROL_DATA_FORMAT_SID = 1
};
143 
/*****************************************************************************\
ENUM: GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL
    Early depth/stencil control selector. Value 3 is reserved and therefore
    has no enumerator.
\*****************************************************************************/
enum GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL
{
    GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_NORMAL = 0,
    GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_PSEXEC = 1,
    GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL_PREPS  = 2
    // 3 is reserved
};
154 
/*****************************************************************************\
ENUM: GFX3DSTATE_COMPUTED_DEPTH_MODE
    Computed depth mode selector. Values are explicit and span 0..3.
\*****************************************************************************/
enum GFX3DSTATE_COMPUTED_DEPTH_MODE
{
    GFX3DSTATE_COMPUTED_DEPTH_MODE_OFF = 0,
    GFX3DSTATE_COMPUTED_DEPTH_MODE_ON = 1,
    GFX3DSTATE_COMPUTED_DEPTH_MODE_ON_GE_SRC = 2,
    GFX3DSTATE_COMPUTED_DEPTH_MODE_ON_LE_SRC = 3
};
165 
/*****************************************************************************\
ENUM: GFX3DSTATE_ROUNDING_MODE
    Rounding mode selector. Values are explicit and span 0..3.
\*****************************************************************************/
enum GFX3DSTATE_ROUNDING_MODE
{
    GFX3DSTATE_ROUNDING_MODE_ROUND_TO_NEAREST_EVEN = 0,
    GFX3DSTATE_ROUNDING_MODE_ROUND_TO_POS_INF = 1,
    GFX3DSTATE_ROUNDING_MODE_ROUND_TO_NEG_INF = 2,
    GFX3DSTATE_ROUNDING_MODE_ROUND_TO_ZERO = 3
};
176 
/*****************************************************************************\
ENUM: GFXMEDIA_GPGPU_MODE
    Selects between the MEDIA (0) and GPGPU (1) modes.
\*****************************************************************************/
enum GFXMEDIA_GPGPU_MODE
{
    GFXMEDIA_GPGPU_MODE_MEDIA = 0,
    GFXMEDIA_GPGPU_MODE_GPGPU = 1
};
185 
/*****************************************************************************\
ENUM: GFXMEDIA_MMIO_ACCESS_CONTROL
    MMIO access control selector. Values are explicit and span 0..2.
\*****************************************************************************/
enum GFXMEDIA_MMIO_ACCESS_CONTROL
{
    GFXMEDIA_MMIO_ACCESS_CONTROL_NO_READWRITE = 0,
    GFXMEDIA_MMIO_ACCESS_CONTROL_OA_READWRITE = 1,
    GFXMEDIA_MMIO_ACCESS_CONTROL_ANY_READWRITE = 2
};
195 
/*****************************************************************************\
ENUM: GFXMEDIA_GPUWALKER_SIMD
    GPU walker SIMD size selector: SIMD8 = 0, SIMD16 = 1, SIMD32 = 2.
\*****************************************************************************/
enum GFXMEDIA_GPUWALKER_SIMD
{
    GFXMEDIA_GPUWALKER_SIMD8 = 0,
    GFXMEDIA_GPUWALKER_SIMD16 = 1,
    GFXMEDIA_GPUWALKER_SIMD32 = 2
};
205 
/*****************************************************************************\
ENUM: GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT
    Selects which components of an attribute are active (not disabled).
\*****************************************************************************/
enum GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT
{
    // All components disabled
    GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED = 0,
    // 2D attribute, z and w components disabled
    GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XY = 1,
    // 3D attribute, w component disabled
    GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZ = 2,
    // 4D attribute, no disabled components
    GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZW = 3
};
216 
/*****************************************************************************\
ENUM: GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE
    Input coverage mask mode selector. Values are sequential starting at 0.
\*****************************************************************************/
enum GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE
{
    // No coverage
    GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_NONE = 0,
    // OUTERCONSERVATIVE when conservative rasterization is enabled,
    // normal otherwise
    GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_NORMAL = 1,
    // INNER conservative rasterization
    GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_INNERCONSERVATIVE = 2,
    // Depth coverage
    GFX3DSTATE_PSEXTRA_INPUT_COVERAGE_MASK_MODE_DEPTH_COVERAGE = 3
};
225 
226 /*****************************************************************************\
227 STRUCT: STypedUAVReadEmulationEntry
228 \*****************************************************************************/
229 struct STypedUAVReadEmulationEntry
230 {
231     bool                    m_Valid;
232     bool                    m_DoubleHorizontalSize;
233     IGC::SURFACE_FORMAT     m_SubstituteFormat;
234     unsigned int            m_UAVEmulationIndex;
235 };
236 
237 /*****************************************************************************\
238 STRUCT: SInterfaceThisData
239 \*****************************************************************************/
240 struct STypedUAVReadEmulBTLayout
241 {
242     STypedUAVReadEmulationEntry*    pTypedUAVReadEmulBTEntries;
243     unsigned int                    TypedUAVReadEmulBTEntriesSize;
244 };
245 
/*****************************************************************************\
STRUCT: ConstantAddress
    Addresses a constant by buffer id, element id and size; all members
    default to 0.
    NOTE(review): the unit of 'size' is not visible here - confirm.
\*****************************************************************************/
struct ConstantAddress
{
    unsigned int bufId = 0;
    unsigned int eltId = 0;
    int size = 0;

    // Appends bufId, eltId and size to the stream as decimal text.
    // Every field is followed by a single space: without a delimiter,
    // different addresses produce the same text (e.g. {1,23,4} and {12,3,4}
    // would both yield "1234").
    // NOTE(review): this changes the emitted text, so anything keyed on a
    // previously produced blob will no longer match - confirm acceptable.
    void Serialize( std::stringstream& stringStream ) const
    {
        stringStream << bufId << ' ' << eltId << ' ' << size << ' ';
    }
};

// Ordering for ConstantAddress; declared here, defined elsewhere.
bool operator < (const ConstantAddress &a, const ConstantAddress &b);
259 
260 struct ConstantAddrValue
261 {
262     ConstantAddress ca;
263     bool anyValue;
264     uint32_t value;
265 };
266 
267 struct InlineDynConstants
268 {
269     ConstantAddress ca;
270     uint32_t value;
271 
SerializeInlineDynConstants272     void Serialize( std::stringstream& stringStream ) const
273     {
274         ca.Serialize( stringStream );
275 
276         stringStream << value;
277     }
278 };
279 
280 // Dynamic Constant Folding
281 struct DynamicConstFoldingInputs
282 {
283     const InlineDynConstants* pInlineDynConstants = nullptr;
284     unsigned int              m_inlineDynConstantsSize = 0;
285 
SerializeDynamicConstFoldingInputs286     void Serialize( std::stringstream& stringStream ) const
287     {
288         for( unsigned int i = 0; i < m_inlineDynConstantsSize; i++ )
289         {
290             pInlineDynConstants[ i ].Serialize( stringStream );
291         }
292     }
293 };
294 
// Constant Buffer to Constant Register gather entry.
// A 16-bit word that can be accessed either as a whole (Value) or through
// three bit-fields (Fields).
struct SConstantGatherEntry
{
    // ### DW3 3DSTATE_GATHER_CONSTANT_* ###
    union _GatherEntry
    {
        struct _Fields
        {
            unsigned short constantBufferIndex  : 4;    // bits 3:0
            unsigned short channelMask          : 4;    // bits 7:4
            unsigned short constantBufferOffset : 8;    // bits 15:8
        } Fields;

        unsigned short Value;   // all 16 bits at once
    } GatherEntry;
};
310 
/*****************************************************************************\
STRUCT: SComputeShaderNOSLayout
    Three unsigned values, one per compute shader runtime value.
    NOTE(review): presumably each member is the slot/index of that runtime
    value (g_nosLayout initializes them to 0, 1, 2) - confirm.
\*****************************************************************************/
struct SComputeShaderNOSLayout
{
    unsigned int    runtimeVal_LoopCount;
    unsigned int    runtimeVal_ResWidthOrHeight;
    unsigned int    runtimeVal_ConstBufferSize;
};
320 
321 struct SCompilerInputCommon
322 {
323     DynamicConstFoldingInputs m_DcfInputs;
324     void* m_pGTPinInput;
325     const unsigned int* m_pShaderDebugInfo;
326 
327     IGC::PushConstantMode m_PushConstantMode;
328 
SerializeSCompilerInputCommon329     void Serialize( std::stringstream& shaderCacheBlob ) const
330     {
331         m_DcfInputs.Serialize( shaderCacheBlob );
332         shaderCacheBlob << (unsigned int)m_PushConstantMode;
333 
334         // Assume that m_pGTPinInput and m_pShaderDebugInfo are not valid when caching
335     }
336 };
337 
338 /*****************************************************************************\
339 STRUCT: SCompilerInputCommon_Gen7
340 \*****************************************************************************/
341 struct SCompilerInputCommon_Gen7 : public SCompilerInputCommon
342 {
343     bool secondCompile; // Set this flag to indicate to the compiler that this is the 2nd compilation of the kernel
344     bool isRowMajor;
345     int  numChannelsUsed;
346     unsigned int shaderHash;
347 
SerializeSCompilerInputCommon_Gen7348     void Serialize( std::stringstream& shaderCacheBlob ) const
349     {
350         SCompilerInputCommon::Serialize( shaderCacheBlob );
351         shaderCacheBlob << secondCompile << isRowMajor << numChannelsUsed << shaderHash;
352     }
353 };
354 
355 static const SComputeShaderNOSLayout g_nosLayout = { 0, 1, 2 };
356 
357 struct SCompilerOutputCommon_Gen7
358 {
359     // DX10+ immediate constants defined in shader code; expected driver behavior:
360     //    a) allocate an internal CB of size 'm_ImmediateConstantsSize'
361     //    b) copy 'm_pImmediateConstants' data to this CBbuffer
362     //    c) bind internal CB to 'SBindingTableLayout.immediateConstantBufferIndex'
363     void*           m_pImmediateConstants;
364     unsigned int    m_ImmediateConstantsSize;       // if 0, immediate constants not used
365 
366     // DX11+ shader interface binding table; expected driver behavior:
367     //    a) allocate an internal CB of size 'm_InterfaceConstantsSize'
368     //    b) lock buffer for writting on SetShaderWithInterfaces()
369     //    c) call Populate*ShaderInterfaceData11() passing interface bind data
370     //    d) unmap buffer and bind to 'SBindingTableLayout.interfaceConstantBufferIndex'
371     unsigned int   m_InterfaceConstantsSize;        // if 0, interface buffer not used
372     void*          VFuncOffsets;               // call offsets to virtual functions used in kernel program
373 
374     // Helper field containing a pointer to the compiled shader object.
375     // IVB-specific. Should not be used on HSW+ platforms.
376     void*   m_pShaderHandle;
377 
378     // Constant Buffer to Constant Register gather map
379     SConstantGatherEntry*   m_pGatherConstants;
380     // Number of entries in gather constants map. The number of entries is always even
381     // which makes the gather constants map size a multiple of unsigned int.
382     unsigned int    m_GatherConstantsSize;          // if 0, gather map not used
383     // Bitmap of valid constant buffers in the push constants gather.
384     // Specifies which of the 16 constant buffers are used in the push constants gather.
385     // If a bit is set it indicates the corresponding constant buffer is used.
386     // If a bit is clear it indicates the corresponding constant buffer is not used.
387     // ### DW1 3DSTATE_GATHER_CONSTANT_* ###
388     unsigned short  m_GatherConstantsBufferValid;   // if 0, gather buffer not used
389 
390     bool m_IsMessageTargetDataCacheDataPort;
391 
392     // USC enables this to indicate that it expects that the VE component packing
393     // has been applied to the delivered thread's payload
394     // ### Gen9+: (DW0, bit 9) 3DSTATE_VF ###
395     bool ComponentPackingEnable;
396 
397     // ### DW1 3DSTATE_CONSTANT_* ###
398     unsigned int    m_ConstantBuffer1ReadLength;    // Constant Buffer 1 Read Length (DW1, bit 31..16)
399                                                     // In 256-bit units. If 0, gather map not used.
400 
401     // Mask of constant buffers accessed by kernel (if BIT#n==0, kernel does not access CB#n)
402     // CB usage can change after applying ConstantBuffersToConstantRegisters optimization.
403     unsigned int    m_ConstantBufferAccessed;
404 
405     // Bitmask that indicates which MSAA level is used for UAV load/store
406     unsigned int m_MsaaUAVMask;
407 
408     // Additional UAV Binding Table Entries to be used for emulation
409     // of Typed UAV loads from surface formats unsupported by hardware.
410     unsigned int                    m_TypedUAVReadEmulationEntriesSize;
411     STypedUAVReadEmulationEntry*    m_pTypedUAVReadEmulationEntries;
412 
413     unsigned int    m_ShaderHash;
414     unsigned int    m_ShaderOrdinal;
415     unsigned int    m_CompileNum;
416 
417     // ISA to IL map.
418     unsigned int    m_ISA2ILMapSize[3];
419     void*           m_pISA2ILMap[3];
420 
421     // Bitmask of shader resources accessed by gather4 instructions
422     // with green channel select and not accessed by any other then gather4
423     // instruction type. This bitmask is a part of
424     // the WaGather4WithGreenChannelSelectOnR32G32Float workaround.
425     // DW0 - bitmask of resource indexes  0 - 31
426     // DW1 - bitmask of resource indexes 32 - 63
427     // DW2 - bitmask of resource indexes 64 - 95
428     // DW3 - bitmask of resource indexes 96 - 127
429     unsigned int m_WaGather4WithGreenResourceMask[4];
430 
431     // Bitmask of shader resources accessed by sample_c instructions. This
432     // field is only used when shader compiler was created with the
433     // EnableWaCheckResourceFormatForNFSRivals bit set.
434     unsigned int m_SampleWithComparisonResourceMask[4];
435 
436     int             m_UAVSlotsDeclared;             // true if one or more UAVs declared
437     unsigned int    m_ResourceSlotMask[4];
438 
439     // Component(channel) mask provided in the least significant nibble of each table element
440     // ### Gen9+: DW1-DW4 3DSTATE_VF_COMPONENT_PACKING  ###
441     unsigned int ElementComponentDeliverMask[ NUM_VSHADER_INPUT_REGISTERS_PACKAGEABLE ];
442 
443     // Same as above mask but HW-agnostic. Used for cross-shader optimizations.
444     unsigned int ElementComponentUseMask[ NUM_VSHADER_INPUT_REGISTERS_PACKAGEABLE ];
445 
446     // Bitmask of input registers that are *used* by the shader.
447     // The field ElementComponentDeliverMask contains 4-bit nibbles. Subsequent
448     // nibbles are referring to subsequent bits set in this mask. In other words,
449     // for bits cleared in this mask, nibbles are omitted from the field
450     // ElementComponentDeliverMask (only nibbles for bits set here are present).
451     unsigned int ElementDeliverMask;
452 
453     unsigned long long       m_UAVSlotsWAppendConsume;       // used as bitfield, each bit
454     // represent UAV slot that is
455     // referenced with Append/Consume.
456 
457     // Planar YUV formats NOS data.
458     // For each texture with index 'i' declared as planar YUV by
459     // SGen6PixelShaderKernelProgramCacheKey.SetPlanarYUVFormat(i, ...)
460     // this table keeps resource numbers of the additional planes used by sampling.
461     // If three separate planes are defined, Y is at the original texture index
462     // while indices of U and V are given in this table. When Y, V channels are
463     // packed in one plane, both indices are set to the same resource number.
464     // E.g.:
465     // For YV12:
466     // m_planarTextureResourceIndex[i][0] -- index of V plane resource,
467     // m_planarTextureResourceIndex[i][1] -- index of U plane resource.
468     // for NV12:
469     // m_planarTextureResourceIndex[i][0] -- index of interleaved U+V plane resource,
470     // m_planarTextureResourceIndex[i][1] -- unused, same as [i][0]
471     //
472     // If no planar YUV format is defined, both table entries are set to i.
473     unsigned int    m_planarTextureResourceIndex[NUM_TEXTURE_SLOTS][NUM_EXTRA_PLANES];
474 
475     // Max binding table index used for stateful, non-TGSM resources
476     unsigned int m_MaxBindingTableIndex;
477 
478     // If we have indirect sampling and >16 samplers we need to use even slots only, so double the amount
479     bool m_IsUsingDoubleSamplerSlots;
480 
481     // Indicates if the shader has any control flow
482     bool m_hasControlFlow;
483 
484     bool m_UsesTextureFences;
485     PADDING_1_BYTE
486 
487     // used by GenUpdateCB
488     void*          m_ConstantBufferReplaceShaderPatterns;
489     unsigned int   m_ConstantBufferReplaceShaderPatternsSize;
490     unsigned int   m_ConstantBufferUsageMask;
491     unsigned int   m_ConstantBufferReplaceSize;
492     PADDING_4_BYTES_x64_ONLY
493 };
494 
495 /*****************************************************************************\
496 \*****************************************************************************/
497 USC_PARAM()
498 struct SCompilerOutputVertexShader_Gen7 : public SCompilerOutputCommon_Gen7
499 {
500     // ### DW1 3DSTATE_VS ###
501     void*   m_pKernelProgram;           // Kernel Start Pointer         (DW1, bit 31..6)
502     unsigned int    m_KernelProgramSize;
503 
504     // ### DW2 3DSTATE_VS ###
505     int    m_SingleProgramFlow;        // Single Program Flow          (DW2, bit 31)
506     unsigned int    m_SamplerCount;             // Sampler Count                (DW2, bit 29..27)
507     unsigned int    m_BindingTableEntryCount;   // Binding Table Entry Count    (DW2, bit 25..18)
508                                         // Gen7 and Gen7.5+ with HW binding table generation disabled.
509     unsigned int    m_BindingTableEntryBitmap;  // Binding Table Entry Count    (DW2, bit 25..18)
510                                         // Gen7.5+ with HW binding table generation enabled.
511     GFX3DSTATE_FLOATING_POINT_MODE  m_FloatingPointMode; //Floating Point Mode (DW2, bit 16)
512 
513     // ### DW3 3DSTATE_VS ###
514     unsigned int    m_PerThreadScratchSpace;    // Per-Thread Scratch Space     (DW3, bit 3..0)
515 
516     // ### DW4 3DSTATE_VS ###
517     unsigned int    m_DispatchGRFStartRegister; // Dispatch GRF Start Register  (DW4, bit 24..20)
518     unsigned int    m_VertexURBEntryReadLength; // Vertex URB Entry Read Length (DW4, bit 16..11)
519     unsigned int    m_VertexURBEntryReadOffset; // Vertex URB Entry Read Offset (DW4, bit 9..4)
520 
521     // ### DW5 3DSTATE_VS ###
522     unsigned int    m_MaxNumberThreads;         // Maximum Number Of Threads    (DW5, bit 31..25)
523 
524     // ### DW1 3DSTATE_SBE ###
525     unsigned int   m_SBEVertexURBEntryReadOffset; // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4)
526 
527     // Other
528     unsigned int    m_URBAllocationSize;
529     unsigned int    m_URBEntryWriteLength;
530     unsigned int    m_URBEntriesPerHandle;
531 
532     int             m_HasInstanceID;
533     int             m_HasVertexID;
534 
535     unsigned int    m_InstanceIDIndex;
536     unsigned int    m_VertexIDIndex;
537 
538     unsigned int    m_InstanceIDMask;
539     unsigned int    m_VertexIDMask;
540 
541     unsigned int    m_UserClipDistancesMask;
542     unsigned int    m_UserCullDistancesMask;
543     unsigned int    m_AntiAliasTextureCoordinateId;
544     unsigned int    m_VsMaxNumInputRegisters;
545 
546     int             m_DeclaresVPAIndex;
547     int             m_DeclaresRTAIndex;
548 
549     unsigned int    m_InstructionCount;
550 
551     PADDING_4_BYTES_x32_ONLY
552 };
553 
554 /*****************************************************************************\
555 \*****************************************************************************/
556 USC_PARAM()
557 struct SCompilerOutputHullShader_Gen7 : public SCompilerOutputCommon_Gen7
558 {
559     // ### DW1 3DSTATE_HS ###
560     unsigned int   m_SamplerCount;             //Sampler Count                 (DW1, bit 29..27)
561     unsigned int   m_BindingTableEntryCount;   //Binding Table Entry Count     (DW1, bit 25..18)
562                                         //Gen7 and Gen7.5+ with HW binding table generation disabled.
563     unsigned int   m_BindingTableEntryBitmap;  //Binding Table Entry Count     (DW2, bit 25..18)
564                                         //Gen7.5+ with HW binding table generation enabled.
565     unsigned int   m_MaxNumberThreads;         //Maximum Number Of Threads     (DW1, bit 6..0)
566 
567     // ### DW2 3DSTATE_HS ###
568     int    m_HSEnable;                 //HS Enable                     (DW2, bit 31)
569                                         //Statistics Enable             (DW2, bit 29)
570     unsigned int   m_InstanceCount;            //InstanceCount                 (DW2, bit 7..0)
571 
572     // ### DW3 3DSTATE_HS ###
573     void*   m_pKernelProgram;           //Kernel Start Pointer          (DW3, bit 31..6)
574     unsigned int   m_KernelProgramSize;
575 
576     // ### DW4 3DSTATE_HS ###
577     unsigned int   m_PerThreadScratchSpace;    //Per-Thread Scratch Space      (DW4, bit 3..0)
578 
579     // ### DW5 3DSTATE_HS ###
580     int    m_SingleProgramFlow;        //Single Program Flow           (DW5, bit 27)
581     int    m_IncludeVertexHandles;     //Include Vertex Handles        (DW5, bit 24)
582     unsigned int   m_DispatchGRFStartRegister; //Dispatch GRF Start Register   (DW5, bit 23..19)
583     unsigned int   m_VertexURBEntryReadLength; //Vertex URB Entry Read Length  (DW5, bit 16..11)
584     unsigned int   m_VertexURBEntryReadOffset; //Vertex URB Entry Read Offset  (DW5, bit 9..4)
585 
586     // Other
587     int    m_HasNOSDefaultTesselationFactors;
588     unsigned int   m_URBAllocationSize;
589     unsigned int   m_URBEntryWriteLength;
590     unsigned int   m_URBEntriesPerHandle;
591 
592     int    m_AttributePullModelUsed;
593     unsigned int   m_PatchConstantURBSize;
594     unsigned int   m_NumberOutputControlPoints;
595     unsigned int   m_NumberInputControlPoints;
596 
597     // Only used when VS is skipped and VF used directly in HS:
598     int             m_HasInstanceID;
599     unsigned int    m_InstanceIDIndex;
600     unsigned int    m_InstanceIDMask;
601 
602     // ### 3DSTATE_TE related fields ###
603     TESSELLATOR_PARTITIONING_TYPE      m_Partitioning;
604     TESSELLATOR_OUTPUT_PRIMITIVE_TYPE  m_OutputPrimitive;
605     TESSELLATOR_DOMAIN_TYPE            m_Domain;
606     float                                       m_MaxTessFactor;
607     unsigned int                                m_InstructionCount;
608 
609     PADDING_4_BYTES_x64_ONLY
610 };
611 
612 /*****************************************************************************\
613 \*****************************************************************************/
614 USC_PARAM()
615 struct SCompilerOutputDomainShader_Gen7 : public SCompilerOutputCommon_Gen7
616 {
617     // ### DW1 3DSTATE_DS ###
618     void*   m_pKernelProgram;           //Kernel Start Pointer          (DW1, bit 31..6)
619     unsigned int   m_KernelProgramSize;
620 
621     // ### DW2 3DSTATE_DS ###
622     int    m_SingleProgramFlow;        //Single Program Flow           (DW2, bit 31)
623     unsigned int   m_SamplerCount;             //Sampler Count                 (DW2, bit 29..27)
624     unsigned int   m_BindingTableEntryCount;   //Binding Table Entry Count     (DW2, bit 25..18)
625                                         //Gen7 and Gen7.5+ with HW binding table generation disabled.
626     unsigned int   m_BindingTableEntryBitmap;  //Binding Table Entry Count     (DW2, bit 25..18)
627                                         //Gen7.5+ with HW binding table generation enabled.
628 
629     // ### DW3 3DSTATE_DS ###
630     unsigned int   m_PerThreadScratchSpace;    //Per-Thread Scratch Space      (DW3, bit 3..0)
631 
632     // ### DW4 3DSTATE_DS ###
633     unsigned int   m_DispatchGRFStartRegister; //Dispatch GRF Start Register   (DW4, bit 24..20)
634     unsigned int   m_PatchURBEntryReadLength;  //Patch URB Entry Read Length   (DW4, bit 17..11)
635     unsigned int   m_PatchURBEntryReadOffset;  //Patch URB Entry Read Offset   (DW4, bit 9..4)
636 
637     // ### DW5 3DSTATE_DS ###
638     unsigned int   m_MaxNumberThreads;         //Maximum Number Of Threads     (DW5, bit 31..25)
639                                         //Statistics Enable             (DW5, bit 10)
640     int    m_ComputeWAttribute;        //Compute W Coordinate Enable   (DW5, bit 2)
641     int    m_DSCacheDisable;           //DS Cache Disable              (DW5, bit 1)
642     int    m_DSEnable;                 //DS Enable                     (DW5, bit 0)
643 
644     // ### DW1 3DSTATE_SBE ###
645     unsigned int   m_SBEVertexURBEntryReadOffset; // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4)
646 
647     // Other
648     unsigned int    m_URBAllocationSize;
649     unsigned int    m_URBEntryWriteLength;
650     unsigned int    m_URBEntriesPerHandle;
651 
652     unsigned int    m_UserClipDistancesMask;
653     unsigned int    m_UserCullDistancesMask;
654 
655     // ### 3DSTATE_TE related fields ###
656     TESSELLATOR_DOMAIN_TYPE                m_Domain;
657     TESSELLATOR_PARTITIONING_TYPE          m_Partitioning;
658     TESSELLATOR_OUTPUT_PRIMITIVE_TYPE      m_OutputPrimitive;
659     unsigned int    m_InstructionCount;
660 
661     bool            m_DeclaresVPAIndex;
662     bool            m_DeclaresRTAIndex;
663 
664     PADDING_2_BYTES
665     PADDING_4_BYTES_x32_ONLY
666 };
667 
668 /*****************************************************************************\
669 \*****************************************************************************/
670 USC_PARAM()
671 struct SCompilerOutputGeometryShader_Gen7 : public SCompilerOutputCommon_Gen7
672 {
673     // ### DW1 3DSTATE_GS ###
674     void*   m_pKernelProgram[2];        //Kernel Start Pointer          (DW1, bit 31..6)
675                                         //[Rendering Disabled|Enabled]
676     unsigned int   m_KernelProgramSize[2];     //[Rendering Disabled|Enabled]
677 
678     // ### DW2 3DSTATE_GS ###
679     int    m_SingleProgramFlow;        //Single Program Flow           (DW2, bit 31)
680     unsigned int   m_SamplerCount;             //Sampler Count                 (DW2, bit 29..27)
681     unsigned int   m_BindingTableEntryCount;   //Binding Table Entry Count     (DW2, bit 25..18)
682                                         //Gen7 and Gen7.5+ with HW binding table generation disabled.
683     unsigned int   m_BindingTableEntryBitmap;  //Binding Table Entry Count     (DW2, bit 25..18)
684                                         //Gen7.5+ with HW binding table generation enabled.
685 
686     // ### DW3 3DSTATE_GS ###
687     unsigned int   m_PerThreadScratchSpace;    //Per-Thread Scratch Space      (DW3, bit 3..0)
688 
689     // ### DW4 3DSTATE_GS ###
690     unsigned int   m_OutputVertexSize[2];      //Output Vertex Size            (DW4, bit 28..23)
691                                         //[Rendering Disabled|Enabled]
692     GFX3DPRIMITIVE_TOPOLOGY_TYPE   m_OutputTopology;  //Output Topology (DW4, bit 22..17)
693     unsigned int   m_VertexEntryReadLength;    //Vertex URB Entry Read Length  (DW4, bit 16..11)
694     int    m_IncludeVertexHandles;     //Include Vertex Handles        (DW4, bit 10)
695     unsigned int   m_VertexEntryReadOffset;    //Vertex URB Entry Read Offset  (DW4, bit 9..4)
696     unsigned int   m_DispatchGRFStartRegister; //Dispatch GRF Start Register   (DW4, bit 3..0)
697 
698     // ### DW5 3DSTATE_GS ###
699     unsigned int   m_MaxNumberThreads;         //Maximum Number Of Threads     (DW5, bit 31..25)
700     GFX3DSTATE_CONTROL_DATA_FORMAT m_ControlDataFormat; //Control Data Format (DW5, bit 24)
701     unsigned int   m_ControlDataHeaderSize;    //Control Data Header Size      (DW5, bit 23..20)
702     unsigned int   m_InstanceControl;          //Instance Control              (DW5, bit 19..15)
703     unsigned int   m_DefaultStreamId;          //Default Stream ID             (DW5, bit 14..13)
704     GFX3DSTATE_GEOMETRY_SHADER_DISPATCH_MODE m_DispatchMode; //Control Data Format (DW5, bit 12..11)
705                                         //Statistics Enable             (DW5, bit 10)
706                                         //GS Invocations Increment Value(DW5, bit 9..5)
707     int    m_IncludePrimitiveIdEnable; //Include PrimitiveId Enable    (DW5, bit 4)
708                                         //Rendering Enable Hint         (DW5, bit 3)
709     int    m_ReorderEnable;            //Reorder Enable                (DW5, bit 2)
710     int    m_DiscardAdjacencyEnable;   //Discard Adjacency Enable      (DW5, bit 1)
711     int    m_GSEnable;                 //GS Enable                     (DW5, bit 0)
712 
713     // ### DW1 3DSTATE_SBE ###
714     unsigned int    m_SBEVertexURBEntryReadOffset; // Vertex URB Entry Read Offset in 256bit values (DW1, bit 9..4)
715 
716     // Other
717     unsigned int    m_URBAllocationSize;
718     unsigned int    m_URBEntryWriteLength;
719     unsigned int    m_URBEntriesPerHandle;
720 
721     unsigned int    m_UserClipDistancesMask;
722     unsigned int    m_UserCullDistancesMask;
723     unsigned int    m_MaxOutputVertexCount;
724 
725     unsigned int    m_InstructionCount;
726 
727     bool            m_DeclaresVPAIndex;
728     bool            m_DeclaresRTAIndex;
729 
730     PADDING_2_BYTES
731     PADDING_4_BYTES
732 };
733 
734 /*****************************************************************************\
735 \*****************************************************************************/
736 USC_PARAM()
737 struct SCompilerOutputPixelShader_Gen7 : public SCompilerOutputCommon_Gen7
738 {
739     // ### DW1, DW6, DW7 3DSTATE_PS ###
740 
741     void*           m_pKernelProgram[NUM_PS_DISPATCH_TYPES];
742     unsigned int    m_KernelProgramSize[NUM_PS_DISPATCH_TYPES];
743     int             m_EnablePixelDispatch[NUM_PS_DISPATCH_TYPES];
744 
745     unsigned int    m_NumberOfSFOutputAttributes;
746 
747     // ### DW2 3DSTATE_PS ###
748     GFX3DSTATE_FLOATING_POINT_MODE  m_FloatingPointMode;             // (DW2, bit 16)
749     unsigned int    m_BindingTableEntryCount;        // Binding Table Entry Count    (DW2, bit 25..18)
750                                         // Gen7 and Gen7.5+ with HW binding table generation disabled.
751     unsigned int    m_BindingTableEntryBitmap;  // Binding Table Entry Count    (DW2, bit 25..18)
752                                         // Gen7.5+ with HW binding table generation enabled.
753     unsigned int    m_SamplerCount;             // Sampler Count                (DW2, bit 29..27)
754 
755     int             m_VectorMaskEnable; // Gen8+ need to know if we use VMASK or DMASK for pixel dispatch
756 
757     GFX3DSTATE_PROGRAM_FLOW  m_SingleProgramFlow;              // (DW2, bit 31)
758 
759     // ### DW3 3DSTATE_PS ###
760     unsigned int    m_PerThreadScratchSpace;    // Per-Thread Scratch Space     (DW3, bit 3..0)
761 
762     // ### DW4 3DSTATE_PS ###
763     GFX3DSTATE_POSITIONXY_OFFSET   m_PositionXYOffset;               // (DW4, bit 4..3)
764     int             m_HasOMaskOutput;           // OMask Present to RT          (DW4, bit 9)
765     int             m_AttributeEnable;          // Attribute Enable             (DW4, bit 10)
766     int             m_PushConstantEnable;       // Push Constant Enable         (DW4, bit 11)
767     unsigned int    m_SampleMask;               // Sample Mask, for Gen7.5 only (DW4, bit 19..12)
768     unsigned int    m_MaxNumberThreads;         // Maximum Number Of Threads    (DW4, bit 31..23)
769 
770     // ### DW5 3DSTATE_PS ###
771                                         // Dispatch GRF Start Registers For Constant/Setup Data
772     unsigned int    m_DispatchGRFStartRegForConstSetupData[NUM_PS_DISPATCH_TYPES]; // (DW5, bit 22..16, 14..8, 6..0)
773 
774     // ### DW1 3DSTATE_WM ###
775     int    m_UsesInputCoverageMask;                                 // (DW1, bit 10)
776     int    m_RequiresBarycentricPerspectivePixelLocation;           // (DW1, bit 11)
777     int    m_RequiresBarycentricPerspectiveCentroid;                // (DW1, bit 12)
778     int    m_RequiresBarycentricPerspectiveSample;                  // (DW1, bit 13)
779     int    m_RequiresBarycentricNonPerspectivePixelLocation;        // (DW1, bit 14)
780     int    m_RequiresBarycentricNonPerspectiveCentroid;             // (DW1, bit 15)
781     int    m_RequiresBarycentricNonPerspectiveSample;               // (DW1, bit 16)
782 
783     GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE  m_PositionZWInterpolationMode; // (DW1, bit 18..17)
784     int    m_UsesSourceW;             // PS Uses Source W              (DW1, bit 19)
785     int    m_UsesSourceDepth;         // PS Uses Source Depth          (DW1, bit 20)
786     GFX3DSTATE_EARLY_DEPTH_STENCIL_CONTROL  m_EarlyDepthStencilControl; //  (DW1, bit 22..21)
787     GFX3DSTATE_COMPUTED_DEPTH_MODE  m_ComputedDepthMode;             // (DW1, bit 24..23)
788     int    m_KillsPixel;              // PS Kill Pixel                 (DW1, bit 25)
789 
790     int             m_HasStoreOrAtomicInstructions;
791     unsigned int    m_RenderTargetMask;
792 
793     // ### DW2 3DSTATE_WM ###
794     int    m_UAVOnly;                                               // (DW2, bit 30)
795 
796     // Other
797     int             m_KernelIsPerSample;
798     int             m_HasNOSInputSampleIndex;
799     int             m_HasNOSUnlitCentroidInterpolation;
800     int             m_HasPrimitiveIdInput;
801     unsigned int    m_PrimitiveIdIndex;
802     int             m_OverrideX;
803     int             m_OverrideY;
804     int             m_OverrideZ;
805     int             m_OverrideW;
806     unsigned int    m_SamplersUsageMask;
807 
808     unsigned int    m_ConstantInterpolationEnableMask;
809 
810     // Fields used by the sample_c workaround.
811     // IVB-specific
812     int             m_HasSampleCmpWaCandidates; // shader kernel has sample_c instructions eligible for the wa
813     int             m_SampleCmpWaRequiresSingleLODResources; // sampled resources must have only 1 LOD or have MIP filter disabled
814     int             m_SampleCmpWaSampler; // index of the sampler used by sample_c instructions
815     unsigned int    m_SampleCmpWaResourcesMask[4]; // bitmap of resources sampled by sample_c instructions
816 
817     unsigned int    m_InstructionCount[NUM_PS_DISPATCH_TYPES];
818 
819     bool            m_HigherSIMDRecommended;        // True if a compilation in higher SIMD can be beneficial.
820     bool            m_HasSampleInfoInstruction;      // True if pixel shader uses samplepos instruction.
821 
822     // Used by SWStencil
823     bool            m_IsSWStencilPossible;
824     bool            m_NeedMSAARate;
825 
826     PADDING_4_BYTES_x64_ONLY;
827 };
828 
829 /*****************************************************************************\
830 \*****************************************************************************/
831 USC_PARAM()
832 struct SCompilerOutputComputeShader_Gen7 : public SCompilerOutputCommon_Gen7
833 
834 {
835     // ### DW0 INTERFACE_DESCRIPTOR_DATA ###
836     void*   m_pKernelProgram;           // Kernel Start Pointer         (DW0, bit 31..6)
837     unsigned int   m_KernelProgramSize;
838 
839     // ### DW1 INTERFACE_DESCRIPTOR_DATA ###
840     GFX3DSTATE_FLOATING_POINT_MODE   m_FloatingPointMode;            // (DW1, bit 16)
841     GFX3DSTATE_PROGRAM_FLOW   m_SingleProgramFlow;                   // (DW1, bit 18)
842 
843     // ### DW2 INTERFACE_DESCRIPTOR_DATA ###
844     unsigned int   m_SamplerCount;             // Sampler Count                (DW2, bit 4..2)
845 
846     // ### DW3 INTERFACE_DESCRIPTOR_DATA ###
847     unsigned int   m_BindingTableEntryCount;        // Binding Table Entry Count    (DW3, bit 4..0)
848 
849     // ### DW4 INTERFACE_DESCRIPTOR_DATA ###
850     unsigned int   m_CurbeReadOffset;          // Constant URB Entry Read Offset (DW4, bit 15..0)
851     unsigned int   m_CurbeReadLength;          // Constant URB Entry Read Length (DW4, bit 31..16)
852 
853     // ### DW5 INTERFACE_DESCRIPTOR_DATA ###
854     unsigned int   m_PhysicalThreadsInGroup;   // Number of Threads in GPGPU Thread Group
855                                                                      // (DW5, bit 7..0)
856     unsigned int   m_BarrierReturnByte;        // Barrier Return Byte          (DW5  bit 15..8)
857     int    m_BarrierUsed;              // Barrier Enable               (DW5, bit 21)
858     GFX3DSTATE_ROUNDING_MODE   m_RoundingMode;      // Rounding Mode    (DW5, bit 23..22)
859     unsigned int   m_BarrierReturnGrfOffset;   // Barrier Return GRF Offset    (DW5, bit 31..24)
860 
861     // ### DW6 INTERFACE_DESCRIPTOR_DATA [DevHSW] ###
862     unsigned int   m_ThreadConstantDataReadLength; // [DevHSW] Cross-Thread Constant Data Read Length
863                                             //                          (DW6  bit 7..0)
864                                             // [PreDevHSW] Per Thread Constant Data in 256bit units
865 
866     // ### DW1 MEDIA_VFE_STATE ###
867     unsigned int   m_PerThreadScratchSpace;    // Per-Thread Scratch Space     (DW1, bit 3..0)
868 
869     // ### DW2 MEDIA_VFE_STATE ###
870     GFXMEDIA_GPGPU_MODE    m_GPGPUMode; // GPGPU Mode                   (DW2, bit 2)
871     GFXMEDIA_MMIO_ACCESS_CONTROL   m_GtwMMIOAccess;                  // (DW2, bit 4..3)
872     int    m_FastPreempt;              // Fast Preempt                 (DW2, bit 5)
873     int    m_GtwBypass;                // Bypass Gateway Control       (DW2, bit 6)
874     int    m_GtwResetTimer;            // Reset Gateway Timer          (DW2, bit 7)
875     unsigned int   m_URBEntriesNum;            // Number of URB Entries        (DW2, bit 15..8)
876     unsigned int   m_MaxNumberThreads;         // Maximum Number Of Threads    (DW2, bit 31..16)
877 
878     // ### DW3 MEDIA_VFE_STATE ###
879     unsigned int   m_URBEntryAllocationSize;   // URB Entry Allocation Size    (DW3, bit 31..16)
880 
881     // ### DW2 MEDIA_CURBE_LOAD ###
882     unsigned int   m_CurbeTotalDataLength;     // CURBE Total Data Length      (DW2, bit 16..0)
883 
884     // ### DW3 MEDIA_CURBE_LOAD ###
885     unsigned int   m_CurbeDataOffset;          // CURBE Data Start Address     (DW3, bit 31..0)
886 
887     // ### DW2 GPGPU_WALKER ###
888     GFXMEDIA_GPUWALKER_SIMD   m_SimdWidth;    // SIMD size              (DW2, bit 31..30)
889 
890     // Other (driver has to interpret fields listed below, not for direct copy to HW command).
891     unsigned int   m_TgsmTotalByteCount;
892     unsigned int   m_ThreadGroupSize;          // Number of threads in declared thread group
893 
894     // This member indicates which channel do we use
895     // for threads' dispatch in Compute Shaders.
896     unsigned int m_CSHThreadDispatchChannel;
897 
898     void*   m_pThreadPayloadData;       // Thread payload data to be sent in a CURBE. Size = m_CurbeTotalDataLength
899 
900     // Is set if compiled for Indirect thread payload
901     bool    m_CompiledForIndirectPayload;
902 
903     bool    m_DispatchAlongY;
904 
905     bool m_performSecondCompile;    // Indicate to the driver if a second compilation is needed for CS.
906     bool m_rowMajor;                // Indicate whether this is a row major or column major optimization
907 
908     unsigned int    m_InstructionCount;
909 
910     int  m_numChannelsUsed;         // Indicate the number of channels loaded from each resource.
911 
912     PADDING_4_BYTES
913 };
914 
915 } // namespace USC
916