1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef STRUCTURE_ALINGMENT_VERIFICATION
10 #pragma once
11 #endif
12 
13 #include "../../igfxfmid.h"
14 #include "usc_config.h"
15 #include "CppParserMacros.h"
16 #include "../../gtsysinfo.h"
17 #ifndef _USC_
18 #include "../../sku_wa.h"
19 #endif
20 
21 // redefine simple types to avoid dependency on external headers
// Local definitions of the basic integer aliases, one alias per typedef.
// DWORD and ULONG map to unsigned long on _WIN32 and to unsigned int elsewhere
// (presumably so that they stay the same width on every target -- confirm).
#ifdef _WIN32
    typedef unsigned long   DWORD;
    typedef unsigned long   ULONG;
#else
    typedef unsigned int    DWORD;
    typedef unsigned int    ULONG;
#endif
    typedef ULONG          *PULONG;

    typedef unsigned short  USHORT;
    typedef USHORT         *PUSHORT;
    typedef unsigned short  WORD;
    typedef WORD           *PWORD;
32 
// Note: the declarations that follow, outside the USC namespace, are also used by .c files.
34 
#ifdef __cplusplus
// C++ only: re-declares PLATFORM as an alias of itself, preceded by USC_PARAM().
// NOTE(review): USC_PARAM() presumably comes from CppParserMacros.h and tags the
// following declaration for a parameter parser -- confirm.
USC_PARAM()
typedef PLATFORM PLATFORM;
#endif // __cplusplus
39 
// Slimmed version of the full GT_SYSTEM_INFO structure ( in inc/umKmInc/sharedata.h ).
// USC::InitializeUscAdapterInfo fills every field below from the same-named
// field of its bigGTSystemInfo argument.
USC_PARAM()
typedef struct _SUscGTSystemInfo
{
    // Fields from GT_SYSTEM_INFO structure which contains actual,current number of EU and number of Threads.
    unsigned int    EUCount;            // Total no. of enabled EUs.
    unsigned int    ThreadCount;        // Total no. of system threads available.
    unsigned int    SliceCount;         // Total no. of enabled slices
    unsigned int    SubSliceCount;      // Total no. of enabled subslices.
    unsigned int    SLMSizeInKb;        // SLM Size

    bool           IsDynamicallyPopulated;         // System details populated either via fuse reg. (TRUE) or hard-coded (FALSE)

    // Thread totals and topology limits.
    // NOTE(review): exact semantics are defined by the full GT_SYSTEM_INFO -- confirm there.
    unsigned int   TotalPsThreadsWindowerRange;
    unsigned int   TotalVsThreads;
    unsigned int   TotalVsThreads_Pocs;
    unsigned int   TotalGsThreads;
    unsigned int   TotalDsThreads;
    unsigned int   TotalHsThreads;
    unsigned int   MaxEuPerSubSlice;
    unsigned int   EuCountPerPoolMax;
    unsigned int   EuCountPerPoolMin;
    unsigned int   MaxSlicesSupported;
    unsigned int   MaxSubSlicesSupported;
    unsigned int   CsrSizeInMb;
} SUscGTSystemInfo;
66 
// Slimmed version of the full sku feature table ( in sku_wa.h ).
// Every member is a one-bit flag. The "//...//" markers show where members of
// the full table were left out.
// Note: neither USC::InitializeUscAdapterInfo nor USC::InitializeUscSkuTable
// writes FtrSGTPVSKUStrapPresent.
USC_PARAM()
typedef struct _SUscSkuFeatureTable
{
    //...//
    // flags 1 = available, 0 = not available

    // struct _sku_Core
    unsigned int   FtrDesktop                       : 1;  // Whether Desktop
    unsigned int   FtrChannelSwizzlingXOREnabled    : 1;  // Indicates Channel Swizzling XOR feature support
    //...//
    unsigned int   FtrGtBigDie                      : 1;  // Indicate Big Die Silicon
    unsigned int   FtrGtMediumDie                   : 1;  // Indicate Medium Die Silicon
    unsigned int   FtrGtSmallDie                    : 1;  // Indicate Small Die Silicon
    //...//
    unsigned int   FtrGT1                           : 1;  // Indicates GT1 part
    unsigned int   FtrGT1_5                         : 1;  // Indicates GT1.5 part
    unsigned int   FtrGT2                           : 1;  // Indicates GT2 part
    unsigned int   FtrGT3                           : 1;  // Indicates GT3 part
    unsigned int   FtrGT4                           : 1;  // Indicates GT4 part
    //...//
    unsigned int   FtrIVBM0M1Platform               : 1;  // Indicates whether the platform in IVB M0/M1
    unsigned int   FtrGTL                           : 1;  // Indicates GT Low-end performance part  - New for HSW
    unsigned int   FtrGTM                           : 1;  // Indicates GT Medium performance part   - New for HSW
    unsigned int   FtrGTH                           : 1;  // Indicates GT High-end performance part - New for HSW
    unsigned int   FtrSGTPVSKUStrapPresent          : 1;  // Switchable Graphics Present
    unsigned int   FtrGTA                           : 1;  // Indicates the platform is a Gen9 based LCLP Broxton platform A
    unsigned int   FtrGTC                           : 1;  // Indicates the platform is a Gen9 based LCLP Broxton platform C
    unsigned int   FtrGTX                           : 1;  // Indicates the platform is a Gen9 based LCLP Broxton platform X
    unsigned int   Ftr5Slice                        : 1;  // Indicates KBL 15x8 SKU
    //...//
    unsigned int   FtrGpGpuMidThreadLevelPreempt    : 1;  // Indicates thread level batch Preemption
    unsigned int   FtrIoMmuPageFaulting             : 1;  // Indicates when page faulting is enabled
    unsigned int   FtrWddm2Svm : 1;   // WDDMv2 SVM Model (Set in platform SKU files, but disabled by GMM as appropriate for given system.)
    unsigned int   FtrPooledEuEnabled : 1;

    unsigned int   FtrResourceStreamer : 1;
    unsigned int   FtrLocalMemory : 1;
} SUscSkuFeatureTable;
106 
// One-bit workaround switches supplied by the UMD AIL.
// The same structure is also exported under the name SCompilerAilInfo.
USC_PARAM()
typedef struct _SUscAilInfo
{
    unsigned int EnableWaCheckResourceFormatForNFSRivals          : 1;  // Enables the WaCheckResourceFormatForNFSRivals w/a based on UMD AIL
    unsigned int WaDisableUnsafeArithmeticOperationRefactoring    : 1;  // Holds the WaDisableUnsafeArithmeticOperationRefactoring w/a passed from UMD AIL
    unsigned int WaTrigFuncRangeReduction                         : 1;  // Compiler Workaround for affected games to do range reduction of trig functions
    unsigned int WaHiddenIndexableTempSlot                        : 1;  // Reserve extra space for indexable temp for out-of-bound access
} SUscAilInfo, SCompilerAilInfo;
115 
// Aggregate of the three slim adapter descriptions.
// USC::InitializeUscAdapterInfo writes UscSkuFeatureTable and UscGTSystemInfo;
// it does not touch UscAilInfo.
USC_PARAM()
typedef struct _SUscAdapterInfo
{
    SUscSkuFeatureTable UscSkuFeatureTable;     // slim sku feature flags
    SUscGTSystemInfo    UscGTSystemInfo;        // slim GT system info
    SUscAilInfo         UscAilInfo;             // AIL workaround switches
} SUscAdapterInfo;
123 
// When _USC_ is defined this header does not include sku_wa.h, so
// SKU_FEATURE_TABLE falls back to the slim table unless it is already defined.
#ifdef _USC_
#ifndef SKU_FEATURE_TABLE
#define SKU_FEATURE_TABLE  SUscSkuFeatureTable
#endif
#endif

//Updated interface structure that will be used by DX9, DX10 and DX12
USC_PARAM()
 typedef struct _SCompilerPlatformInfo
{
    GT_SYSTEM_INFO          sysInfo;            // full GT system info
    SCompilerAilInfo        AilInfo;            // AIL workaround switches
    SUscSkuFeatureTable     uscSkuFeatureTable; // This slimmed version of the full sku feature table ( in sku_wa.h )
    SKU_FEATURE_TABLE       skuFeatureTable;    // full table, or the slim one when the fallback macro above is active
} SCompilerPlatformInfo;

// Remove any SKU_FEATURE_TABLE macro definition for ICBE_LHDM / ICBE_LINUX builds.
#if defined ICBE_LHDM || defined ICBE_LINUX
#undef SKU_FEATURE_TABLE
#endif
143 
144 #ifdef __cplusplus
145 namespace USC
146 {
// USC-namespace alias for the global SCompilerPlatformInfo structure.
typedef SCompilerPlatformInfo SUSCCompilerPlatformInfo;
148 /*****************************************************************************\
149 
150 Function:
151     InitializeUscAdapterInfo
152 
153 Description:
154     Initializes the USC (slim) adapter info structure by coping required fields
155     from the big sku table and gtSystemInfo structure.
156     This is a helper function for USC clients.
157 
158 Input:
159     BigSkuTable  bigSkuTable - sku feature table containing to initialize
160                               USC sku feature table.
161 
162     BigGTSystemInfo bigGTSystemInfo - GT_SYSTEM_INFO containing actuall data about
163                                       EU and Thread count.
164 Output:
165     SUscAdapterInfo &uscAdpaterInfo - USC adapter info structure correctly
166                                        initialized.
167 
168 \*****************************************************************************/
169 template<typename BigSkuTable, typename BigGTSystemInfo>
InitializeUscAdapterInfo(const BigSkuTable & bigSkuTable,const BigGTSystemInfo & bigGTSystemInfo,SUscAdapterInfo & uscAdpaterInfo)170 inline void InitializeUscAdapterInfo(
171     const BigSkuTable   &bigSkuTable,
172     const BigGTSystemInfo &bigGTSystemInfo,
173     SUscAdapterInfo &uscAdpaterInfo )
174 {
175     uscAdpaterInfo.UscSkuFeatureTable.FtrDesktop         = bigSkuTable.FtrDesktop;          // Whether Desktop
176 
177     uscAdpaterInfo.UscSkuFeatureTable.FtrGtBigDie        = bigSkuTable.FtrGtBigDie;         // Indicates Big Die Silicon.
178     uscAdpaterInfo.UscSkuFeatureTable.FtrGtMediumDie     = bigSkuTable.FtrGtMediumDie;      // Indicates Medium Die Silicon.
179     uscAdpaterInfo.UscSkuFeatureTable.FtrGtSmallDie      = bigSkuTable.FtrGtSmallDie;       // Indicates Small Die Silicon.
180     uscAdpaterInfo.UscSkuFeatureTable.FtrGT1             = bigSkuTable.FtrGT1;              // Indicates GT1 part.
181     uscAdpaterInfo.UscSkuFeatureTable.FtrGT1_5           = bigSkuTable.FtrGT1_5;            // Indicates GT1.5 part.
182     uscAdpaterInfo.UscSkuFeatureTable.FtrGT2             = bigSkuTable.FtrGT2;              // Indicates GT2 part.
183     uscAdpaterInfo.UscSkuFeatureTable.FtrGT3             = bigSkuTable.FtrGT3;              // Indicates GT3 part.
184     uscAdpaterInfo.UscSkuFeatureTable.FtrGT4             = bigSkuTable.FtrGT4;              // Indicates GT4 part.
185     uscAdpaterInfo.UscSkuFeatureTable.FtrGTL             = bigSkuTable.FtrGT1;              // Indicates GT Low-end performance part.
186     uscAdpaterInfo.UscSkuFeatureTable.FtrGTM             = bigSkuTable.FtrGT2;              // Indicates GT Medium performance part.
187     uscAdpaterInfo.UscSkuFeatureTable.FtrGTH             = bigSkuTable.FtrGT3;              // Indicates GT High-end performance part.
188     uscAdpaterInfo.UscSkuFeatureTable.FtrIVBM0M1Platform = bigSkuTable.FtrIVBM0M1Platform;  // Indicates whether the platform in IVB M0/M1
189     uscAdpaterInfo.UscSkuFeatureTable.FtrGTA             = bigSkuTable.FtrGTA;            // Indicates a Gen9 based LCLP Broxton platform A.
190     uscAdpaterInfo.UscSkuFeatureTable.FtrGTC             = bigSkuTable.FtrGTC;            // Indicates a Gen9 based LCLP Broxton platform C.
191     uscAdpaterInfo.UscSkuFeatureTable.FtrGTX             = bigSkuTable.FtrGTX;            // Indicates a Gen9 based LCLP Broxton platform X.
192     uscAdpaterInfo.UscSkuFeatureTable.Ftr5Slice          = bigSkuTable.Ftr5Slice;       // Indicates KBL 15x8 SKU  HALO Sku
193     uscAdpaterInfo.UscSkuFeatureTable.FtrGpGpuMidThreadLevelPreempt = bigSkuTable.FtrGpGpuMidThreadLevelPreempt; //Indicates if preEmption is enabled (HSW+)
194     uscAdpaterInfo.UscSkuFeatureTable.FtrIoMmuPageFaulting = bigSkuTable.FtrIoMmuPageFaulting; //Indicates if page faulting is enabled.
195     uscAdpaterInfo.UscSkuFeatureTable.FtrWddm2Svm = bigSkuTable.FtrWddm2Svm;
196     uscAdpaterInfo.UscSkuFeatureTable.FtrPooledEuEnabled = bigSkuTable.FtrPooledEuEnabled;
197     uscAdpaterInfo.UscSkuFeatureTable.FtrResourceStreamer = bigSkuTable.FtrResourceStreamer;
198     uscAdpaterInfo.UscSkuFeatureTable.FtrLocalMemory = bigSkuTable.FtrLocalMemory;
199 
200     uscAdpaterInfo.UscGTSystemInfo.EUCount = bigGTSystemInfo.EUCount;
201     uscAdpaterInfo.UscGTSystemInfo.ThreadCount = bigGTSystemInfo.ThreadCount;
202     uscAdpaterInfo.UscGTSystemInfo.SliceCount = bigGTSystemInfo.SliceCount;
203     uscAdpaterInfo.UscGTSystemInfo.SubSliceCount = bigGTSystemInfo.SubSliceCount;
204     uscAdpaterInfo.UscGTSystemInfo.SLMSizeInKb = bigGTSystemInfo.SLMSizeInKb;
205     uscAdpaterInfo.UscGTSystemInfo.TotalPsThreadsWindowerRange = bigGTSystemInfo.TotalPsThreadsWindowerRange;
206     uscAdpaterInfo.UscGTSystemInfo.TotalVsThreads = bigGTSystemInfo.TotalVsThreads;
207     uscAdpaterInfo.UscGTSystemInfo.TotalVsThreads_Pocs = bigGTSystemInfo.TotalVsThreads_Pocs;
208     uscAdpaterInfo.UscGTSystemInfo.TotalDsThreads = bigGTSystemInfo.TotalDsThreads;
209     uscAdpaterInfo.UscGTSystemInfo.TotalGsThreads = bigGTSystemInfo.TotalGsThreads;
210     uscAdpaterInfo.UscGTSystemInfo.TotalHsThreads = bigGTSystemInfo.TotalHsThreads;
211     uscAdpaterInfo.UscGTSystemInfo.MaxEuPerSubSlice = bigGTSystemInfo.MaxEuPerSubSlice;
212     uscAdpaterInfo.UscGTSystemInfo.EuCountPerPoolMax = bigGTSystemInfo.EuCountPerPoolMax;
213     uscAdpaterInfo.UscGTSystemInfo.EuCountPerPoolMin = bigGTSystemInfo.EuCountPerPoolMin;
214     uscAdpaterInfo.UscGTSystemInfo.MaxSlicesSupported = bigGTSystemInfo.MaxSlicesSupported;
215     uscAdpaterInfo.UscGTSystemInfo.MaxSubSlicesSupported = bigGTSystemInfo.MaxSubSlicesSupported;
216     uscAdpaterInfo.UscGTSystemInfo.IsDynamicallyPopulated = bigGTSystemInfo.IsDynamicallyPopulated;
217     uscAdpaterInfo.UscGTSystemInfo.CsrSizeInMb = bigGTSystemInfo.CsrSizeInMb;
218 }
219 
220 /*****************************************************************************\
221 
222 Function:
223     InitializeUscSkuTable
224 
225 Description:
226     Initializes the USC (slim) sku table by coping required fields from the big
227     sku table. This is a helper function for USC clients.
228 
229 Input:
230     BigSkuTable  bigSkuTable - sku feature table containing to initialize
231                               USC sku feature table.
232 
233 Output:
234     SUscSkuFeatureTable &uscSkuTable - USC sku feature table correctly
235                                        initialized.
236 
237 \*****************************************************************************/
238 // Some API clients (OCL,DXVA) do not provide yet GT_SYSTEM_INFO structure.
239 // Shader Compiler object must be created with "old" way i.e. usage of fixed EU,Thread count values.
240 // The InitializeUscSkuTable method is called and uscAdpaterInfo.UscGTSystemInfo is zeroed.
241 
242 // This method should be removed in the future.
243 template<typename BigSkuTable>
InitializeUscSkuTable(const BigSkuTable & bigSkuTable,SUscSkuFeatureTable & uscSkuTable)244 inline void InitializeUscSkuTable(
245     const BigSkuTable   &bigSkuTable,
246     SUscSkuFeatureTable &uscSkuTable )
247 {
248     uscSkuTable.FtrDesktop         = bigSkuTable.FtrDesktop;          // Whether Desktop
249     uscSkuTable.FtrChannelSwizzlingXOREnabled = bigSkuTable.FtrChannelSwizzlingXOREnabled; // Indicates Channel Swizzling XOR feature support
250 
251     uscSkuTable.FtrGtBigDie        = bigSkuTable.FtrGtBigDie;         // Indicates Big Die Silicon.
252     uscSkuTable.FtrGtMediumDie     = bigSkuTable.FtrGtMediumDie;      // Indicates Medium Die Silicon.
253     uscSkuTable.FtrGtSmallDie      = bigSkuTable.FtrGtSmallDie;       // Indicates Small Die Silicon.
254     uscSkuTable.FtrGT1             = bigSkuTable.FtrGT1;              // Indicates GT1 part.
255     uscSkuTable.FtrGT1_5           = bigSkuTable.FtrGT1_5;            // Indicates GT1.5 part.
256     uscSkuTable.FtrGT2             = bigSkuTable.FtrGT2;              // Indicates GT2 part.
257     uscSkuTable.FtrGT3             = bigSkuTable.FtrGT3;              // Indicates GT3 part.
258     uscSkuTable.FtrGT4             = bigSkuTable.FtrGT4;              // Indicates GT4 part.
259     uscSkuTable.FtrGTL             = bigSkuTable.FtrGT1;              // Indicates GT Low-end performance part.
260     uscSkuTable.FtrGTM             = bigSkuTable.FtrGT2;              // Indicates GT Medium performance part.
261     uscSkuTable.FtrGTH             = bigSkuTable.FtrGT3;              // Indicates GT High-end performance part.
262     uscSkuTable.FtrIVBM0M1Platform = bigSkuTable.FtrIVBM0M1Platform;  // Indicates whether the platform in IVB M0/M1
263     uscSkuTable.FtrGTA            = bigSkuTable.FtrGTA;            // Indicates a Gen9 based LCLP Broxton platform A.
264     uscSkuTable.FtrGTC            = bigSkuTable.FtrGTC;            // Indicates a Gen9 based LCLP Broxton platform C.
265     uscSkuTable.FtrGTX            = bigSkuTable.FtrGTX;            // Indicates a Gen9 based LCLP Broxton platform X.
266     uscSkuTable.Ftr5Slice         = bigSkuTable.Ftr5Slice;       // Indicates KBL 15x8 SKU  HALO Sku
267     uscSkuTable.FtrGpGpuMidThreadLevelPreempt = bigSkuTable.FtrGpGpuMidThreadLevelPreempt; //Indicates if preEmption is enabled (HSW+)
268     uscSkuTable.FtrIoMmuPageFaulting = bigSkuTable.FtrIoMmuPageFaulting; //Indicates if page faulting is enabled.
269     uscSkuTable.FtrLocalMemory = bigSkuTable.FtrLocalMemory;
270 }
271 
272 
// Compilation effort levels.
// NUM_OPTIMIZER_LEVELS follows the last real level and so serves as the level
// count (assuming USC_PARAM_HIDE() does not alter enumerator values -- confirm).
USC_PARAM()
enum OPTIMIZER_LEVEL
{
    OPTIMIZER_LEVEL_0,     // -o0 fast compilation
    OPTIMIZER_LEVEL_1,     // -o1 full compilation (default)
    OPTIMIZER_LEVEL_2,     // -o2 specialized compilation
    USC_PARAM_HIDE()
    NUM_OPTIMIZER_LEVELS
};
282 
// Which SIMD compilations are requested.
// NUM_SIMD_LEVELS follows the last real level and so serves as the level
// count (assuming USC_PARAM_HIDE() does not alter enumerator values -- confirm).
USC_PARAM()
enum SIMD_LEVEL
{
    SIMD_LEVEL_DEFAULT,     // request all SIMD compilations at once
    SIMD_LEVEL_LOW,         // request SIMD8 only
    SIMD_LEVEL_HIGH,        // request all higher SIMD modes
    USC_PARAM_HIDE()
    NUM_SIMD_LEVELS
};
292 
// Shader stages. NUM_SHADER_TYPES is the stage count and sizes
// SBindingTableLayout::m_Layout.
enum SHADER_TYPE
{
    VERTEX_SHADER    = 0,
    GEOMETRY_SHADER  = 1,
    PIXEL_SHADER     = 2,
    HULL_SHADER      = 3,
    DOMAIN_SHADER    = 4,
    COMPUTE_SHADER   = 5,
    NUM_SHADER_TYPES = 6
};
303 
// SIMD modes. NUM_SIMD_MODES is the mode count.
enum SIMD_MODE
{
    SIMD_MODE_8    = 0,
    SIMD_MODE_16   = 1,
    SIMD_MODE_32   = 2,
    SIMD_MODE_4x2  = 3,
    NUM_SIMD_MODES = 4
};
312 
// Pixel shader dispatch widths. NUM_PS_DISPATCH_TYPES is the count.
enum PS_DISPATCH_TYPES
{
    PS_SIMD8_DISPATCH     = 0,
    PS_SIMD16_DISPATCH    = 1,
    PS_SIMD32_DISPATCH    = 2,
    NUM_PS_DISPATCH_TYPES = 3
};
320 
// API clients of USC. NUM_USC_CLIENT_TYPES is the client count.
enum USC_CLIENT_TYPE
{
    USC_CLIENT_D3D9      = 0,
    USC_CLIENT_D3D10     = 1,
    USC_CLIENT_D3D12     = 2,
    USC_CLIENT_OGL       = 3,
    USC_CLIENT_OCL       = 4,
    NUM_USC_CLIENT_TYPES = 5
};
330 
// Binding table layout of a single shader stage: the indices (or min/max index
// ranges) of each kind of surface, plus a few offsets relative to those ranges.
// SBindingTableLayout holds one instance per SHADER_TYPE.
struct SShaderStageBTLayout
{
    // systemThreadIdx should be the same for all shader stages.
    unsigned int   systemThreadIdx;

    unsigned int   minConstantBufferIdx;
    unsigned int   maxConstantBufferIdx;
    unsigned int   streamOutmputStatisticsIdx;  // (sic) field name is misspelled; kept as-is for source compatibility
    unsigned int   minStreamOutputBufferIdx;
    unsigned int   maxStreamOutputBufferIdx;
    unsigned int   minUAVIdx;          // minRTorUAVIdx
    unsigned int   maxUAVIdx;          // maxRTorUAVIdx
    unsigned int   minUAVCounterIdx;   // pre-DEVHSW
    unsigned int   maxUAVCounterIdx;   // pre-DEVHSW
    unsigned int   JournalIdx;         // journal resource index used by Kernel Trace / Profiling query to measure kernel execution time
    unsigned int   JournalCounterIdx;  // journal counter resource index
    unsigned int   TGSMIdx;
    unsigned int   minColorBufferIdx;
    unsigned int   maxColorBufferIdx;
    unsigned int   minResourceIdx;
    unsigned int   maxResourceIdx;
    unsigned int   NULLSurfaceIdx;
    unsigned int   RasterizerInfoSurfaceIdx;    // Special SS for SampleInfo on rasterizer0 for OGL.
    unsigned int   TPMIdx;
    unsigned int   surfaceScratchIdx;
    unsigned int   maxBTsize;

    // The three following fields are offsets from minConstantBufferIdx.
    // The NULL CB offset should be programmed right after the
    // last constant buffer index. Such programming allows USC
    // to correctly clamp indexable CB indexes (when relative constant
    // buffer addressing is used in a shader), so that out of
    // bounds reads return 0. Incorrect programming of this
    // field may cause out of bounds accesses not to return 0.
    unsigned int   constantBufferNullBoundOffset;
    unsigned int   immediateConstantBufferOffset;
    unsigned int   interfaceConstantBufferOffset;

    // The following field is an offset from minResourceIdx.
    // The NULL resource offset should be programmed right after the
    // last shader resource index. Such programming allows USC
    // to correctly clamp indexable resource indexes (when relative
    // shader resource addressing is used in a shader), so that out of
    // bounds reads return 0. Incorrect programming of this
    // field may cause out of bounds accesses not to return 0.
    unsigned int   resourceNullBoundOffset;

    // Setting this flag reroutes all BTI reads from the shader via the
    // Bindless heap (SKL Bindless, for DX testing).
    bool   BindLessBTIEnable;

    // Used to access the indirect draw arguments buffer.  Used by
    // geometry reordering optimization.
    unsigned int   indirectBufferOffset;
};
386 
// Binding table layouts for all shader stages; m_Layout has NUM_SHADER_TYPES
// entries (presumably indexed by SHADER_TYPE -- confirm against users).
USC_PARAM()
struct SBindingTableLayout
{
    SShaderStageBTLayout m_Layout[NUM_SHADER_TYPES];
};
392 
// Global constants to start from when defining a custom BTI layout.
// Both are initialized with empty braces, i.e. every member is zero / false.
// Being const at namespace scope, they have internal linkage in C++, so each
// translation unit that includes this header gets its own copy.
const SBindingTableLayout  g_cZeroBindingTableLayout  = {};
const SShaderStageBTLayout g_cZeroShaderStageBTLayout = {};
396 
/*****************************************************************************\
DEFINE: GTDI_MAX_KI_OFFSETS

Description:
    Number of entries in STracingOptions::Offsets and
    STracingOptions::OffsetType.
\*****************************************************************************/
#define GTDI_MAX_KI_OFFSETS 26

/*****************************************************************************\
DEFINE: GTDI_MAX_KI_AGGREGATED_OFFSETS

Description:
    NOTE(review): not referenced in the visible part of this header;
    presumably the offset limit for the *_AGGREGATED build types -- confirm.
\*****************************************************************************/
#define GTDI_MAX_KI_AGGREGATED_OFFSETS 20
406 
/*****************************************************************************\
ENUM: GTDI_KI_BUILD_TYPE_ENUM

Description:
    Kernel build types. Values are explicit and consecutive;
    GTDI_NUM_KERNEL_PROFILING_TYPES equals the number of build types.
\*****************************************************************************/
typedef enum GTDI_KI_BUILD_TYPE_ENUM
{
    GTDI_KERNEL_REGULAR                 = 0,   //use that value to switch off kernel build override
    GTDI_KERNEL_TRACE                   = 1,
    GTDI_KERNEL_PROFILE                 = 2,
    GTDI_KERNEL_ISA_PROFILE             = 3,
    GTDI_KERNEL_PROFILE_AGGREGATED      = 4,
    GTDI_KERNEL_ISA_PROFILE_AGGREGATED  = 5,
    GTDI_KERNEL_ISA_COUNTERS            = 6,
    GTDI_KERNEL_GT_PIN_COMPILER         = 7,
    GTDI_KERNEL_GPGPU_TRACE             = 8,
    GTDI_NUM_KERNEL_PROFILING_TYPES     = 9
} GTDI_KI_BUILD_TYPE;
423 
/*****************************************************************************\
ENUM: GTDI_KERNEL_PROFILING_POINT_TYPE_ENUM

Description:
    Profiling point kinds. Each enumerator is a distinct single bit
    (bits 0 through 8), so values can be OR-ed together.
\*****************************************************************************/
typedef enum GTDI_KERNEL_PROFILING_POINT_TYPE_ENUM
{
    GTDI_PROFILING_POINT_TIMESTAMP_INTEL          =   1 << 0,
    GTDI_PROFILING_POINT_STALL_COUNTER_INTEL      =   1 << 1,
    GTDI_PROFILING_POINT_SAMPLER_MESSAGE_INTEL    =   1 << 2,
    GTDI_PROFILING_POINT_DATA_PORT_MESSAGE_INTEL  =   1 << 3,
    GTDI_PROFILING_POINT_WORKGROUP_ID_X_INTEL     =   1 << 4,
    GTDI_PROFILING_POINT_WORKGROUP_ID_Y_INTEL     =   1 << 5,
    GTDI_PROFILING_POINT_WORKGROUP_ID_Z_INTEL     =   1 << 6,
    GTDI_PROFILING_POINT_EXECUTION_MASK_INTEL     =   1 << 7,
    GTDI_PROFILING_POINT_HIT_COUNTER_INTEL        =   1 << 8
} GTDI_KERNEL_PROFILING_POINT_TYPE;
439 
/*****************************************************************************\
STRUCT: STracingOptions

Description:
    Kernel tracing / instrumentation options. Offsets and OffsetType each hold
    GTDI_MAX_KI_OFFSETS entries.
\*****************************************************************************/
struct STracingOptions
{
    unsigned int        InstrumentationType;    // presumably a GTDI_KI_BUILD_TYPE value -- confirm
    unsigned int        KernelID;
    unsigned int        OffsetCount;            // presumably the number of used entries in Offsets / OffsetType -- confirm
    unsigned int        Offsets[GTDI_MAX_KI_OFFSETS];
    unsigned int        OffsetType[GTDI_MAX_KI_OFFSETS];    // presumably GTDI_KERNEL_PROFILING_POINT_TYPE bits -- confirm
    unsigned int        UseEUThreadMasks;
    unsigned int        PartitionCount;
    unsigned int        EUMask;
    unsigned int        ThreadCount;
    unsigned int        CfgID;
    unsigned int        GatherGatewayTimestamp;
    unsigned int        ShaderILCodeSize;       // presumably the size of the buffer at ShaderILCode -- confirm units
    void*               ShaderILCode;
    char*               KernelName;
};
460 
/*****************************************************************************\
ENUM: GFX3DPRIMITIVE_TOPOLOGY_TYPE

Description:
    3D primitive topology codes. Values are explicit: 0x01-0x14 and 0x16 for
    the fixed topologies (0x00 and 0x15 are not assigned here), and
    0x20-0x3F for patch lists, where GFX3DPRIM_PATCHLIST_n == 0x1F + n.
\*****************************************************************************/
enum GFX3DPRIMITIVE_TOPOLOGY_TYPE
{
    GFX3DPRIM_POINTLIST         = 0x01,
    GFX3DPRIM_LINELIST          = 0x02,
    GFX3DPRIM_LINESTRIP         = 0x03,
    GFX3DPRIM_TRILIST           = 0x04,
    GFX3DPRIM_TRISTRIP          = 0x05,
    GFX3DPRIM_TRIFAN            = 0x06,
    GFX3DPRIM_QUADLIST          = 0x07,
    GFX3DPRIM_QUADSTRIP         = 0x08,
    GFX3DPRIM_LINELIST_ADJ      = 0x09,
    GFX3DPRIM_LINESTRIP_ADJ     = 0x0A,
    GFX3DPRIM_TRILIST_ADJ       = 0x0B,
    GFX3DPRIM_TRISTRIP_ADJ      = 0x0C,
    GFX3DPRIM_TRISTRIP_REVERSE  = 0x0D,
    GFX3DPRIM_POLYGON           = 0x0E,
    GFX3DPRIM_RECTLIST          = 0x0F,
    GFX3DPRIM_LINELOOP          = 0x10,
    GFX3DPRIM_POINTLIST_BF      = 0x11,
    GFX3DPRIM_LINESTRIP_CONT    = 0x12,
    GFX3DPRIM_LINESTRIP_BF      = 0x13,
    GFX3DPRIM_LINESTRIP_CONT_BF = 0x14,
    // 0x15 is not assigned
    GFX3DPRIM_TRIFAN_NOSTIPPLE  = 0x16,
    // Patch lists: the suffix is n, the value is 0x1F + n
    GFX3DPRIM_PATCHLIST_1       = 0x20,
    GFX3DPRIM_PATCHLIST_2       = 0x21,
    GFX3DPRIM_PATCHLIST_3       = 0x22,
    GFX3DPRIM_PATCHLIST_4       = 0x23,
    GFX3DPRIM_PATCHLIST_5       = 0x24,
    GFX3DPRIM_PATCHLIST_6       = 0x25,
    GFX3DPRIM_PATCHLIST_7       = 0x26,
    GFX3DPRIM_PATCHLIST_8       = 0x27,
    GFX3DPRIM_PATCHLIST_9       = 0x28,
    GFX3DPRIM_PATCHLIST_10      = 0x29,
    GFX3DPRIM_PATCHLIST_11      = 0x2A,
    GFX3DPRIM_PATCHLIST_12      = 0x2B,
    GFX3DPRIM_PATCHLIST_13      = 0x2C,
    GFX3DPRIM_PATCHLIST_14      = 0x2D,
    GFX3DPRIM_PATCHLIST_15      = 0x2E,
    GFX3DPRIM_PATCHLIST_16      = 0x2F,
    GFX3DPRIM_PATCHLIST_17      = 0x30,
    GFX3DPRIM_PATCHLIST_18      = 0x31,
    GFX3DPRIM_PATCHLIST_19      = 0x32,
    GFX3DPRIM_PATCHLIST_20      = 0x33,
    GFX3DPRIM_PATCHLIST_21      = 0x34,
    GFX3DPRIM_PATCHLIST_22      = 0x35,
    GFX3DPRIM_PATCHLIST_23      = 0x36,
    GFX3DPRIM_PATCHLIST_24      = 0x37,
    GFX3DPRIM_PATCHLIST_25      = 0x38,
    GFX3DPRIM_PATCHLIST_26      = 0x39,
    GFX3DPRIM_PATCHLIST_27      = 0x3A,
    GFX3DPRIM_PATCHLIST_28      = 0x3B,
    GFX3DPRIM_PATCHLIST_29      = 0x3C,
    GFX3DPRIM_PATCHLIST_30      = 0x3D,
    GFX3DPRIM_PATCHLIST_31      = 0x3E,
    GFX3DPRIM_PATCHLIST_32      = 0x3F
};
520 
/*****************************************************************************\
ENUM: GFX3DSTATE_PROGRAM_FLOW

Description:
    Two-valued program flow selector (multiple = 0, single = 1).
\*****************************************************************************/
enum GFX3DSTATE_PROGRAM_FLOW
{
    GFX3DSTATE_PROGRAM_FLOW_MULTIPLE  = 0x0,
    GFX3DSTATE_PROGRAM_FLOW_SINGLE    = 0x1
};
529 
/*****************************************************************************\
ENUM: GFX3DSTATE_FLOATING_POINT_MODE

Description:
    Indicates the floating point mode to be used by the hardware when running
    a compiled kernel program (IEEE-754 = 0, non-IEEE-754 = 1).
\*****************************************************************************/
enum GFX3DSTATE_FLOATING_POINT_MODE
{
    GFX3DSTATE_FLOATING_POINT_IEEE_754        = 0x0,
    GFX3DSTATE_FLOATING_POINT_NON_IEEE_754    = 0x1
};
542 
543 
/*****************************************************************************\
ENUM: GFX3DSTATE_POSITIONXY_OFFSET

Description:
    Position XY offset selector. Value 0x1 is reserved and has no enumerator.
\*****************************************************************************/
enum GFX3DSTATE_POSITIONXY_OFFSET
{
    GFX3DSTATE_POSITIONXY_OFFSET_NONE       = 0x0,
    // Reserved                             = 0x1,
    GFX3DSTATE_POSITIONXY_OFFSET_CENTROID   = 0x2,
    GFX3DSTATE_POSITIONXY_OFFSET_SAMPLE     = 0x3
};
554 
/*****************************************************************************\
ENUM: GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE

Description:
    Position ZW interpolation selector. Value 0x1 is reserved and has no
    enumerator.
\*****************************************************************************/
enum GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE
{
    GFX3DSTATE_POSITIONZW_INTERPOLATION_PIXEL       = 0x0,
    // Reserved                                     = 0x1,
    GFX3DSTATE_POSITIONZW_INTERPOLATION_CENTROID    = 0x2,
    GFX3DSTATE_POSITIONZW_INTERPOLATION_SAMPLE      = 0x3
};
565 
566 
/*****************************************************************************\
ENUM: RENDERSTATE_FOG_FUNCTION

Description:
    Fog functions. NUM_RENDERSTATE_FOG_FUNCTIONS is the function count.
\*****************************************************************************/
enum RENDERSTATE_FOG_FUNCTION
{
    RENDERSTATE_FOG_VERTEX        = 0,
    RENDERSTATE_FOG_PIXEL_EXP     = 1,
    RENDERSTATE_FOG_PIXEL_EXP2    = 2,
    RENDERSTATE_FOG_PIXEL_LINEAR  = 3,
    NUM_RENDERSTATE_FOG_FUNCTIONS = 4
};
576 
/*****************************************************************************\
ENUM: RENDERSTATE_FOG_SOURCE

Description:
    Fog sources. NUM_RENDERSTATE_FOG_SOURCES is the source count.
\*****************************************************************************/
enum RENDERSTATE_FOG_SOURCE
{
    RENDERSTATE_FOG_SOURCE_Z              = 0,
    RENDERSTATE_FOG_SOURCE_W              = 1,
    RENDERSTATE_FOG_SOURCE_FOG_COORDINATE = 2,
    NUM_RENDERSTATE_FOG_SOURCES           = 3
};
585 
/*****************************************************************************\
ENUM: RENDERSTATE_ALPHATEST_FORMAT

Description:
    Alpha test formats. NUM_RENDERSTATE_ALPHATEST_FORMATS is the format count.
\*****************************************************************************/
enum RENDERSTATE_ALPHATEST_FORMAT
{
    RENDERSTATE_ALPHATEST_FORMAT_UNORM8  = 0,
    RENDERSTATE_ALPHATEST_FORMAT_FLOAT32 = 1,
    NUM_RENDERSTATE_ALPHATEST_FORMATS    = 2
};
593 
/*****************************************************************************\
ENUM: MAPFILTER_TYPE

Description:
    Map filter types. NUM_MAPFILTER_TYPES is the type count.
\*****************************************************************************/
enum MAPFILTER_TYPE
{
    MAPFILTER_TYPE_POINT  = 0,
    MAPFILTER_TYPE_LINEAR = 1,
    NUM_MAPFILTER_TYPES   = 2
};
603 
/*****************************************************************************\
ENUM: COMPARE_FUNCTION

Description:
    Comparison functions. NUM_COMPARE_FUNCTIONS is the function count.
\*****************************************************************************/
enum COMPARE_FUNCTION
{
    COMPARE_FUNCTION_ALWAYS   = 0,
    COMPARE_FUNCTION_NEVER    = 1,
    COMPARE_FUNCTION_LESS     = 2,
    COMPARE_FUNCTION_EQUAL    = 3,
    COMPARE_FUNCTION_LEQUAL   = 4,
    COMPARE_FUNCTION_GREATER  = 5,
    COMPARE_FUNCTION_NOTEQUAL = 6,
    COMPARE_FUNCTION_GEQUAL   = 7,
    NUM_COMPARE_FUNCTIONS     = 8
};
619 
/*****************************************************************************\
ENUM: STENCIL_OPERATION

Description:
    Stencil operations, with explicit values 0x0 through 0x7. Used by
    SSWStencilParams for the front-face and back-face stencil pass operations.
\*****************************************************************************/
enum STENCIL_OPERATION
{
    STENCIL_OP_STENCILOP_KEEP = 0x0,
    STENCIL_OP_STENCILOP_ZERO = 0x1,
    STENCIL_OP_STENCILOP_REPLACE = 0x2,
    STENCIL_OP_STENCILOP_INCRSAT = 0x3,
    STENCIL_OP_STENCILOP_DECRSAT = 0x4,
    STENCIL_OP_STENCILOP_INCR = 0x5,
    STENCIL_OP_STENCILOP_DECR = 0x6,
    STENCIL_OP_STENCILOP_INVERT = 0x7
};
634 
635 /*****************************************************************************\
636 STRUCT: SSWStencilParams
637 \*****************************************************************************/
638 struct SSWStencilParams
639 {
640     unsigned char           CheckForSWStencil : 1;
641     unsigned char           CompileForSWStencil : 1;
642     COMPARE_FUNCTION        FrontFaceStencilFunc;
643     COMPARE_FUNCTION        BackFaceStencilFunc;
644     STENCIL_OPERATION       FrontFaceStencilPassOp;
645     STENCIL_OPERATION  BackFaceStencilPassOp;
646 
SSWStencilParamsSSWStencilParams647     SSWStencilParams()
648     {
649         CheckForSWStencil = 0;
650         CompileForSWStencil = 0;
651         FrontFaceStencilFunc = COMPARE_FUNCTION_ALWAYS;
652         BackFaceStencilFunc = COMPARE_FUNCTION_ALWAYS;
653         FrontFaceStencilPassOp = STENCIL_OP_STENCILOP_KEEP;
654         BackFaceStencilPassOp = STENCIL_OP_STENCILOP_KEEP;
655     }
656 };
657 
658 struct NOSParams
659 {
660     SSWStencilParams* pSWStencilParams;
661 
NOSParamsNOSParams662     NOSParams()
663     {
664         pSWStencilParams = 0;
665     }
666 };
667 
/*****************************************************************************\
ENUM: INPUT_COVERAGE_MASK_MODE

Description:
    Input coverage mask modes, numbered consecutively from 0.
\*****************************************************************************/
enum INPUT_COVERAGE_MASK_MODE
{
    INPUT_COVERAGE_MASK_MODE_NORMAL = 0,
    INPUT_COVERAGE_MASK_MODE_INNER  = 1,
    INPUT_COVERAGE_MASK_MODE_DEPTH  = 2
};
677 
/*****************************************************************************\
ENUM: SYSTEM_THREAD_MODE

Description:
    Bitmask enum describing the System Thread mode. The System Thread may
    support shader debugging and/or the Context Save Restore (CSR) subroutine,
    also called GPGPU preemption. Each non-zero enumerator is a distinct
    single bit.
\*****************************************************************************/
typedef enum SYSTEM_THREAD_MODE_ENUM
{
    SYSTEM_THREAD_MODE_NONE         = 0x0,
    SYSTEM_THREAD_MODE_DEBUG        = 0x1,
    SYSTEM_THREAD_MODE_CSR          = 0x2,
    SYSTEM_THREAD_MODE_DEBUG_LOCAL  = 0x4
} SYSTEM_THREAD_MODE;
693 
/*****************************************************************************\
STRUCT: SSystemThreadKernelOutput

Description:
    Plain data record: a kernel program pointer and a state save area header
    pointer, each with a size, plus scratch space and resource sizes.
\*****************************************************************************/
struct SSystemThreadKernelOutput
{
    // Kernel start pointer.
    void*           m_pKernelProgram;

    // NOTE(review): presumably the size of the kernel program; the unit is
    // not stated here - confirm.
    unsigned int    m_KernelProgramSize;

    // Scratch space size in bytes.
    unsigned int    m_SystemThreadScratchSpace;

    // Resource size in bytes.
    unsigned int    m_SystemThreadResourceSize;

    // State save area header.
    void*           m_pStateSaveAreaHeader;

    // State save area header size in bytes.
    unsigned int    m_StateSaveAreaHeaderSize;
};
705 
/*****************************************************************************\
ENUM: USC_SHADER_COMPILER_CONTROLS

Description:
    Identifiers for IL level optimization controls.

    No enumerator has an explicit value, so each one's value is determined
    solely by its position in the list. Inserting, removing or reordering
    entries therefore changes the value of every later entry.
    NUM_USC_SHADER_COMPILER_CONTROLS is kept last so that it follows all
    controls declared before it.

    NOTE(review): USC_PARAM() and USC_PARAM_HIDE() are not defined in this
    part of the file; presumably they come from CppParserMacros.h - confirm
    how they treat this enum before changing its layout.

\*****************************************************************************/
USC_PARAM()
enum USC_SHADER_COMPILER_CONTROLS
{
    IndirectTemporaryRemovalEnable,
    CallCndTranslationEnable,
    LoopUnrollingEnable,
    ILPatternMatchingEnable,
    ConditionalExpressionSimplificationEnable,
    TrivialSwitchRemovalEnable,
    TrivialIfRemovalEnable,
    EarlyEOTAfterDiscardEnable,
    SwitchTranslationEnable,
    EarlyReturnRemovalEnable,
    InlineSubstitutionEnable,
    CallToSubroutineCallEnable,
    ConstantBufferToConstantRegisterEnable,
    ConstantBufferToConstantRegisterLDRAWEnable,
    ParseOpcodesToPrecModifierEnable,
    IndexedTempGRFCachingEnable,
    ILConstantFoldingEnable,
    ILConstantFoldingAggressive,
    PrintfExpansionEnable,
    TranslateVendorExtensionsEnable,
    RemoveDeadOutputEnable,
    PerformImageSerializeEnable,
    PointerLoadToIndexedLoad,

    // PatternMatch controls.
    FMulFAddToFMad,
    MovFCmpBranchToMovFCmp,
    MovCndToMov,
    LDStructuredScalarToVector,
    FLogFExp2ScalarToVector,
    MovFCmpToFMax,
    IfDiscardToDiscardCND,
    IfBranchFlattening,
    ContinueCndRemoval,
    FCmpGtToFMaxOrFCmpLtToFMin,
    PreserveFunctionRetMovs,

    // NOTE(review): several names in this group (lengths, pair counts,
    // factors) suggest numeric settings rather than on/off switches - confirm.
    MaxLoopUnrollLength,
    MaxConstantBufferPairs,
    PartialUnrollFactor,
    ProcessDynamicResourceIndexingEnable,
    IfConversionLength,

    // Terminator entry; must remain the last enumerator.
    USC_PARAM_HIDE()
    NUM_USC_SHADER_COMPILER_CONTROLS
};
761 
/*****************************************************************************\
ENUM: USC_KERNEL_COMPILER_CONTROLS

Description:
    Identifiers for LLIR/ISA level optimization controls.

    No enumerator has an explicit value, so each one's value is determined
    solely by its position in the list. Inserting, removing or reordering
    entries therefore changes the value of every later entry.
    NUM_USC_KERNEL_COMPILER_CONTROLS is kept last so that it follows all
    controls declared before it.

    NOTE(review): USC_PARAM() and USC_PARAM_HIDE() are not defined in this
    part of the file; presumably they come from CppParserMacros.h - confirm
    how they treat this enum before changing its layout.

\*****************************************************************************/
USC_PARAM()
enum USC_KERNEL_COMPILER_CONTROLS
{
    ValueNumberingEnable,
    BlockLoadDirectConstantsEnable,
    BlockLoadIndirectConstantsEnable,
    BlockLoadScatteredConstantsEnable,
    OptimizeReplicateEnable,
    Optimize64bitReplicateEnable,
    ReorderInstructionsEnable,
    ClusterSamplesEnable,
    DeferredInterpolationEnable,
    AtomicReorderInstructionsEnable,
    CoalescingEnable,
    CoalesceCopiesEnable,
    CoalesceBitcastsEnable,
    CoalesceSplitsEnable,
    CoalesceJoinsEnable,
    CoalesceMultiplePayloadsEnable,
    CoalesceHeadersLastEnable,
    OptimizeResourceLoadsEnable,
    ISASchedulingEnable,
    Reduce64To32ALUBitEnable,
    Reduce32To8ALUBitEnable,
    Reduce64To32ALUTopDownPassBitEnable,
    Reduce64To32ALUBottomUpPassBitEnable,
    Reduce64To32ALUSplitPassBitEnable,
    MergeSplitJoinDpEnable,
    FoldUnpacksEnable,
    ConstantFoldingEnable,
    LoopInvariantCodeMotionEnable,
    InputMarkingEnable,
    DispatchDetectionEnable,
    SimdReductionEnable,
    LocallyScalarSimdReductionEnable,
    CPLoadBufferOptimizationEnable,
    RoundRobinRegisterAllocationEnable,
    PatternMatchReplaceEnable,
    EuBypassEnable,
    GRFBankAlignmentEnable,
    OptimizeValuesNamespaceEnable,
    UrbAtomicsEnable,
    ScalarAtomicEnable,
    ComputeToAccumulatorEnable,
    OptimizeSimd8MovsEnable,
    AlignedPointerDetectionEnable,
    BlockLoadGloballyScalarPointerEnable,
    ChannelPropagationEnable,
    CoalesceLdEnable,
    CoalesceLdThreadEnable,
    CoalesceLdCrossLaneEnable,
    CoalesceStoreEnable,
    CutNonspillableLiveRangesEnable,
    DecreaseGRFPressureIfSpilledEnable,
    CodeSinkingEnable,
    MovPropagationEnable,
    CondModPropagationEnable,
    ImmediatesToConstantBufferEnable,
    ImmediatesToConstantBufferOptimizeALUEnable,
    PointerALUOptimizationEnable,
    KillAfterDiscardEnable,
    RematerializationEnable,
    RegionPreSchedulingEnable,
    PruningEnable,
    DeadBranchRemovalEnable,
    NoSrcDepSetEnable,
    ShaderHWInputPackingEnable,
    ShaderDeclarationPackingEnable,
    TPMPromotionEnable,
    SSAAllocatorEnable,
    SSAAllocator1BBOnly,
    SSAAllocatorSIMD8Only,
    GotoJoinOptEnable,
    GotoAroundGotoMergeEnable,
    StatefulCompilationEnable,
    AtomicDstRemovalEnable,
    MergeSimd8SamplerCBLoadsToSimd16Enable,
    SoftwareFp16PayloadEnable,
    SplitQuadTo32bitForALUEnable,
    SIMD32DivergentLoopHeuristicEnable,
    SIMD32SampleCountHeuristicEnable,
    SIMD32ConcurrentValuesHeuristicEnable,
    SIMD32ExtraHeuristicsEnable,

    // *******  Switches affecting floating point math optimizations ******
    NoSignedZerosEnable,           // allow optimizations to disregard the sign of zero
    FiniteMathOnlyEnable,          // assume floating point arguments and results are never Inf, NaN values
    UnsafeMathOptimizationsEnable, // allow algebraically equivalent transformations which may not hold in IEEE 754 arithmetic, e.g. (x+y)-y -> x

    // Required controls.
    TrackParallelInterferences,
    ForceUnalignedPointerReadDetectionEnable,
    ForceUnalignedPointerWriteDetectionEnable,
    DivergentPointerEnable,
    StructuralAnalysisEnable,

    // Numeric controls.
    IndirectCBOptimizationMode,
    ImmediatesToConstantBufferMinImmediates,
    MaxNumOfMulInstructionsPerPowUnwind,
    MulWeightOfSqrtInstructionInPowUnwind,
    MulWeightOfInvInstructionInPowUnwind,

    // Decomposer controls.
    DecomposeFDivToFRcpFMul,

    // LIR Pattern Match controls - they all depend on PatternMatchReplaceEnable,
    // i.e. setting PatternMatchReplaceEnable to 0 disables all of them.
    PMRChannelMatchEnable,
    PMRPowerMatchEnable,
    PMREUBypassMatchEnable,
    PMRComparisonMatchEnable,
    PMRFlowControlMatchEnable,
    PMRMultiplyMatchEnable,
    PMRMulMadMatchEnable,
    PMRSqrtMatchEnable,
    PMRFDivMatchEnable,
    PMRSelectMatchEnable,
    PMRMinMaxMatchEnable,
    PMRMulDivMatchEnable,
    PMRFDP3MatchEnable,
    PMRFDP4ToHMatchEnable,
    PMRMov0FDPMatchEnable,
    PMRMadMatchEnable,    // equal to old MadPatternMatchReplaceEnable
    PMRBfeMatchEnable,
    PMRLrpMatchEnable,
    PMRTrivialLrpMatchEnable,
    PMRMovLrpToAddMadMatchEnable,
    PMRBfiMatchEnable,
    PMRShrShlMatchEnable,
    PMRAddShlMatchEnable,
    PMRAddAddMatchEnable,
    PMRAndShiftMatchEnable,
    PMRJOIN_DPMatchEnable,
    PMRConvert64bitTo32bit,
    PMRGetValueFromActiveChannelMatchEnable,
    PMRCselMatchEnable,
    PMRReplicateComponentMatchEnable,
    PMRFDPHMatchEnable,
    PMRPackMatchEnable,
    PMRFFRCMatchEnable,
    PMRConstantPropagationMatchEnable,
    PMRTrivialPOWMatchEnable,
    PMRMulAddToMulMatchEnable,
    PMRVecImmScalarMatchEnable,
    PMRMovTwoLowPrecImmEnable,
    PMRIntConvertToBitcastEnable,
    PMRHoistBitcastsEnable,
    PMRMediaBlockReadPackMatchEnable,
    PMRAverageMatchEnable,
    PMRPropIntConvMatchEnable,
    PMRConvToMovMatchEnable,
    PMRPropagateRedundantPackEnable,
    PMRHoistSaturateEnable,
    PMRMergeWordByteUnpacksEnable,
    PMRMergeWordUnpacksEnable,
    PMRPropLowPrecEnable,

    // Terminator entry; must remain the last enumerator.
    USC_PARAM_HIDE()
    NUM_USC_KERNEL_COMPILER_CONTROLS
};
929 
930 } // namespace USC
931 
932 #endif // __cplusplus
933