1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #ifndef STRUCTURE_ALINGMENT_VERIFICATION
10 #pragma once
11 #endif
12
13 #include "../../igfxfmid.h"
14 #include "usc_config.h"
15 #include "CppParserMacros.h"
16 #include "../../gtsysinfo.h"
17 #ifndef _USC_
18 #include "../../sku_wa.h"
19 #endif
20
// Basic integer aliases, declared locally so that this header does not depend
// on external headers. Windows builds spell the DWORD/ULONG aliases with
// 'unsigned long'; every other target uses 'unsigned int'.
#if defined( _WIN32 )
typedef unsigned long DWORD;
typedef unsigned long ULONG;
typedef unsigned long *PULONG;
#else
typedef unsigned int DWORD;
typedef unsigned int ULONG;
typedef unsigned int *PULONG;
#endif

// 16-bit style aliases and their pointer forms (same spelling on all targets).
typedef unsigned short USHORT;
typedef unsigned short *PUSHORT;
typedef unsigned short WORD;
typedef unsigned short *PWORD;
32
33 // Note that this out of USC namespace part is used by .c files.
34
#ifdef __cplusplus
// C++ only: typedef PLATFORM onto itself, tagged with the USC_PARAM() marker.
// NOTE(review): PLATFORM is presumably declared in igfxfmid.h and USC_PARAM()
// in CppParserMacros.h - confirm against those headers.
USC_PARAM()
typedef PLATFORM PLATFORM;
#endif // __cplusplus
39
40 // Slimmed version of the full GT_SYSTEM_INFO structure ( in inc/umKmInc/sharedata.h ).
41 USC_PARAM()
typedef struct _SUscGTSystemInfo
{
    // Current (actual) EU / thread topology, mirrored from GT_SYSTEM_INFO.
    unsigned int EUCount;                       // Number of enabled EUs (total).
    unsigned int ThreadCount;                   // Number of available system threads (total).
    unsigned int SliceCount;                    // Number of enabled slices (total).
    unsigned int SubSliceCount;                 // Number of enabled subslices (total).
    unsigned int SLMSizeInKb;                   // SLM size.

    // TRUE:  system details were populated via fuse registers.
    // FALSE: system details are hard-coded.
    bool         IsDynamicallyPopulated;

    // Per-stage thread totals.
    unsigned int TotalPsThreadsWindowerRange;
    unsigned int TotalVsThreads;
    unsigned int TotalVsThreads_Pocs;
    unsigned int TotalGsThreads;
    unsigned int TotalDsThreads;
    unsigned int TotalHsThreads;

    // Limits.
    unsigned int MaxEuPerSubSlice;
    unsigned int EuCountPerPoolMax;
    unsigned int EuCountPerPoolMin;
    unsigned int MaxSlicesSupported;
    unsigned int MaxSubSlicesSupported;
    unsigned int CsrSizeInMb;
} SUscGTSystemInfo;
66
// Slimmed version of the full sku feature table ( in sku_wa.h ).
68 USC_PARAM()
typedef struct _SUscSkuFeatureTable
{
    // Every member is a one-bit flag: 1 = available, 0 = not available.
    // The "//...//" markers are kept from the original layout of this table.
    //...//

    // struct _sku_Core
    unsigned int FtrDesktop                     : 1; // Desktop part.
    unsigned int FtrChannelSwizzlingXOREnabled  : 1; // Channel Swizzling XOR feature is supported.
    //...//
    unsigned int FtrGtBigDie                    : 1; // Big die silicon.
    unsigned int FtrGtMediumDie                 : 1; // Medium die silicon.
    unsigned int FtrGtSmallDie                  : 1; // Small die silicon.
    //...//
    unsigned int FtrGT1                         : 1; // GT1 part.
    unsigned int FtrGT1_5                       : 1; // GT1.5 part.
    unsigned int FtrGT2                         : 1; // GT2 part.
    unsigned int FtrGT3                         : 1; // GT3 part.
    unsigned int FtrGT4                         : 1; // GT4 part.
    //...//
    unsigned int FtrIVBM0M1Platform             : 1; // Platform is IVB M0/M1.
    unsigned int FtrGTL                         : 1; // GT low-end performance part (new for HSW).
    unsigned int FtrGTM                         : 1; // GT medium performance part (new for HSW).
    unsigned int FtrGTH                         : 1; // GT high-end performance part (new for HSW).
    unsigned int FtrSGTPVSKUStrapPresent        : 1; // Switchable graphics present.
    unsigned int FtrGTA                         : 1; // Gen9 based LCLP Broxton platform A.
    unsigned int FtrGTC                         : 1; // Gen9 based LCLP Broxton platform C.
    unsigned int FtrGTX                         : 1; // Gen9 based LCLP Broxton platform X.
    unsigned int Ftr5Slice                      : 1; // KBL 15x8 SKU.
    //...//
    unsigned int FtrGpGpuMidThreadLevelPreempt  : 1; // Thread level batch preemption.
    unsigned int FtrIoMmuPageFaulting           : 1; // Page faulting is enabled.
    unsigned int FtrWddm2Svm                    : 1; // WDDMv2 SVM model (set in platform SKU files, but disabled by GMM as appropriate for the given system).
    unsigned int FtrPooledEuEnabled             : 1;

    unsigned int FtrResourceStreamer            : 1;
    unsigned int FtrLocalMemory                 : 1;
} SUscSkuFeatureTable;
106
107 USC_PARAM()
typedef struct _SUscAilInfo
{
    // One-bit switches; the w/a values originate from the UMD AIL.
    unsigned int EnableWaCheckResourceFormatForNFSRivals        : 1; // Turns on WaCheckResourceFormatForNFSRivals (based on UMD AIL).
    unsigned int WaDisableUnsafeArithmeticOperationRefactoring  : 1; // WaDisableUnsafeArithmeticOperationRefactoring as passed from UMD AIL.
    unsigned int WaTrigFuncRangeReduction                       : 1; // Compiler w/a: range reduction of trig functions for affected games.
    unsigned int WaHiddenIndexableTempSlot                      : 1; // Reserve extra indexable temp space for out-of-bound access.
} SUscAilInfo;

// Second name for the very same structure.
typedef SUscAilInfo SCompilerAilInfo;
115
116 USC_PARAM()
117 typedef struct _SUscAdapterInfo
118 {
119 SUscSkuFeatureTable UscSkuFeatureTable;
120 SUscGTSystemInfo UscGTSystemInfo;
121 SUscAilInfo UscAilInfo;
122 } SUscAdapterInfo;
123
// USC builds only: unless SKU_FEATURE_TABLE is defined already, make it name
// the slim USC feature table.
#if defined( _USC_ ) && !defined( SKU_FEATURE_TABLE )
#define SKU_FEATURE_TABLE SUscSkuFeatureTable
#endif
129
130 //Updated interface structure that will be used by DX9, DX10 and DX12
131 USC_PARAM()
132 typedef struct _SCompilerPlatformInfo
133 {
134 GT_SYSTEM_INFO sysInfo;
135 SCompilerAilInfo AilInfo;
136 SUscSkuFeatureTable uscSkuFeatureTable; // This slimmed version of the full sku feature table ( in sku_wa.h )
137 SKU_FEATURE_TABLE skuFeatureTable;
138 } SCompilerPlatformInfo;
139
// ICBE_LHDM and ICBE_LINUX builds remove the SKU_FEATURE_TABLE macro from
// this point on.
#if defined( ICBE_LHDM ) || defined( ICBE_LINUX )
#undef SKU_FEATURE_TABLE
#endif
143
144 #ifdef __cplusplus
145 namespace USC
146 {
// Name under which namespace USC exposes the global SCompilerPlatformInfo structure.
typedef SCompilerPlatformInfo SUSCCompilerPlatformInfo;
148 /*****************************************************************************\
149
150 Function:
151 InitializeUscAdapterInfo
152
153 Description:
154 Initializes the USC (slim) adapter info structure by coping required fields
155 from the big sku table and gtSystemInfo structure.
156 This is a helper function for USC clients.
157
158 Input:
159 BigSkuTable bigSkuTable - sku feature table containing to initialize
160 USC sku feature table.
161
162 BigGTSystemInfo bigGTSystemInfo - GT_SYSTEM_INFO containing actuall data about
163 EU and Thread count.
164 Output:
165 SUscAdapterInfo &uscAdpaterInfo - USC adapter info structure correctly
166 initialized.
167
168 \*****************************************************************************/
169 template<typename BigSkuTable, typename BigGTSystemInfo>
InitializeUscAdapterInfo(const BigSkuTable & bigSkuTable,const BigGTSystemInfo & bigGTSystemInfo,SUscAdapterInfo & uscAdpaterInfo)170 inline void InitializeUscAdapterInfo(
171 const BigSkuTable &bigSkuTable,
172 const BigGTSystemInfo &bigGTSystemInfo,
173 SUscAdapterInfo &uscAdpaterInfo )
174 {
175 uscAdpaterInfo.UscSkuFeatureTable.FtrDesktop = bigSkuTable.FtrDesktop; // Whether Desktop
176
177 uscAdpaterInfo.UscSkuFeatureTable.FtrGtBigDie = bigSkuTable.FtrGtBigDie; // Indicates Big Die Silicon.
178 uscAdpaterInfo.UscSkuFeatureTable.FtrGtMediumDie = bigSkuTable.FtrGtMediumDie; // Indicates Medium Die Silicon.
179 uscAdpaterInfo.UscSkuFeatureTable.FtrGtSmallDie = bigSkuTable.FtrGtSmallDie; // Indicates Small Die Silicon.
180 uscAdpaterInfo.UscSkuFeatureTable.FtrGT1 = bigSkuTable.FtrGT1; // Indicates GT1 part.
181 uscAdpaterInfo.UscSkuFeatureTable.FtrGT1_5 = bigSkuTable.FtrGT1_5; // Indicates GT1.5 part.
182 uscAdpaterInfo.UscSkuFeatureTable.FtrGT2 = bigSkuTable.FtrGT2; // Indicates GT2 part.
183 uscAdpaterInfo.UscSkuFeatureTable.FtrGT3 = bigSkuTable.FtrGT3; // Indicates GT3 part.
184 uscAdpaterInfo.UscSkuFeatureTable.FtrGT4 = bigSkuTable.FtrGT4; // Indicates GT4 part.
185 uscAdpaterInfo.UscSkuFeatureTable.FtrGTL = bigSkuTable.FtrGT1; // Indicates GT Low-end performance part.
186 uscAdpaterInfo.UscSkuFeatureTable.FtrGTM = bigSkuTable.FtrGT2; // Indicates GT Medium performance part.
187 uscAdpaterInfo.UscSkuFeatureTable.FtrGTH = bigSkuTable.FtrGT3; // Indicates GT High-end performance part.
188 uscAdpaterInfo.UscSkuFeatureTable.FtrIVBM0M1Platform = bigSkuTable.FtrIVBM0M1Platform; // Indicates whether the platform in IVB M0/M1
189 uscAdpaterInfo.UscSkuFeatureTable.FtrGTA = bigSkuTable.FtrGTA; // Indicates a Gen9 based LCLP Broxton platform A.
190 uscAdpaterInfo.UscSkuFeatureTable.FtrGTC = bigSkuTable.FtrGTC; // Indicates a Gen9 based LCLP Broxton platform C.
191 uscAdpaterInfo.UscSkuFeatureTable.FtrGTX = bigSkuTable.FtrGTX; // Indicates a Gen9 based LCLP Broxton platform X.
192 uscAdpaterInfo.UscSkuFeatureTable.Ftr5Slice = bigSkuTable.Ftr5Slice; // Indicates KBL 15x8 SKU HALO Sku
193 uscAdpaterInfo.UscSkuFeatureTable.FtrGpGpuMidThreadLevelPreempt = bigSkuTable.FtrGpGpuMidThreadLevelPreempt; //Indicates if preEmption is enabled (HSW+)
194 uscAdpaterInfo.UscSkuFeatureTable.FtrIoMmuPageFaulting = bigSkuTable.FtrIoMmuPageFaulting; //Indicates if page faulting is enabled.
195 uscAdpaterInfo.UscSkuFeatureTable.FtrWddm2Svm = bigSkuTable.FtrWddm2Svm;
196 uscAdpaterInfo.UscSkuFeatureTable.FtrPooledEuEnabled = bigSkuTable.FtrPooledEuEnabled;
197 uscAdpaterInfo.UscSkuFeatureTable.FtrResourceStreamer = bigSkuTable.FtrResourceStreamer;
198 uscAdpaterInfo.UscSkuFeatureTable.FtrLocalMemory = bigSkuTable.FtrLocalMemory;
199
200 uscAdpaterInfo.UscGTSystemInfo.EUCount = bigGTSystemInfo.EUCount;
201 uscAdpaterInfo.UscGTSystemInfo.ThreadCount = bigGTSystemInfo.ThreadCount;
202 uscAdpaterInfo.UscGTSystemInfo.SliceCount = bigGTSystemInfo.SliceCount;
203 uscAdpaterInfo.UscGTSystemInfo.SubSliceCount = bigGTSystemInfo.SubSliceCount;
204 uscAdpaterInfo.UscGTSystemInfo.SLMSizeInKb = bigGTSystemInfo.SLMSizeInKb;
205 uscAdpaterInfo.UscGTSystemInfo.TotalPsThreadsWindowerRange = bigGTSystemInfo.TotalPsThreadsWindowerRange;
206 uscAdpaterInfo.UscGTSystemInfo.TotalVsThreads = bigGTSystemInfo.TotalVsThreads;
207 uscAdpaterInfo.UscGTSystemInfo.TotalVsThreads_Pocs = bigGTSystemInfo.TotalVsThreads_Pocs;
208 uscAdpaterInfo.UscGTSystemInfo.TotalDsThreads = bigGTSystemInfo.TotalDsThreads;
209 uscAdpaterInfo.UscGTSystemInfo.TotalGsThreads = bigGTSystemInfo.TotalGsThreads;
210 uscAdpaterInfo.UscGTSystemInfo.TotalHsThreads = bigGTSystemInfo.TotalHsThreads;
211 uscAdpaterInfo.UscGTSystemInfo.MaxEuPerSubSlice = bigGTSystemInfo.MaxEuPerSubSlice;
212 uscAdpaterInfo.UscGTSystemInfo.EuCountPerPoolMax = bigGTSystemInfo.EuCountPerPoolMax;
213 uscAdpaterInfo.UscGTSystemInfo.EuCountPerPoolMin = bigGTSystemInfo.EuCountPerPoolMin;
214 uscAdpaterInfo.UscGTSystemInfo.MaxSlicesSupported = bigGTSystemInfo.MaxSlicesSupported;
215 uscAdpaterInfo.UscGTSystemInfo.MaxSubSlicesSupported = bigGTSystemInfo.MaxSubSlicesSupported;
216 uscAdpaterInfo.UscGTSystemInfo.IsDynamicallyPopulated = bigGTSystemInfo.IsDynamicallyPopulated;
217 uscAdpaterInfo.UscGTSystemInfo.CsrSizeInMb = bigGTSystemInfo.CsrSizeInMb;
218 }
219
220 /*****************************************************************************\
221
222 Function:
223 InitializeUscSkuTable
224
225 Description:
226 Initializes the USC (slim) sku table by coping required fields from the big
227 sku table. This is a helper function for USC clients.
228
229 Input:
230 BigSkuTable bigSkuTable - sku feature table containing to initialize
231 USC sku feature table.
232
233 Output:
234 SUscSkuFeatureTable &uscSkuTable - USC sku feature table correctly
235 initialized.
236
237 \*****************************************************************************/
238 // Some API clients (OCL,DXVA) do not provide yet GT_SYSTEM_INFO structure.
239 // Shader Compiler object must be created with "old" way i.e. usage of fixed EU,Thread count values.
240 // The InitializeUscSkuTable method is called and uscAdpaterInfo.UscGTSystemInfo is zeroed.
241
242 // This method should be removed in the future.
243 template<typename BigSkuTable>
InitializeUscSkuTable(const BigSkuTable & bigSkuTable,SUscSkuFeatureTable & uscSkuTable)244 inline void InitializeUscSkuTable(
245 const BigSkuTable &bigSkuTable,
246 SUscSkuFeatureTable &uscSkuTable )
247 {
248 uscSkuTable.FtrDesktop = bigSkuTable.FtrDesktop; // Whether Desktop
249 uscSkuTable.FtrChannelSwizzlingXOREnabled = bigSkuTable.FtrChannelSwizzlingXOREnabled; // Indicates Channel Swizzling XOR feature support
250
251 uscSkuTable.FtrGtBigDie = bigSkuTable.FtrGtBigDie; // Indicates Big Die Silicon.
252 uscSkuTable.FtrGtMediumDie = bigSkuTable.FtrGtMediumDie; // Indicates Medium Die Silicon.
253 uscSkuTable.FtrGtSmallDie = bigSkuTable.FtrGtSmallDie; // Indicates Small Die Silicon.
254 uscSkuTable.FtrGT1 = bigSkuTable.FtrGT1; // Indicates GT1 part.
255 uscSkuTable.FtrGT1_5 = bigSkuTable.FtrGT1_5; // Indicates GT1.5 part.
256 uscSkuTable.FtrGT2 = bigSkuTable.FtrGT2; // Indicates GT2 part.
257 uscSkuTable.FtrGT3 = bigSkuTable.FtrGT3; // Indicates GT3 part.
258 uscSkuTable.FtrGT4 = bigSkuTable.FtrGT4; // Indicates GT4 part.
259 uscSkuTable.FtrGTL = bigSkuTable.FtrGT1; // Indicates GT Low-end performance part.
260 uscSkuTable.FtrGTM = bigSkuTable.FtrGT2; // Indicates GT Medium performance part.
261 uscSkuTable.FtrGTH = bigSkuTable.FtrGT3; // Indicates GT High-end performance part.
262 uscSkuTable.FtrIVBM0M1Platform = bigSkuTable.FtrIVBM0M1Platform; // Indicates whether the platform in IVB M0/M1
263 uscSkuTable.FtrGTA = bigSkuTable.FtrGTA; // Indicates a Gen9 based LCLP Broxton platform A.
264 uscSkuTable.FtrGTC = bigSkuTable.FtrGTC; // Indicates a Gen9 based LCLP Broxton platform C.
265 uscSkuTable.FtrGTX = bigSkuTable.FtrGTX; // Indicates a Gen9 based LCLP Broxton platform X.
266 uscSkuTable.Ftr5Slice = bigSkuTable.Ftr5Slice; // Indicates KBL 15x8 SKU HALO Sku
267 uscSkuTable.FtrGpGpuMidThreadLevelPreempt = bigSkuTable.FtrGpGpuMidThreadLevelPreempt; //Indicates if preEmption is enabled (HSW+)
268 uscSkuTable.FtrIoMmuPageFaulting = bigSkuTable.FtrIoMmuPageFaulting; //Indicates if page faulting is enabled.
269 uscSkuTable.FtrLocalMemory = bigSkuTable.FtrLocalMemory;
270 }
271
272
273 USC_PARAM()
274 enum OPTIMIZER_LEVEL
275 {
276 OPTIMIZER_LEVEL_0, // -o0 fast compilation
277 OPTIMIZER_LEVEL_1, // -o1 full compilation (default)
278 OPTIMIZER_LEVEL_2, // -o2 specialized compilation
279 USC_PARAM_HIDE()
280 NUM_OPTIMIZER_LEVELS
281 };
282
283 USC_PARAM()
284 enum SIMD_LEVEL
285 {
286 SIMD_LEVEL_DEFAULT, // request all SIMD compilations at once
287 SIMD_LEVEL_LOW, // request SIMD8 only
288 SIMD_LEVEL_HIGH, // request all higher SIMD modes
289 USC_PARAM_HIDE()
290 NUM_SIMD_LEVELS
291 };
292
// Shader stages. NUM_SHADER_TYPES is the stage count and is used as an array
// dimension elsewhere in this header, so it must stay last.
enum SHADER_TYPE
{
    VERTEX_SHADER   = 0,
    GEOMETRY_SHADER = 1,
    PIXEL_SHADER    = 2,
    HULL_SHADER     = 3,
    DOMAIN_SHADER   = 4,
    COMPUTE_SHADER  = 5,
    NUM_SHADER_TYPES
};
303
// SIMD modes; NUM_SIMD_MODES is the number of modes and must stay last.
enum SIMD_MODE
{
    SIMD_MODE_8   = 0,
    SIMD_MODE_16  = 1,
    SIMD_MODE_32  = 2,
    SIMD_MODE_4x2 = 3,
    NUM_SIMD_MODES
};
312
// Pixel shader dispatch widths; NUM_PS_DISPATCH_TYPES is the count and must
// stay last.
enum PS_DISPATCH_TYPES
{
    PS_SIMD8_DISPATCH  = 0,
    PS_SIMD16_DISPATCH = 1,
    PS_SIMD32_DISPATCH = 2,
    NUM_PS_DISPATCH_TYPES
};
320
// API clients of USC; NUM_USC_CLIENT_TYPES is the count and must stay last.
enum USC_CLIENT_TYPE
{
    USC_CLIENT_D3D9  = 0,
    USC_CLIENT_D3D10 = 1,
    USC_CLIENT_D3D12 = 2,
    USC_CLIENT_OGL   = 3,
    USC_CLIENT_OCL   = 4,
    NUM_USC_CLIENT_TYPES
};
330
// Binding table layout of a single shader stage: binding table indices (and a
// few offsets) of the resource classes a shader can reference.
struct SShaderStageBTLayout
{
    // Must be the same value for every shader stage.
    unsigned int systemThreadIdx;

    unsigned int minConstantBufferIdx;
    unsigned int maxConstantBufferIdx;
    unsigned int streamOutmputStatisticsIdx;
    unsigned int minStreamOutputBufferIdx;
    unsigned int maxStreamOutputBufferIdx;
    unsigned int minUAVIdx;                 // minRTorUAVIdx
    unsigned int maxUAVIdx;                 // maxRTorUAVIdx
    unsigned int minUAVCounterIdx;          // pre-DEVHSW
    unsigned int maxUAVCounterIdx;          // pre-DEVHSW
    unsigned int JournalIdx;                // journal resource index; Kernel Trace / Profiling query uses it to measure kernel execution time
    unsigned int JournalCounterIdx;         // journal counter resource index
    unsigned int TGSMIdx;
    unsigned int minColorBufferIdx;
    unsigned int maxColorBufferIdx;
    unsigned int minResourceIdx;
    unsigned int maxResourceIdx;
    unsigned int NULLSurfaceIdx;
    unsigned int RasterizerInfoSurfaceIdx;  // special SS for SampleInfo on rasterizer0 for OGL
    unsigned int TPMIdx;
    unsigned int surfaceScratchIdx;
    unsigned int maxBTsize;

    // The next three fields are offsets relative to minConstantBufferIdx.
    //
    // The NULL CB offset should be programmed right after the last constant
    // buffer index. That lets USC clamp indexable CB indexes correctly (when
    // a shader uses relative constant buffer addressing), so that out of
    // bounds reads return 0. If this field is programmed incorrectly, out of
    // bounds accesses may not return 0.
    unsigned int constantBufferNullBoundOffset;
    unsigned int immediateConstantBufferOffset;
    unsigned int interfaceConstantBufferOffset;

    // The next field is an offset relative to minResourceIdx.
    //
    // The NULL resource offset should be programmed right after the last
    // shader resource index. That lets USC clamp indexable resource indexes
    // correctly (when a shader uses relative shader resource addressing), so
    // that out of bounds reads return 0. If this field is programmed
    // incorrectly, out of bounds accesses may not return 0.
    unsigned int resourceNullBoundOffset;

    // When set, all BTI reads of the shader are rerouted via the Bindless
    // heap (SKL Bindless for DX testing).
    bool BindLessBTIEnable;

    // Gives access to the indirect draw arguments buffer; used by the
    // geometry reordering optimization.
    unsigned int indirectBufferOffset;
};
386
387 USC_PARAM()
388 struct SBindingTableLayout
389 {
390 SShaderStageBTLayout m_Layout[NUM_SHADER_TYPES];
391 };
392
// global const to start with when defining custom BTI layout.
// Both objects are initialised from an empty brace list; one covers the whole
// binding table layout, the other a single shader stage.
const SBindingTableLayout g_cZeroBindingTableLayout = {};
const SShaderStageBTLayout g_cZeroShaderStageBTLayout = {};
396
/*****************************************************************************\
DEFINE: GTDI_MAX_KI_OFFSETS
    Length of the Offsets and OffsetType arrays in STracingOptions.
\*****************************************************************************/
#define GTDI_MAX_KI_OFFSETS 26

/*****************************************************************************\
DEFINE: GTDI_MAX_KI_AGGREGATED_OFFSETS
    NOTE(review): no use of this value is visible in this part of the header -
    confirm where it is consumed.
\*****************************************************************************/
#define GTDI_MAX_KI_AGGREGATED_OFFSETS 20
406
/*****************************************************************************\
ENUM: GTDI_KI_BUILD_TYPE_ENUM
    Kernel build (instrumentation) types. GTDI_NUM_KERNEL_PROFILING_TYPES is
    the number of types listed before it.
\*****************************************************************************/
typedef enum GTDI_KI_BUILD_TYPE_ENUM
{
    GTDI_KERNEL_REGULAR                = 0,  // value that switches the kernel build override off
    GTDI_KERNEL_TRACE                  = 1,
    GTDI_KERNEL_PROFILE                = 2,
    GTDI_KERNEL_ISA_PROFILE            = 3,
    GTDI_KERNEL_PROFILE_AGGREGATED     = 4,
    GTDI_KERNEL_ISA_PROFILE_AGGREGATED = 5,
    GTDI_KERNEL_ISA_COUNTERS           = 6,
    GTDI_KERNEL_GT_PIN_COMPILER        = 7,
    GTDI_KERNEL_GPGPU_TRACE            = 8,
    GTDI_NUM_KERNEL_PROFILING_TYPES    = 9
} GTDI_KI_BUILD_TYPE;
423
/*****************************************************************************\
ENUM: GTDI_KERNEL_PROFILING_POINT_TYPE_ENUM
    Profiling point kinds. Each enumerator occupies its own bit.
\*****************************************************************************/
typedef enum GTDI_KERNEL_PROFILING_POINT_TYPE_ENUM
{
    GTDI_PROFILING_POINT_TIMESTAMP_INTEL         = 0x001,   // bit 0
    GTDI_PROFILING_POINT_STALL_COUNTER_INTEL     = 0x002,   // bit 1
    GTDI_PROFILING_POINT_SAMPLER_MESSAGE_INTEL   = 0x004,   // bit 2
    GTDI_PROFILING_POINT_DATA_PORT_MESSAGE_INTEL = 0x008,   // bit 3
    GTDI_PROFILING_POINT_WORKGROUP_ID_X_INTEL    = 0x010,   // bit 4
    GTDI_PROFILING_POINT_WORKGROUP_ID_Y_INTEL    = 0x020,   // bit 5
    GTDI_PROFILING_POINT_WORKGROUP_ID_Z_INTEL    = 0x040,   // bit 6
    GTDI_PROFILING_POINT_EXECUTION_MASK_INTEL    = 0x080,   // bit 7
    GTDI_PROFILING_POINT_HIT_COUNTER_INTEL       = 0x100    // bit 8
} GTDI_KERNEL_PROFILING_POINT_TYPE;
439
440 /*****************************************************************************\
441 STRUCT: STracingOptions
442 \*****************************************************************************/
443 struct STracingOptions
444 {
445 unsigned int InstrumentationType;
446 unsigned int KernelID;
447 unsigned int OffsetCount;
448 unsigned int Offsets[GTDI_MAX_KI_OFFSETS];
449 unsigned int OffsetType[GTDI_MAX_KI_OFFSETS];
450 unsigned int UseEUThreadMasks;
451 unsigned int PartitionCount;
452 unsigned int EUMask;
453 unsigned int ThreadCount;
454 unsigned int CfgID;
455 unsigned int GatherGatewayTimestamp;
456 unsigned int ShaderILCodeSize;
457 void* ShaderILCode;
458 char* KernelName;
459 };
460
/*****************************************************************************\
ENUM: GFX3DPRIMITIVE_TOPOLOGY_TYPE
    3D primitive topology codes. The values 0x00, 0x15 and 0x17..0x1F have no
    enumerator here.
\*****************************************************************************/
enum GFX3DPRIMITIVE_TOPOLOGY_TYPE
{
    // Point, line, triangle, quad and related topologies.
    GFX3DPRIM_POINTLIST         = 0x01,
    GFX3DPRIM_LINELIST          = 0x02,
    GFX3DPRIM_LINESTRIP         = 0x03,
    GFX3DPRIM_TRILIST           = 0x04,
    GFX3DPRIM_TRISTRIP          = 0x05,
    GFX3DPRIM_TRIFAN            = 0x06,
    GFX3DPRIM_QUADLIST          = 0x07,
    GFX3DPRIM_QUADSTRIP         = 0x08,
    GFX3DPRIM_LINELIST_ADJ      = 0x09,
    GFX3DPRIM_LINESTRIP_ADJ     = 0x0A,
    GFX3DPRIM_TRILIST_ADJ       = 0x0B,
    GFX3DPRIM_TRISTRIP_ADJ      = 0x0C,
    GFX3DPRIM_TRISTRIP_REVERSE  = 0x0D,
    GFX3DPRIM_POLYGON           = 0x0E,
    GFX3DPRIM_RECTLIST          = 0x0F,
    GFX3DPRIM_LINELOOP          = 0x10,
    GFX3DPRIM_POINTLIST_BF      = 0x11,
    GFX3DPRIM_LINESTRIP_CONT    = 0x12,
    GFX3DPRIM_LINESTRIP_BF      = 0x13,
    GFX3DPRIM_LINESTRIP_CONT_BF = 0x14,
    GFX3DPRIM_TRIFAN_NOSTIPPLE  = 0x16,

    // Patch lists: GFX3DPRIM_PATCHLIST_<n> has the value 0x1F + n.
    GFX3DPRIM_PATCHLIST_1       = 0x20,
    GFX3DPRIM_PATCHLIST_2       = 0x21,
    GFX3DPRIM_PATCHLIST_3       = 0x22,
    GFX3DPRIM_PATCHLIST_4       = 0x23,
    GFX3DPRIM_PATCHLIST_5       = 0x24,
    GFX3DPRIM_PATCHLIST_6       = 0x25,
    GFX3DPRIM_PATCHLIST_7       = 0x26,
    GFX3DPRIM_PATCHLIST_8       = 0x27,
    GFX3DPRIM_PATCHLIST_9       = 0x28,
    GFX3DPRIM_PATCHLIST_10      = 0x29,
    GFX3DPRIM_PATCHLIST_11      = 0x2A,
    GFX3DPRIM_PATCHLIST_12      = 0x2B,
    GFX3DPRIM_PATCHLIST_13      = 0x2C,
    GFX3DPRIM_PATCHLIST_14      = 0x2D,
    GFX3DPRIM_PATCHLIST_15      = 0x2E,
    GFX3DPRIM_PATCHLIST_16      = 0x2F,
    GFX3DPRIM_PATCHLIST_17      = 0x30,
    GFX3DPRIM_PATCHLIST_18      = 0x31,
    GFX3DPRIM_PATCHLIST_19      = 0x32,
    GFX3DPRIM_PATCHLIST_20      = 0x33,
    GFX3DPRIM_PATCHLIST_21      = 0x34,
    GFX3DPRIM_PATCHLIST_22      = 0x35,
    GFX3DPRIM_PATCHLIST_23      = 0x36,
    GFX3DPRIM_PATCHLIST_24      = 0x37,
    GFX3DPRIM_PATCHLIST_25      = 0x38,
    GFX3DPRIM_PATCHLIST_26      = 0x39,
    GFX3DPRIM_PATCHLIST_27      = 0x3A,
    GFX3DPRIM_PATCHLIST_28      = 0x3B,
    GFX3DPRIM_PATCHLIST_29      = 0x3C,
    GFX3DPRIM_PATCHLIST_30      = 0x3D,
    GFX3DPRIM_PATCHLIST_31      = 0x3E,
    GFX3DPRIM_PATCHLIST_32      = 0x3F
};
520
/*****************************************************************************\
ENUM: GFX3DSTATE_PROGRAM_FLOW
    Program flow selection: multiple (0) or single (1).
\*****************************************************************************/
enum GFX3DSTATE_PROGRAM_FLOW
{
    GFX3DSTATE_PROGRAM_FLOW_MULTIPLE = 0,
    GFX3DSTATE_PROGRAM_FLOW_SINGLE   = 1
};
529
/*****************************************************************************\
ENUM: GFX3DSTATE_FLOATING_POINT_MODE

Description:
    Floating point mode the hardware is to use while it runs the compiled
    kernel program.
\*****************************************************************************/
enum GFX3DSTATE_FLOATING_POINT_MODE
{
    GFX3DSTATE_FLOATING_POINT_IEEE_754     = 0,
    GFX3DSTATE_FLOATING_POINT_NON_IEEE_754 = 1
};
542
543
/*****************************************************************************\
ENUM: GFX3DSTATE_POSITIONXY_OFFSET
    Position XY offset selection. The value 1 is reserved.
\*****************************************************************************/
enum GFX3DSTATE_POSITIONXY_OFFSET
{
    GFX3DSTATE_POSITIONXY_OFFSET_NONE     = 0,
    // 1 is reserved
    GFX3DSTATE_POSITIONXY_OFFSET_CENTROID = 2,
    GFX3DSTATE_POSITIONXY_OFFSET_SAMPLE   = 3
};
554
/*****************************************************************************\
ENUM: GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE
    Position ZW interpolation selection. The value 1 is reserved.
\*****************************************************************************/
enum GFX3DSTATE_POSITIONZW_INTERPOLATION_MODE
{
    GFX3DSTATE_POSITIONZW_INTERPOLATION_PIXEL    = 0,
    // 1 is reserved
    GFX3DSTATE_POSITIONZW_INTERPOLATION_CENTROID = 2,
    GFX3DSTATE_POSITIONZW_INTERPOLATION_SAMPLE   = 3
};
565
566
//////////////////////////////////////////////////////////////////////////////
// Fog functions; NUM_RENDERSTATE_FOG_FUNCTIONS is the count and must stay last.
enum RENDERSTATE_FOG_FUNCTION
{
    RENDERSTATE_FOG_VERTEX       = 0,
    RENDERSTATE_FOG_PIXEL_EXP    = 1,
    RENDERSTATE_FOG_PIXEL_EXP2   = 2,
    RENDERSTATE_FOG_PIXEL_LINEAR = 3,
    NUM_RENDERSTATE_FOG_FUNCTIONS
};
576
//////////////////////////////////////////////////////////////////////////////
// Fog sources; NUM_RENDERSTATE_FOG_SOURCES is the count and must stay last.
enum RENDERSTATE_FOG_SOURCE
{
    RENDERSTATE_FOG_SOURCE_Z              = 0,
    RENDERSTATE_FOG_SOURCE_W              = 1,
    RENDERSTATE_FOG_SOURCE_FOG_COORDINATE = 2,
    NUM_RENDERSTATE_FOG_SOURCES
};
585
//////////////////////////////////////////////////////////////////////////////
// Alpha test formats; NUM_RENDERSTATE_ALPHATEST_FORMATS is the count and must
// stay last.
enum RENDERSTATE_ALPHATEST_FORMAT
{
    RENDERSTATE_ALPHATEST_FORMAT_UNORM8  = 0,
    RENDERSTATE_ALPHATEST_FORMAT_FLOAT32 = 1,
    NUM_RENDERSTATE_ALPHATEST_FORMATS
};
593
/*****************************************************************************\
ENUM: MAPFILTER_TYPE
    Map filter kinds; NUM_MAPFILTER_TYPES is the count and must stay last.
\*****************************************************************************/
enum MAPFILTER_TYPE
{
    MAPFILTER_TYPE_POINT  = 0,
    MAPFILTER_TYPE_LINEAR = 1,
    NUM_MAPFILTER_TYPES
};
603
/*****************************************************************************\
ENUM: COMPARE_FUNCTION
    Comparison functions; NUM_COMPARE_FUNCTIONS is the count and must stay
    last.
\*****************************************************************************/
enum COMPARE_FUNCTION
{
    COMPARE_FUNCTION_ALWAYS   = 0,
    COMPARE_FUNCTION_NEVER    = 1,
    COMPARE_FUNCTION_LESS     = 2,
    COMPARE_FUNCTION_EQUAL    = 3,
    COMPARE_FUNCTION_LEQUAL   = 4,
    COMPARE_FUNCTION_GREATER  = 5,
    COMPARE_FUNCTION_NOTEQUAL = 6,
    COMPARE_FUNCTION_GEQUAL   = 7,
    NUM_COMPARE_FUNCTIONS
};
619
/*****************************************************************************\
ENUM: STENCIL_OPERATION
    Stencil operations.
\*****************************************************************************/
enum STENCIL_OPERATION
{
    STENCIL_OP_STENCILOP_KEEP    = 0,
    STENCIL_OP_STENCILOP_ZERO    = 1,
    STENCIL_OP_STENCILOP_REPLACE = 2,
    STENCIL_OP_STENCILOP_INCRSAT = 3,
    STENCIL_OP_STENCILOP_DECRSAT = 4,
    STENCIL_OP_STENCILOP_INCR    = 5,
    STENCIL_OP_STENCILOP_DECR    = 6,
    STENCIL_OP_STENCILOP_INVERT  = 7
};
634
635 /*****************************************************************************\
636 STRUCT: SSWStencilParams
637 \*****************************************************************************/
638 struct SSWStencilParams
639 {
640 unsigned char CheckForSWStencil : 1;
641 unsigned char CompileForSWStencil : 1;
642 COMPARE_FUNCTION FrontFaceStencilFunc;
643 COMPARE_FUNCTION BackFaceStencilFunc;
644 STENCIL_OPERATION FrontFaceStencilPassOp;
645 STENCIL_OPERATION BackFaceStencilPassOp;
646
SSWStencilParamsSSWStencilParams647 SSWStencilParams()
648 {
649 CheckForSWStencil = 0;
650 CompileForSWStencil = 0;
651 FrontFaceStencilFunc = COMPARE_FUNCTION_ALWAYS;
652 BackFaceStencilFunc = COMPARE_FUNCTION_ALWAYS;
653 FrontFaceStencilPassOp = STENCIL_OP_STENCILOP_KEEP;
654 BackFaceStencilPassOp = STENCIL_OP_STENCILOP_KEEP;
655 }
656 };
657
658 struct NOSParams
659 {
660 SSWStencilParams* pSWStencilParams;
661
NOSParamsNOSParams662 NOSParams()
663 {
664 pSWStencilParams = 0;
665 }
666 };
667
/*****************************************************************************\
ENUM: INPUT_COVERAGE_MASK_MODE
    Input coverage mask modes.
\*****************************************************************************/
enum INPUT_COVERAGE_MASK_MODE
{
    INPUT_COVERAGE_MASK_MODE_NORMAL = 0,
    INPUT_COVERAGE_MASK_MODE_INNER  = 1,
    INPUT_COVERAGE_MASK_MODE_DEPTH  = 2
};
677
/*****************************************************************************\
ENUM: SYSTEM_THREAD_MODE

Description:
    Bitmask enum describing the System Thread mode. The System Thread may
    support shader debugging and/or the Context Save Restore (CSR) subroutine
    called GPGPU preemption.
\*****************************************************************************/
typedef enum SYSTEM_THREAD_MODE_ENUM
{
    SYSTEM_THREAD_MODE_NONE        = 0,
    SYSTEM_THREAD_MODE_DEBUG       = 1 << 0,
    SYSTEM_THREAD_MODE_CSR         = 1 << 1,
    SYSTEM_THREAD_MODE_DEBUG_LOCAL = 1 << 2
} SYSTEM_THREAD_MODE;
693
/*****************************************************************************\
STRUCT: SSystemThreadKernelOutput
    System thread kernel program together with its size values and the state
    save area header.
\*****************************************************************************/
struct SSystemThreadKernelOutput
{
    void*        m_pKernelProgram;              // kernel start pointer
    unsigned int m_KernelProgramSize;
    unsigned int m_SystemThreadScratchSpace;    // scratch space size, in bytes
    unsigned int m_SystemThreadResourceSize;    // resource size, in bytes
    void*        m_pStateSaveAreaHeader;        // state save area header
    unsigned int m_StateSaveAreaHeaderSize;     // state save area header size, in bytes
};
705
/*****************************************************************************\
ENUM: USC_SHADER_COMPILER_CONTROLS

IL level optimizations.

Enumerators carry implicit, consecutive values starting at 0, so the order of
the list determines every value. NUM_USC_SHADER_COMPILER_CONTROLS is the number
of controls and must remain the last enumerator.

\*****************************************************************************/
USC_PARAM()
enum USC_SHADER_COMPILER_CONTROLS
{
// IL transformation controls.
IndirectTemporaryRemovalEnable,
CallCndTranslationEnable,
LoopUnrollingEnable,
ILPatternMatchingEnable,
ConditionalExpressionSimplificationEnable,
TrivialSwitchRemovalEnable,
TrivialIfRemovalEnable,
EarlyEOTAfterDiscardEnable,
SwitchTranslationEnable,
EarlyReturnRemovalEnable,
InlineSubstitutionEnable,
CallToSubroutineCallEnable,
ConstantBufferToConstantRegisterEnable,
ConstantBufferToConstantRegisterLDRAWEnable,
ParseOpcodesToPrecModifierEnable,
IndexedTempGRFCachingEnable,
ILConstantFoldingEnable,
ILConstantFoldingAggressive,
PrintfExpansionEnable,
TranslateVendorExtensionsEnable,
RemoveDeadOutputEnable,
PerformImageSerializeEnable,
PointerLoadToIndexedLoad,

// PatternMatch controls.
FMulFAddToFMad,
MovFCmpBranchToMovFCmp,
MovCndToMov,
LDStructuredScalarToVector,
FLogFExp2ScalarToVector,
MovFCmpToFMax,
IfDiscardToDiscardCND,
IfBranchFlattening,
ContinueCndRemoval,
FCmpGtToFMaxOrFCmpLtToFMin,
PreserveFunctionRetMovs,

// NOTE(review): judging by the names, most of the controls below look like
// numeric limits / factors rather than on-off switches - confirm with the code
// that reads them.
MaxLoopUnrollLength,
MaxConstantBufferPairs,
PartialUnrollFactor,
ProcessDynamicResourceIndexingEnable,
IfConversionLength,

USC_PARAM_HIDE()
NUM_USC_SHADER_COMPILER_CONTROLS
};
761
762 /*****************************************************************************\
763 ENUM: USC_KERNEL_COMPILER_CONTROLS
764
765 LLIR/ISA level optimizations.
766
767 \*****************************************************************************/
768 USC_PARAM()
769 enum USC_KERNEL_COMPILER_CONTROLS
770 {
771 ValueNumberingEnable,
772 BlockLoadDirectConstantsEnable,
773 BlockLoadIndirectConstantsEnable,
774 BlockLoadScatteredConstantsEnable,
775 OptimizeReplicateEnable,
776 Optimize64bitReplicateEnable,
777 ReorderInstructionsEnable,
778 ClusterSamplesEnable,
779 DeferredInterpolationEnable,
780 AtomicReorderInstructionsEnable,
781 CoalescingEnable,
782 CoalesceCopiesEnable,
783 CoalesceBitcastsEnable,
784 CoalesceSplitsEnable,
785 CoalesceJoinsEnable,
786 CoalesceMultiplePayloadsEnable,
787 CoalesceHeadersLastEnable,
788 OptimizeResourceLoadsEnable,
789 ISASchedulingEnable,
790 Reduce64To32ALUBitEnable,
791 Reduce32To8ALUBitEnable,
792 Reduce64To32ALUTopDownPassBitEnable,
793 Reduce64To32ALUBottomUpPassBitEnable,
794 Reduce64To32ALUSplitPassBitEnable,
795 MergeSplitJoinDpEnable,
796 FoldUnpacksEnable,
797 ConstantFoldingEnable,
798 LoopInvariantCodeMotionEnable,
799 InputMarkingEnable,
800 DispatchDetectionEnable,
801 SimdReductionEnable,
802 LocallyScalarSimdReductionEnable,
803 CPLoadBufferOptimizationEnable,
804 RoundRobinRegisterAllocationEnable,
805 PatternMatchReplaceEnable,
806 EuBypassEnable,
807 GRFBankAlignmentEnable,
808 OptimizeValuesNamespaceEnable,
809 UrbAtomicsEnable,
810 ScalarAtomicEnable,
811 ComputeToAccumulatorEnable,
812 OptimizeSimd8MovsEnable,
813 AlignedPointerDetectionEnable,
814 BlockLoadGloballyScalarPointerEnable,
815 ChannelPropagationEnable,
816 CoalesceLdEnable,
817 CoalesceLdThreadEnable,
818 CoalesceLdCrossLaneEnable,
819 CoalesceStoreEnable,
820 CutNonspillableLiveRangesEnable,
821 DecreaseGRFPressureIfSpilledEnable,
822 CodeSinkingEnable,
823 MovPropagationEnable,
824 CondModPropagationEnable,
825 ImmediatesToConstantBufferEnable,
826 ImmediatesToConstantBufferOptimizeALUEnable,
827 PointerALUOptimizationEnable,
828 KillAfterDiscardEnable,
829 RematerializationEnable,
830 RegionPreSchedulingEnable,
831 PruningEnable,
832 DeadBranchRemovalEnable,
833 NoSrcDepSetEnable,
834 ShaderHWInputPackingEnable,
835 ShaderDeclarationPackingEnable,
836 TPMPromotionEnable,
837 SSAAllocatorEnable,
838 SSAAllocator1BBOnly,
839 SSAAllocatorSIMD8Only,
840 GotoJoinOptEnable,
841 GotoAroundGotoMergeEnable,
842 StatefulCompilationEnable,
843 AtomicDstRemovalEnable,
844 MergeSimd8SamplerCBLoadsToSimd16Enable,
845 SoftwareFp16PayloadEnable,
846 SplitQuadTo32bitForALUEnable,
847 SIMD32DivergentLoopHeuristicEnable,
848 SIMD32SampleCountHeuristicEnable,
849 SIMD32ConcurrentValuesHeuristicEnable,
850 SIMD32ExtraHeuristicsEnable,
851
852 // ******* Switches affecting floating point math optimizations ******
853 NoSignedZerosEnable, // allow optimizations to disregard the sign of zero
854 FiniteMathOnlyEnable, // assume floating point arguments and results are never Inf, NaN values
855 UnsafeMathOptimizationsEnable, // allow algebraically equivalent transformations which may not hold in IEEE 754 arithmetic, e.g. (x+y)-y -> x
856
857 // Required controls.
858 TrackParallelInterferences,
859 ForceUnalignedPointerReadDetectionEnable,
860 ForceUnalignedPointerWriteDetectionEnable,
861 DivergentPointerEnable,
862 StructuralAnalysisEnable,
863
864 // Numeric controls.
865 IndirectCBOptimizationMode,
866 ImmediatesToConstantBufferMinImmediates,
867 MaxNumOfMulInstructionsPerPowUnwind,
868 MulWeightOfSqrtInstructionInPowUnwind,
869 MulWeightOfInvInstructionInPowUnwind,
870
871 // Decomposer controls.
872 DecomposeFDivToFRcpFMul,
873
874 // LIR Pattern Match controls - they're all dependent from PatternMatchReplaceEnable.
875 // i.e. PatternMatchReplaceEnable set to 0 will disable them all.
876 PMRChannelMatchEnable,
877 PMRPowerMatchEnable,
878 PMREUBypassMatchEnable,
879 PMRComparisonMatchEnable,
880 PMRFlowControlMatchEnable,
881 PMRMultiplyMatchEnable,
882 PMRMulMadMatchEnable,
883 PMRSqrtMatchEnable,
884 PMRFDivMatchEnable,
885 PMRSelectMatchEnable,
886 PMRMinMaxMatchEnable,
887 PMRMulDivMatchEnable,
888 PMRFDP3MatchEnable,
889 PMRFDP4ToHMatchEnable,
890 PMRMov0FDPMatchEnable,
891 PMRMadMatchEnable, // equal to old MadPatternMatchReplaceEnable
892 PMRBfeMatchEnable,
893 PMRLrpMatchEnable,
894 PMRTrivialLrpMatchEnable,
895 PMRMovLrpToAddMadMatchEnable,
896 PMRBfiMatchEnable,
897 PMRShrShlMatchEnable,
898 PMRAddShlMatchEnable,
899 PMRAddAddMatchEnable,
900 PMRAndShiftMatchEnable,
901 PMRJOIN_DPMatchEnable,
902 PMRConvert64bitTo32bit,
903 PMRGetValueFromActiveChannelMatchEnable,
904 PMRCselMatchEnable,
905 PMRReplicateComponentMatchEnable,
906 PMRFDPHMatchEnable,
907 PMRPackMatchEnable,
908 PMRFFRCMatchEnable,
909 PMRConstantPropagationMatchEnable,
910 PMRTrivialPOWMatchEnable,
911 PMRMulAddToMulMatchEnable,
912 PMRVecImmScalarMatchEnable,
913 PMRMovTwoLowPrecImmEnable,
914 PMRIntConvertToBitcastEnable,
915 PMRHoistBitcastsEnable,
916 PMRMediaBlockReadPackMatchEnable,
917 PMRAverageMatchEnable,
918 PMRPropIntConvMatchEnable,
919 PMRConvToMovMatchEnable,
920 PMRPropagateRedundantPackEnable,
921 PMRHoistSaturateEnable,
922 PMRMergeWordByteUnpacksEnable,
923 PMRMergeWordUnpacksEnable,
924 PMRPropLowPrecEnable,
925
926 USC_PARAM_HIDE()
927 NUM_USC_KERNEL_COMPILER_CONTROLS
928 };
929
930 } // namespace USC
931
932 #endif // __cplusplus
933