1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
#include <cstdint>
#include <limits>

#include "common/igc_regkeys.hpp"
#include "common/Types.hpp"
#include "inc/common/igfxfmid.h"
14 
/*
This file provides hooks to query whether a feature is supported by the runtime we are compiling for.
It holds the default values; each adapter can override any of the queries to tell the backend
what it supports and what it doesn't. It also implements some workarounds for cases where an API
or driver doesn't support something.
*/
21 
22 namespace IGC
23 {
24 
25     class CDriverInfo
26     {
27     public:
28         /// The driver implements the WA using constant buffer 2 for NOS constants instead of 0
implementPushConstantWA() const29         virtual bool implementPushConstantWA() const { return false; }
30 
31         /// Driver supports Simple Push Mechanism only.
SupportsSimplePushOnly() const32         virtual bool SupportsSimplePushOnly() const { return false; }
33 
34         /// Driver supports Gather Constant Mechanism only.
SupportsGatherConstantOnly() const35         virtual bool SupportsGatherConstantOnly() const { return false; }
36 
37         /// Driver supports resource streamer if HW supportes it, otherwise simple push
SupportsHWResourceStreameAndSimplePush() const38         virtual bool SupportsHWResourceStreameAndSimplePush() const { return false; }
39 
40         /// Driver supports dynamic uniform buffers.
SupportsDynamicUniformBuffers() const41         virtual bool SupportsDynamicUniformBuffers() const { return false; }
42 
43         /// Is any special metadata translation required
NeedsMetadataTranslation() const44         virtual bool NeedsMetadataTranslation() const { return false; }
45 
46         /// Do we need to break down the fmuladd
NeedsBreakdownMulAdd() const47         virtual bool NeedsBreakdownMulAdd() const { return false; }
48 
49         /// The driver supports using scratch space to store the private memory
supportsScratchSpacePrivateMemory() const50         virtual bool supportsScratchSpacePrivateMemory() const { return true; }
51 
52         /// The driver supports using stateless space to store the private memory
53         /// Driver must be able to use at least one way to store the private memory: either "scratch space" or "stateless space"
54         /// and by default, driver only supports one of them.
supportsStatelessSpacePrivateMemory() const55         virtual bool supportsStatelessSpacePrivateMemory() const { return !supportsScratchSpacePrivateMemory(); }
56 
57         /// The driver requires to align each entry (a workgroup item) of private scratch memory in a stateless
58         /// buffer.
requiresPowerOfTwoStatelessSpacePrivateMemorySize() const59         virtual bool requiresPowerOfTwoStatelessSpacePrivateMemorySize() const { return false; }
60 
61         /// The driver supports splitting up scratch memory space into two areas:
62         /// - private scratch memory space: non-promoted alloca instructions (early allocated scratch
63         ///   memory space based on llvm IR)
64         /// - spill/fill and Gtpin scratch memory space: (late allocated scratch memory space based
65         ///   registry allocation)
supportsSeparatingSpillAndPrivateScratchMemorySpace() const66         virtual bool supportsSeparatingSpillAndPrivateScratchMemorySpace() const { return IGC_IS_FLAG_ENABLED(SeparateSpillPvtScratchSpace); }
67 
68         /// The max size in bytes of the scratch space per thread.
maxPerThreadScratchSpace() const69         unsigned int maxPerThreadScratchSpace() const { return 2 * 1024 * 1024; }
70 
71         /// The driver Uses special states to push constants beyond index 256
Uses3DSTATE_DX9_CONSTANT() const72         virtual bool Uses3DSTATE_DX9_CONSTANT() const { return false; }
73 
74         /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
UsesTypedConstantBuffers3D() const75         virtual bool UsesTypedConstantBuffers3D() const { return true; }
76 
77         /// The driver uses typed constant buffers requiring byte address access.
UsesTypedConstantBuffersWithByteAddress() const78         virtual bool UsesTypedConstantBuffersWithByteAddress() const { return false; }
79 
80         /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
UsesTypedConstantBuffersGPGPU() const81         virtual bool UsesTypedConstantBuffersGPGPU() const { return true; }
82 
83         /// Overwrite UsesTypedConstantBuffers3D() and UsesTypedConstantBuffersGPGPU()
84         /// for bindless buffers only.
ForceUntypedBindlessConstantBuffers() const85         virtual bool ForceUntypedBindlessConstantBuffers() const { return false; }
86 
87         /// The driver uses sparse aliased residency
UsesSparseAliasedResidency() const88         virtual bool UsesSparseAliasedResidency() const { return false; }
89 
90         /// The driver doesn't clear the vertex header so it needs to be done in the compiler
NeedClearVertexHeader() const91         virtual bool NeedClearVertexHeader() const { return false; }
92 
93         /// Do Fastest Stage1 only for 3D
SupportFastestStage1() const94         virtual bool SupportFastestStage1() const { return true; }
95 
96         /// do code sinking before CFGSimplification, helps some workloads
CodeSinkingBeforeCFGSimplification() const97         virtual bool CodeSinkingBeforeCFGSimplification() const { return false; }
98 
99         /// allow executing constant buffer on the CPU
AllowGenUpdateCB(ShaderType shaderType) const100         virtual bool AllowGenUpdateCB(ShaderType shaderType) const { return false; }
101 
102         /// The driver implements single instance vertex dispatch feature
SupportsSingleInstanceVertexDispatch() const103         virtual bool SupportsSingleInstanceVertexDispatch() const { return false; }
104 
105         // Allow branch swapping for better Nan perf
BranchSwapping() const106         virtual bool BranchSwapping() const { return false; }
107 
108         /// Allow propagation up-converstion of half if it can generate better code
AllowUnsafeHalf() const109         virtual bool AllowUnsafeHalf() const { return true; }
110 
111         /// Allow send fusion (Some API have perf regressions, temp use to turn it off)
AllowSendFusion() const112         virtual bool AllowSendFusion() const { return true; }
113 
114         /// Supports more than 16 samplers
SupportMoreThan16Samplers() const115         virtual bool SupportMoreThan16Samplers() const { return false; }
116 
117         /// API supports IEEE min/max
SupportsIEEEMinMax() const118         virtual bool SupportsIEEEMinMax() const { return false; }
119 
NeedCountSROA() const120         virtual bool NeedCountSROA() const { return false; }
121 
122         /// Can we always contract mul and add
NeedCheckContractionAllowed() const123         virtual bool NeedCheckContractionAllowed() const { return false; }
124 
125         /// The API generates load/store of doubles which needs to be broken down
HasDoubleLoadStore() const126         virtual bool HasDoubleLoadStore() const { return false; }
127 
128         /// Needs emulation of 64bits instructions
NeedI64BitDivRem() const129         virtual bool NeedI64BitDivRem() const { return false; }
130 
131         /// Return true if IGC needs FP64 emulation. (Valid if platform has no double inst.)
NeedFP64(PRODUCT_FAMILY productFamily) const132         virtual bool NeedFP64(PRODUCT_FAMILY productFamily) const { return false; }
133 
134         /// Needs IEEE fp64 div/sqrt
NeedFP64DivSqrt() const135         virtual bool NeedFP64DivSqrt() const { return false; }
136 
137         /// Must support of f32 IEEE divide (also sqrt)
NeedIEEESPDiv() const138         virtual bool NeedIEEESPDiv() const { return false; }
139 
140         /// Has memcpy/memset intrinsic
HasMemoryIntrinsics() const141         virtual bool HasMemoryIntrinsics() const { return false; }
142 
143         /// Has load store not natively supported
HasNonNativeLoadStore() const144         virtual bool HasNonNativeLoadStore() const { return false; }
145 
146         /// Need lowering global inlined constant buffers
NeedLoweringInlinedConstants() const147         virtual bool NeedLoweringInlinedConstants() const { return false; }
148 
149         /// Turn on type demotion, not tested on all APIs
benefitFromTypeDemotion() const150         virtual bool benefitFromTypeDemotion() const { return false; }
151 
152         /// Turn on type rematerialization of flag register, not tested on all APIs
benefitFromPreRARematFlag() const153         virtual bool benefitFromPreRARematFlag() const { return false; }
154 
155         /// add extra optimization passes after AlwaysInlinerPass to support two phase inlining
NeedExtraPassesAfterAlwaysInlinerPass() const156         virtual bool NeedExtraPassesAfterAlwaysInlinerPass() const { return false; }
157 
158         /// Turn on vISA pre-RA scheduler. Not tested on all APIs
enableVISAPreRAScheduler() const159         virtual bool enableVISAPreRAScheduler() const { return false; }
160 
161         /// Turn on vISA pre-RA scheduler for retry
enableVISAPreRASchedulerForRetry() const162         virtual bool enableVISAPreRASchedulerForRetry() const { return false; }
163 
164         /// Configure vISA pre-RA scheduler. Not tested on all APIs
getVISAPreRASchedulerCtrl() const165         virtual unsigned getVISAPreRASchedulerCtrl() const { return 4; }
166 
167         /// Turn on sampler clustering. Hopefully VISA PreRA scheduler with latency hiding can replace it.
enableSampleClustering() const168         virtual bool enableSampleClustering() const { return true; }
169 
170         /// Make sure optimization are consistent to avoid Z-fighting issue
PreventZFighting() const171         virtual bool PreventZFighting() const { return false; }
172 
173         /// Force enabling SIMD32 in case we exepct latency problem. Helps some workloads
AlwaysEnableSimd32() const174         virtual bool AlwaysEnableSimd32() const { return false; }
175 
176         /// Driver supports promoting buffers to bindful
SupportsStatelessToStatefullBufferTransformation() const177         virtual bool SupportsStatelessToStatefullBufferTransformation() const { return false; }
178 
179         /// Need emulation of 64bits type for HW not supporting it natively
Enable64BitEmu() const180         virtual bool Enable64BitEmu() const { return false; }
181 
182         /// In some cases several BTI may alias
DisableDpSendReordering() const183         virtual bool DisableDpSendReordering() const { return false; }
184 
185         /// Driver uses HW alt math mode, this cause floating point operations to behave differently
UseALTMode() const186         virtual bool UseALTMode() const { return false; }
187 
188         /// Whether the driver supports blend to fill opt
SupportBlendToFillOpt() const189         virtual bool SupportBlendToFillOpt() const { return false; }
190 
191         /// Need to know if the driver can accept more than one SIMD mode for compute shaders
sendMultipleSIMDModes() const192         virtual bool sendMultipleSIMDModes() const { return false; }
193 
194         /// pick behavior whether we need to keep discarded helper pixels to calculate
195         /// gradient correctly for sampler or we need to force early out discarded pixels
KeepDiscardHelperPixels() const196         virtual bool KeepDiscardHelperPixels() const { return false; }
197 
198         // Choose to support parsing inlined asm instructions on specific platforms
SupportInlineAssembly() const199         virtual bool SupportInlineAssembly() const { return false; }
200 
201         /// support predicate add pattern match
SupportMatchPredAdd() const202         virtual bool SupportMatchPredAdd() const { return false; }
203 
204         /// Adjust adapter to adjust the loop unrolling threshold
GetLoopUnrollThreshold() const205         virtual unsigned int GetLoopUnrollThreshold() const
206         {
207             return 4000;
208         }
209 
210         // ----------------------------------------------------------------------
211         // Below are workaround for bugs in front end or IGC will be removed once
212         // the bugs are fixed
213 
214         /// Need workaround for A32 messages used along with A64
NeedWAToTransformA32MessagesToA64() const215         virtual bool NeedWAToTransformA32MessagesToA64() const { return false; }
216 
217         /// disable mad in Vertex shader to avoid ZFigthing issues
DisabeMatchMad() const218         virtual bool DisabeMatchMad() const { return false; }
219 
220         /// Some FE sends SLM pointers in DWORD units
WASLMPointersDwordUnit() const221         virtual bool WASLMPointersDwordUnit() const { return false; }
222 
223         /// Custom pass haven't been tested on all APIs
WADisableCustomPass() const224         virtual bool WADisableCustomPass() const { return false; }
225 
226         /// MemOpt2ForOCL pass not tested on all APIs
WAEnableMemOpt2ForOCL() const227         virtual bool WAEnableMemOpt2ForOCL() const { return false; }
228 
229         /// disable some optimizations for front end which sends IR with unresolved NOS function when optimizing
WaNOSNotResolved() const230         virtual bool WaNOSNotResolved() const { return false; }
231 
232         /// WA for APIs where frc generates a different precision than x - rndd(x) for small negative values
233         /// Needs to switch to use fast math flags
DisableMatchFrcPatternMatch() const234         virtual bool DisableMatchFrcPatternMatch() const { return false; }
235 
236         /// Based on the type of inlined sampler we get we program different output.
ProgrammableBorderColorInCompute() const237         virtual bool ProgrammableBorderColorInCompute() const { return false; }
238 
239         /// WA for failures with HS with push constants
WaDisablePushConstantsForHS() const240         virtual bool WaDisablePushConstantsForHS() const { return false; }
241 
242         /// Check if we have to worry about stack overflow while recursing in loop analysis
HasSmallStack() const243         virtual bool HasSmallStack() const { return false; }
244 
245         /// Check if the stateful token is supported
SupportStatefulToken() const246         virtual bool SupportStatefulToken() const { return false; }
247 
248         /// Disables dual patch dispatch for APIs that don't use it
APIDisableDSDualPatchDispatch() const249         virtual bool APIDisableDSDualPatchDispatch() const { return false; }
250 
251         /// WA to make sure scratch writes are globally observed before EOT
clearScratchWriteBeforeEOT() const252         virtual bool clearScratchWriteBeforeEOT() const { return false; }
253 
254         /// Should unaligned vectors be split before processing in EmitVISA
splitUnalignedVectors() const255         virtual bool splitUnalignedVectors() const { return true; }
256 
257         /// Does not emit an error if recursive functions calls are detected.
AllowRecursion() const258         virtual bool AllowRecursion() const { return false; }
259 
260         /// Restrict dessa aliasing level. -1 : no restriction; max level otherwise.
DessaAliasLevel() const261         virtual int DessaAliasLevel() const { return -1; }
262 
263         /// Rounding mode used for DP emulated function, defaults to Round to nearest
DPEmulationRoundingMode() const264         virtual unsigned DPEmulationRoundingMode() const { return 0; }
265 
266         /// Check for flushing denormals for DP emulated function
DPEmulationFlushDenorm() const267         virtual bool DPEmulationFlushDenorm() const { return false; }
268 
269         /// Check for flush to zero for DP emulated function
DPEmulationFlushToZero() const270         virtual bool DPEmulationFlushToZero() const { return false; }
271 
272         // Maximum id that can be used by simple push constant buffers. The default is maximum unsigned int (no restriction)
MaximumSimplePushBufferID() const273         virtual unsigned int MaximumSimplePushBufferID() const { return std::numeric_limits<unsigned int>::max(); }
274 
275         /// Enables the use of inline data on XeHP_SDV+
UseInlineData() const276         virtual bool UseInlineData() const { return false; }
277 
278         /// Use first VB to send vertex&base instance and second for draw index
UsesVertexBuffersToSendShaderDrawParameters() const279         virtual bool UsesVertexBuffersToSendShaderDrawParameters() const { return false; }
280 
281         /// Use indirect payload in CS
UsesIndirectPayload() const282         virtual bool UsesIndirectPayload() const { return true; }
283 
SupportsDispatchGPGPUWalkerAlongYFirst() const284         virtual bool SupportsDispatchGPGPUWalkerAlongYFirst() const { return true; }
285 
286         /// Check if integer mad is enabled
EnableIntegerMad() const287         virtual bool EnableIntegerMad() const { return false; }
288 
289         /// Respect per instruction 'contract' Fast-Math flag
RespectPerInstructionContractFlag() const290         virtual bool RespectPerInstructionContractFlag() const { return false; }
291 
292         /// add shader hash code after EOT for debug purposes
EnableShaderDebugHashCodeInKernel() const293         virtual bool EnableShaderDebugHashCodeInKernel() const { return false; }
294 
295         // The size of output printf buffer is 4 MB by default by agreement with Runtime.
getPrintfBufferSize() const296         virtual uint32_t getPrintfBufferSize() const
297         {
298             return 4 * sizeof(MEGABYTE);
299         }
300 
301         // Limits simple push constants based on pushed inputs
EnableSimplePushRestriction() const302         virtual bool EnableSimplePushRestriction() const { return false; }
303 
304         // Determines whether the PAYLOAD_HEADER implicit arg must be present
RequirePayloadHeader() const305         virtual bool RequirePayloadHeader() const { return true; }
306 
supportsAutoGRFSelection() const307         virtual bool supportsAutoGRFSelection() const { return autoGRFSelection; }
setAutoGRFSelection(bool value)308         virtual void setAutoGRFSelection(bool value) { autoGRFSelection = value; }
309 
310 
311 protected:
312     bool autoGRFSelection = false;
313     };
314 
315 }//namespace IGC
316