1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "common/igc_regkeys.hpp" 12 #include "common/Types.hpp" 13 #include "inc/common/igfxfmid.h" 14 15 /* 16 This provides hook to query whether a feature is supported by the runtime we are compiling for 17 This file has default value, then each adapter can overload any of the query to tell the backend 18 what it supports and what it doesn't. This also implements some workaround in case some API 19 or driver doesn't support something 20 */ 21 22 namespace IGC 23 { 24 25 class CDriverInfo 26 { 27 public: 28 /// The driver implements the WA using constant buffer 2 for NOS constants instead of 0 implementPushConstantWA() const29 virtual bool implementPushConstantWA() const { return false; } 30 31 /// Driver supports Simple Push Mechanism only. SupportsSimplePushOnly() const32 virtual bool SupportsSimplePushOnly() const { return false; } 33 34 /// Driver supports Gather Constant Mechanism only. SupportsGatherConstantOnly() const35 virtual bool SupportsGatherConstantOnly() const { return false; } 36 37 /// Driver supports resource streamer if HW supportes it, otherwise simple push SupportsHWResourceStreameAndSimplePush() const38 virtual bool SupportsHWResourceStreameAndSimplePush() const { return false; } 39 40 /// Driver supports dynamic uniform buffers. SupportsDynamicUniformBuffers() const41 virtual bool SupportsDynamicUniformBuffers() const { return false; } 42 43 /// Is any special metadata translation required NeedsMetadataTranslation() const44 virtual bool NeedsMetadataTranslation() const { return false; } 45 46 /// Do we need to break down the fmuladd NeedsBreakdownMulAdd() const47 virtual bool NeedsBreakdownMulAdd() const { return false; } 48 49 /// The driver supports using scratch space to store the private memory supportsScratchSpacePrivateMemory() const50 virtual bool supportsScratchSpacePrivateMemory() const { return true; } 51 52 /// The driver supports using stateless space to store the private memory 53 /// Driver must be able to use at least one way to store the private memory: either "scratch space" or "stateless space" 54 /// and by default, driver only supports one of them. supportsStatelessSpacePrivateMemory() const55 virtual bool supportsStatelessSpacePrivateMemory() const { return !supportsScratchSpacePrivateMemory(); } 56 57 /// The driver requires to align each entry (a workgroup item) of private scratch memory in a stateless 58 /// buffer. requiresPowerOfTwoStatelessSpacePrivateMemorySize() const59 virtual bool requiresPowerOfTwoStatelessSpacePrivateMemorySize() const { return false; } 60 61 /// The driver supports splitting up scratch memory space into two areas: 62 /// - private scratch memory space: non-promoted alloca instructions (early allocated scratch 63 /// memory space based on llvm IR) 64 /// - spill/fill and Gtpin scratch memory space: (late allocated scratch memory space based 65 /// registry allocation) supportsSeparatingSpillAndPrivateScratchMemorySpace() const66 virtual bool supportsSeparatingSpillAndPrivateScratchMemorySpace() const { return IGC_IS_FLAG_ENABLED(SeparateSpillPvtScratchSpace); } 67 68 /// The max size in bytes of the scratch space per thread. maxPerThreadScratchSpace() const69 unsigned int maxPerThreadScratchSpace() const { return 2 * 1024 * 1024; } 70 71 /// The driver Uses special states to push constants beyond index 256 Uses3DSTATE_DX9_CONSTANT() const72 virtual bool Uses3DSTATE_DX9_CONSTANT() const { return false; } 73 74 /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler) UsesTypedConstantBuffers3D() const75 virtual bool UsesTypedConstantBuffers3D() const { return true; } 76 77 /// The driver uses typed constant buffers requiring byte address access. UsesTypedConstantBuffersWithByteAddress() const78 virtual bool UsesTypedConstantBuffersWithByteAddress() const { return false; } 79 80 /// The driver uses typed or untyped constant buffers (for ld_raw vs sampler) UsesTypedConstantBuffersGPGPU() const81 virtual bool UsesTypedConstantBuffersGPGPU() const { return true; } 82 83 /// Overwrite UsesTypedConstantBuffers3D() and UsesTypedConstantBuffersGPGPU() 84 /// for bindless buffers only. ForceUntypedBindlessConstantBuffers() const85 virtual bool ForceUntypedBindlessConstantBuffers() const { return false; } 86 87 /// The driver uses sparse aliased residency UsesSparseAliasedResidency() const88 virtual bool UsesSparseAliasedResidency() const { return false; } 89 90 /// The driver doesn't clear the vertex header so it needs to be done in the compiler NeedClearVertexHeader() const91 virtual bool NeedClearVertexHeader() const { return false; } 92 93 /// Do Fastest Stage1 only for 3D SupportFastestStage1() const94 virtual bool SupportFastestStage1() const { return true; } 95 96 /// do code sinking before CFGSimplification, helps some workloads CodeSinkingBeforeCFGSimplification() const97 virtual bool CodeSinkingBeforeCFGSimplification() const { return false; } 98 99 /// allow executing constant buffer on the CPU AllowGenUpdateCB(ShaderType shaderType) const100 virtual bool AllowGenUpdateCB(ShaderType shaderType) const { return false; } 101 102 /// The driver implements single instance vertex dispatch feature SupportsSingleInstanceVertexDispatch() const103 virtual bool SupportsSingleInstanceVertexDispatch() const { return false; } 104 105 // Allow branch swapping for better Nan perf BranchSwapping() const106 virtual bool BranchSwapping() const { return false; } 107 108 /// Allow propagation up-converstion of half if it can generate better code AllowUnsafeHalf() const109 virtual bool AllowUnsafeHalf() const { return true; } 110 111 /// Allow send fusion (Some API have perf regressions, temp use to turn it off) AllowSendFusion() const112 virtual bool AllowSendFusion() const { return true; } 113 114 /// Supports more than 16 samplers SupportMoreThan16Samplers() const115 virtual bool SupportMoreThan16Samplers() const { return false; } 116 117 /// API supports IEEE min/max SupportsIEEEMinMax() const118 virtual bool SupportsIEEEMinMax() const { return false; } 119 NeedCountSROA() const120 virtual bool NeedCountSROA() const { return false; } 121 122 /// Can we always contract mul and add NeedCheckContractionAllowed() const123 virtual bool NeedCheckContractionAllowed() const { return false; } 124 125 /// The API generates load/store of doubles which needs to be broken down HasDoubleLoadStore() const126 virtual bool HasDoubleLoadStore() const { return false; } 127 128 /// Needs emulation of 64bits instructions NeedI64BitDivRem() const129 virtual bool NeedI64BitDivRem() const { return false; } 130 131 /// Return true if IGC needs FP64 emulation. (Valid if platform has no double inst.) NeedFP64(PRODUCT_FAMILY productFamily) const132 virtual bool NeedFP64(PRODUCT_FAMILY productFamily) const { return false; } 133 134 /// Needs IEEE fp64 div/sqrt NeedFP64DivSqrt() const135 virtual bool NeedFP64DivSqrt() const { return false; } 136 137 /// Must support of f32 IEEE divide (also sqrt) NeedIEEESPDiv() const138 virtual bool NeedIEEESPDiv() const { return false; } 139 140 /// Has memcpy/memset intrinsic HasMemoryIntrinsics() const141 virtual bool HasMemoryIntrinsics() const { return false; } 142 143 /// Has load store not natively supported HasNonNativeLoadStore() const144 virtual bool HasNonNativeLoadStore() const { return false; } 145 146 /// Need lowering global inlined constant buffers NeedLoweringInlinedConstants() const147 virtual bool NeedLoweringInlinedConstants() const { return false; } 148 149 /// Turn on type demotion, not tested on all APIs benefitFromTypeDemotion() const150 virtual bool benefitFromTypeDemotion() const { return false; } 151 152 /// Turn on type rematerialization of flag register, not tested on all APIs benefitFromPreRARematFlag() const153 virtual bool benefitFromPreRARematFlag() const { return false; } 154 155 /// add extra optimization passes after AlwaysInlinerPass to support two phase inlining NeedExtraPassesAfterAlwaysInlinerPass() const156 virtual bool NeedExtraPassesAfterAlwaysInlinerPass() const { return false; } 157 158 /// Turn on vISA pre-RA scheduler. Not tested on all APIs enableVISAPreRAScheduler() const159 virtual bool enableVISAPreRAScheduler() const { return false; } 160 161 /// Turn on vISA pre-RA scheduler for retry enableVISAPreRASchedulerForRetry() const162 virtual bool enableVISAPreRASchedulerForRetry() const { return false; } 163 164 /// Configure vISA pre-RA scheduler. Not tested on all APIs getVISAPreRASchedulerCtrl() const165 virtual unsigned getVISAPreRASchedulerCtrl() const { return 4; } 166 167 /// Turn on sampler clustering. Hopefully VISA PreRA scheduler with latency hiding can replace it. enableSampleClustering() const168 virtual bool enableSampleClustering() const { return true; } 169 170 /// Make sure optimization are consistent to avoid Z-fighting issue PreventZFighting() const171 virtual bool PreventZFighting() const { return false; } 172 173 /// Force enabling SIMD32 in case we exepct latency problem. Helps some workloads AlwaysEnableSimd32() const174 virtual bool AlwaysEnableSimd32() const { return false; } 175 176 /// Driver supports promoting buffers to bindful SupportsStatelessToStatefullBufferTransformation() const177 virtual bool SupportsStatelessToStatefullBufferTransformation() const { return false; } 178 179 /// Need emulation of 64bits type for HW not supporting it natively Enable64BitEmu() const180 virtual bool Enable64BitEmu() const { return false; } 181 182 /// In some cases several BTI may alias DisableDpSendReordering() const183 virtual bool DisableDpSendReordering() const { return false; } 184 185 /// Driver uses HW alt math mode, this cause floating point operations to behave differently UseALTMode() const186 virtual bool UseALTMode() const { return false; } 187 188 /// Whether the driver supports blend to fill opt SupportBlendToFillOpt() const189 virtual bool SupportBlendToFillOpt() const { return false; } 190 191 /// Need to know if the driver can accept more than one SIMD mode for compute shaders sendMultipleSIMDModes() const192 virtual bool sendMultipleSIMDModes() const { return false; } 193 194 /// pick behavior whether we need to keep discarded helper pixels to calculate 195 /// gradient correctly for sampler or we need to force early out discarded pixels KeepDiscardHelperPixels() const196 virtual bool KeepDiscardHelperPixels() const { return false; } 197 198 // Choose to support parsing inlined asm instructions on specific platforms SupportInlineAssembly() const199 virtual bool SupportInlineAssembly() const { return false; } 200 201 /// support predicate add pattern match SupportMatchPredAdd() const202 virtual bool SupportMatchPredAdd() const { return false; } 203 204 /// Adjust adapter to adjust the loop unrolling threshold GetLoopUnrollThreshold() const205 virtual unsigned int GetLoopUnrollThreshold() const 206 { 207 return 4000; 208 } 209 210 // ---------------------------------------------------------------------- 211 // Below are workaround for bugs in front end or IGC will be removed once 212 // the bugs are fixed 213 214 /// Need workaround for A32 messages used along with A64 NeedWAToTransformA32MessagesToA64() const215 virtual bool NeedWAToTransformA32MessagesToA64() const { return false; } 216 217 /// disable mad in Vertex shader to avoid ZFigthing issues DisabeMatchMad() const218 virtual bool DisabeMatchMad() const { return false; } 219 220 /// Some FE sends SLM pointers in DWORD units WASLMPointersDwordUnit() const221 virtual bool WASLMPointersDwordUnit() const { return false; } 222 223 /// Custom pass haven't been tested on all APIs WADisableCustomPass() const224 virtual bool WADisableCustomPass() const { return false; } 225 226 /// MemOpt2ForOCL pass not tested on all APIs WAEnableMemOpt2ForOCL() const227 virtual bool WAEnableMemOpt2ForOCL() const { return false; } 228 229 /// disable some optimizations for front end which sends IR with unresolved NOS function when optimizing WaNOSNotResolved() const230 virtual bool WaNOSNotResolved() const { return false; } 231 232 /// WA for APIs where frc generates a different precision than x - rndd(x) for small negative values 233 /// Needs to switch to use fast math flags DisableMatchFrcPatternMatch() const234 virtual bool DisableMatchFrcPatternMatch() const { return false; } 235 236 /// Based on the type of inlined sampler we get we program different output. ProgrammableBorderColorInCompute() const237 virtual bool ProgrammableBorderColorInCompute() const { return false; } 238 239 /// WA for failures with HS with push constants WaDisablePushConstantsForHS() const240 virtual bool WaDisablePushConstantsForHS() const { return false; } 241 242 /// Check if we have to worry about stack overflow while recursing in loop analysis HasSmallStack() const243 virtual bool HasSmallStack() const { return false; } 244 245 /// Check if the stateful token is supported SupportStatefulToken() const246 virtual bool SupportStatefulToken() const { return false; } 247 248 /// Disables dual patch dispatch for APIs that don't use it APIDisableDSDualPatchDispatch() const249 virtual bool APIDisableDSDualPatchDispatch() const { return false; } 250 251 /// WA to make sure scratch writes are globally observed before EOT clearScratchWriteBeforeEOT() const252 virtual bool clearScratchWriteBeforeEOT() const { return false; } 253 254 /// Should unaligned vectors be split before processing in EmitVISA splitUnalignedVectors() const255 virtual bool splitUnalignedVectors() const { return true; } 256 257 /// Does not emit an error if recursive functions calls are detected. AllowRecursion() const258 virtual bool AllowRecursion() const { return false; } 259 260 /// Restrict dessa aliasing level. -1 : no restriction; max level otherwise. DessaAliasLevel() const261 virtual int DessaAliasLevel() const { return -1; } 262 263 /// Rounding mode used for DP emulated function, defaults to Round to nearest DPEmulationRoundingMode() const264 virtual unsigned DPEmulationRoundingMode() const { return 0; } 265 266 /// Check for flushing denormals for DP emulated function DPEmulationFlushDenorm() const267 virtual bool DPEmulationFlushDenorm() const { return false; } 268 269 /// Check for flush to zero for DP emulated function DPEmulationFlushToZero() const270 virtual bool DPEmulationFlushToZero() const { return false; } 271 272 // Maximum id that can be used by simple push constant buffers. The default is maximum unsigned int (no restriction) MaximumSimplePushBufferID() const273 virtual unsigned int MaximumSimplePushBufferID() const { return std::numeric_limits<unsigned int>::max(); } 274 275 /// Enables the use of inline data on XeHP_SDV+ UseInlineData() const276 virtual bool UseInlineData() const { return false; } 277 278 /// Use first VB to send vertex&base instance and second for draw index UsesVertexBuffersToSendShaderDrawParameters() const279 virtual bool UsesVertexBuffersToSendShaderDrawParameters() const { return false; } 280 281 /// Use indirect payload in CS UsesIndirectPayload() const282 virtual bool UsesIndirectPayload() const { return true; } 283 SupportsDispatchGPGPUWalkerAlongYFirst() const284 virtual bool SupportsDispatchGPGPUWalkerAlongYFirst() const { return true; } 285 286 /// Check if integer mad is enabled EnableIntegerMad() const287 virtual bool EnableIntegerMad() const { return false; } 288 289 /// Respect per instruction 'contract' Fast-Math flag RespectPerInstructionContractFlag() const290 virtual bool RespectPerInstructionContractFlag() const { return false; } 291 292 /// add shader hash code after EOT for debug purposes EnableShaderDebugHashCodeInKernel() const293 virtual bool EnableShaderDebugHashCodeInKernel() const { return false; } 294 295 // The size of output printf buffer is 4 MB by default by agreement with Runtime. getPrintfBufferSize() const296 virtual uint32_t getPrintfBufferSize() const 297 { 298 return 4 * sizeof(MEGABYTE); 299 } 300 301 // Limits simple push constants based on pushed inputs EnableSimplePushRestriction() const302 virtual bool EnableSimplePushRestriction() const { return false; } 303 304 // Determines whether the PAYLOAD_HEADER implicit arg must be present RequirePayloadHeader() const305 virtual bool RequirePayloadHeader() const { return true; } 306 supportsAutoGRFSelection() const307 virtual bool supportsAutoGRFSelection() const { return autoGRFSelection; } setAutoGRFSelection(bool value)308 virtual void setAutoGRFSelection(bool value) { autoGRFSelection = value; } 309 310 311 protected: 312 bool autoGRFSelection = false; 313 }; 314 315 }//namespace IGC 316