1 //
2 // Copyright 2015 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // FeaturesD3D.h: Features and workarounds for D3D driver bugs and other issues.
8 
9 #ifndef ANGLE_PLATFORM_FEATURESD3D_H_
10 #define ANGLE_PLATFORM_FEATURESD3D_H_
11 
12 #include "platform/Feature.h"
13 
14 namespace angle
15 {
16 
17 // Workarounds attached to each shader. Do not need to expose information about these workarounds so
18 // a simple bool struct suffices.
// Per-shader compiler workaround flags. These are plain booleans (all off by default) because no
// extra information about them needs to be exposed.
struct CompilerWorkaroundsD3D
{
    // Optimization-related switches; presumably forwarded to the shader compiler (confirm at the
    // use sites).
    bool skipOptimization{false};

    bool useMaxOptimization{false};

    // NaNs only behave correctly when IEEE strictness is turned on.
    bool enableIEEEStrictness{false};
};
28 
29 struct FeaturesD3D : FeatureSetBase
30 {
31     FeaturesD3D();
32     ~FeaturesD3D();
33 
34     // On some systems, having extra rendertargets than necessary slows down the shader.
35     // We can fix this by optimizing those out of the shader. At the same time, we can
36     // work around a bug on some nVidia drivers that they ignore "null" render targets
37     // in D3D11, by compacting the active color attachments list to omit null entries.
38     Feature mrtPerfWorkaround = {"mrt_perf_workaround", FeatureCategory::D3DWorkarounds,
39                                  "Some drivers have a bug where they ignore null render targets",
40                                  &members};
41 
42     Feature setDataFasterThanImageUpload = {"set_data_faster_than_image_upload",
43                                             FeatureCategory::D3DWorkarounds,
44                                             "Set data faster than image upload", &members};
45 
46     Feature setDataFasterThanImageUploadOn128bitFormats = {
47         "set_data_faster_than_image_upload_on_128bit_formats", FeatureCategory::D3DWorkarounds,
48         "Set data faster than image upload on 128bit formats", &members};
49 
50     // Some renderers can't disable mipmaps on a mipmapped texture (i.e. solely sample from level
51     // zero, and ignore the other levels). D3D11 Feature Level 10+ does this by setting MaxLOD to
52     // 0.0f in the Sampler state. D3D9 sets D3DSAMP_MIPFILTER to D3DTEXF_NONE. There is no
53     // equivalent to this in D3D11 Feature Level 9_3. This causes problems when (for example) an
54     // application creates a mipmapped texture2D, but sets GL_TEXTURE_MIN_FILTER to GL_NEAREST
55     // (i.e disables mipmaps). To work around this, D3D11 FL9_3 has to create two copies of the
56     // texture. The textures' level zeros are identical, but only one texture has mips.
57     Feature zeroMaxLodWorkaround = {"zero_max_lod", FeatureCategory::D3DWorkarounds,
58                                     "Missing an option to disable mipmaps on a mipmapped texture",
59                                     &members};
60 
61     // Some renderers do not support Geometry Shaders so the Geometry Shader-based PointSprite
62     // emulation will not work. To work around this, D3D11 FL9_3 has to use a different pointsprite
63     // emulation that is implemented using instanced quads.
64     Feature useInstancedPointSpriteEmulation = {
65         "use_instanced_point_sprite_emulation", FeatureCategory::D3DWorkarounds,
66         "Some D3D11 renderers do not support geometry shaders for pointsprite emulation", &members};
67 
68     // A bug fixed in NVIDIA driver version 347.88 < x <= 368.81 triggers a TDR when using
69     // CopySubresourceRegion from a staging texture to a depth/stencil in D3D11. The workaround
70     // is to use UpdateSubresource to trigger an extra copy. We disable this workaround on newer
71     // NVIDIA driver versions because of a second driver bug present with the workaround enabled.
72     // (See: http://anglebug.com/1452)
73     Feature depthStencilBlitExtraCopy = {
74         "depth_stencil_blit_extra_copy", FeatureCategory::D3DWorkarounds,
75         "Bug in some drivers triggers a TDR when using CopySubresourceRegion from a staging "
76         "texture to a depth/stencil",
77         &members, "http://anglebug.com/1452"};
78 
79     // The HLSL optimizer has a bug with optimizing "pow" in certain integer-valued expressions.
80     // We can work around this by expanding the pow into a series of multiplies if we're running
81     // under the affected compiler.
82     Feature expandIntegerPowExpressions = {
83         "expand_integer_pow_expressions", FeatureCategory::D3DWorkarounds,
84         "The HLSL optimizer has a bug with optimizing 'pow' in certain integer-valued expressions",
85         &members};
86 
87     // NVIDIA drivers sometimes write out-of-order results to StreamOut buffers when transform
88     // feedback is used to repeatedly write to the same buffer positions.
89     Feature flushAfterEndingTransformFeedback = {
90         "flush_after_ending_transform_feedback", FeatureCategory::D3DWorkarounds,
91         "Some drivers sometimes write out-of-order results to StreamOut buffers when transform "
92         "feedback is used to repeatedly write to the same buffer positions",
93         &members};
94 
95     // Some drivers (NVIDIA) do not take into account the base level of the texture in the results
96     // of the HLSL GetDimensions builtin.
97     Feature getDimensionsIgnoresBaseLevel = {
98         "get_dimensions_ignores_base_level", FeatureCategory::D3DWorkarounds,
99         "Some drivers do not take into account the base level of the "
100         "texture in the results of the HLSL GetDimensions builtin",
101         &members};
102 
103     // On some Intel drivers, HLSL's function texture.Load returns 0 when the parameter Location
104     // is negative, even if the sum of Offset and Location is in range. This may cause errors when
105     // translating GLSL's function texelFetchOffset into texture.Load, as it is valid for
106     // texelFetchOffset to use negative texture coordinates as its parameter P when the sum of P
107     // and Offset is in range. To work around this, we translate texelFetchOffset into texelFetch
108     // by adding Offset directly to Location before reading the texture.
109     Feature preAddTexelFetchOffsets = {
110         "pre_add_texel_fetch_offsets", FeatureCategory::D3DWorkarounds,
111         "HLSL's function texture.Load returns 0 when the parameter Location is negative, even if "
112         "the sum of Offset and Location is in range",
113         &members};
114 
115     // On some AMD drivers, 1x1 and 2x2 mips of depth/stencil textures aren't sampled correctly.
116     // We can work around this bug by doing an internal blit to a temporary single-channel texture
117     // before we sample.
118     Feature emulateTinyStencilTextures = {
119         "emulate_tiny_stencil_textures", FeatureCategory::D3DWorkarounds,
120         "1x1 and 2x2 mips of depth/stencil textures aren't sampled correctly", &members};
121 
122     // In Intel driver, the data with format DXGI_FORMAT_B5G6R5_UNORM will be parsed incorrectly.
123     // This workaroud will disable B5G6R5 support when it's Intel driver. By default, it will use
124     // R8G8B8A8 format. This bug is fixed in version 4539 on Intel drivers.
125     // On older AMD drivers, the data in DXGI_FORMAT_B5G6R5_UNORM becomes corrupted for unknown
126     // reasons.
127     Feature disableB5G6R5Support = {"disable_b5g6r5_support", FeatureCategory::D3DWorkarounds,
128                                     "Textures with the format "
129                                     "DXGI_FORMAT_B5G6R5_UNORM have incorrect data",
130                                     &members};
131 
132     // On some Intel drivers, evaluating unary minus operator on integer may get wrong answer in
133     // vertex shaders. To work around this bug, we translate -(int) into ~(int)+1.
134     // This driver bug is fixed in 20.19.15.4624.
135     Feature rewriteUnaryMinusOperator = {
136         "rewrite_unary_minus_operator", FeatureCategory::D3DWorkarounds,
137         "Evaluating unary minus operator on integer may get wrong answer in vertex shaders",
138         &members};
139 
140     // On some Intel drivers, using isnan() on highp float will get wrong answer. To work around
141     // this bug, we use an expression to emulate function isnan().
142     // Tracking bug: https://crbug.com/650547
143     // This driver bug is fixed in 21.20.16.4542.
144     Feature emulateIsnanFloat = {"emulate_isnan_float", FeatureCategory::D3DWorkarounds,
145                                  "Using isnan() on highp float will get wrong answer", &members,
146                                  "https://crbug.com/650547"};
147 
148     // On some Intel drivers, using clear() may not take effect. To work around this bug, we call
149     // clear() twice on these platforms.
150     // Tracking bug: https://crbug.com/655534
151     Feature callClearTwice = {"call_clear_twice", FeatureCategory::D3DWorkarounds,
152                               "Using clear() may not take effect", &members,
153                               "https://crbug.com/655534"};
154 
155     // On Sandybridge, calling ClearView after using dual source blending causes the hardware to
156     // hang. See: https://bugzilla.mozilla.org/show_bug.cgi?id=1633628
157     Feature emulateClearViewAfterDualSourceBlending = {
158         "emulate_clear_view_after_dual_source_blending", FeatureCategory::D3DWorkarounds,
159         "On Sandybridge, calling ClearView after using dual source blending causes "
160         "the hardware to hang",
161         &members, "https://bugzilla.mozilla.org/show_bug.cgi?id=1633628"};
162 
163     // On some Intel drivers, copying from staging storage to constant buffer storage does not
164     // seem to work. Work around this by keeping system memory storage as a canonical reference
165     // for buffer data.
166     // D3D11-only workaround. See http://crbug.com/593024.
167     Feature useSystemMemoryForConstantBuffers = {"use_system_memory_for_constant_buffers",
168                                                  FeatureCategory::D3DWorkarounds,
169                                                  "Copying from staging storage to constant buffer "
170                                                  "storage does not work",
171                                                  &members, "https://crbug.com/593024"};
172 
173     // This workaround is for the ANGLE_multiview extension. If enabled the viewport or render
174     // target slice will be selected in the geometry shader stage. The workaround flag is added to
175     // make it possible to select the code path in end2end and performance tests.
176     Feature selectViewInGeometryShader = {
177         "select_view_in_geometry_shader", FeatureCategory::D3DWorkarounds,
178         "The viewport or render target slice will be selected in the geometry shader stage for "
179         "the ANGLE_multiview extension",
180         &members};
181 
182     // When rendering with no render target on D3D, two bugs lead to incorrect behavior on Intel
183     // drivers < 4815. The rendering samples always pass neglecting discard statements in pixel
184     // shader.
185     // 1. If rendertarget is not set, the pixel shader will be recompiled to drop 'SV_TARGET'.
186     // When using a pixel shader with no 'SV_TARGET' in a draw, the pixels are always generated even
187     // if they should be discard by 'discard' statements.
188     // 2. If ID3D11BlendState.RenderTarget[].RenderTargetWriteMask is 0 and rendertarget is not set,
189     // then rendering samples also pass neglecting discard statements in pixel shader.
190     // So we add a mock texture as render target in such case. See http://anglebug.com/2152
191     Feature addMockTextureNoRenderTarget = {
192         "add_mock_texture_no_render_target", FeatureCategory::D3DWorkarounds,
193         "On some drivers when rendering with no render target, two bugs lead to incorrect behavior",
194         &members, "http://anglebug.com/2152"};
195 
196     // Don't use D3D constant register zero when allocating space for uniforms in the vertex shader.
197     // This is targeted to work around a bug in NVIDIA D3D driver version 388.59 where in very
198     // specific cases the driver would not handle constant register zero correctly.
199     Feature skipVSConstantRegisterZero = {
200         "skip_vs_constant_register_zero", FeatureCategory::D3DWorkarounds,
201         "In specific cases the driver doesn't handle constant register zero correctly", &members};
202 
203     // Forces the value returned from an atomic operations to be always be resolved. This is
204     // targeted to workaround a bug in NVIDIA D3D driver where the return value from
205     // RWByteAddressBuffer.InterlockedAdd does not get resolved when used in the .yzw components of
206     // a RWByteAddressBuffer.Store operation. Only has an effect on HLSL translation.
207     // http://anglebug.com/3246
208     Feature forceAtomicValueResolution = {
209         "force_atomic_value_resolution", FeatureCategory::D3DWorkarounds,
210         "On some drivers the return value from RWByteAddressBuffer.InterlockedAdd does not resolve "
211         "when used in the .yzw components of a RWByteAddressBuffer.Store operation",
212         &members, "http://anglebug.com/3246"};
213 
214     // Match chromium's robust resource init behaviour by always prefering to upload texture data
215     // instead of clearing. Clear calls have been observed to cause texture corruption for some
216     // formats.
217     Feature allowClearForRobustResourceInit = {
218         "allow_clear_for_robust_resource_init", FeatureCategory::D3DWorkarounds,
219         "Some drivers corrupt texture data when clearing for robust resource initialization.",
220         &members, "http://crbug.com/941620"};
221 
222     // Allow translating uniform block to StructuredBuffer. This is targeted to work around a slow
223     // fxc compile performance issue with dynamic uniform indexing. http://anglebug.com/3682
224     Feature allowTranslateUniformBlockToStructuredBuffer = {
225         "allow_translate_uniform_block_to_structured_buffer", FeatureCategory::D3DWorkarounds,
226         "There is a slow fxc compile performance issue with dynamic uniform indexing if "
227         "translating a uniform block with a large array member to cbuffer.",
228         &members, "http://anglebug.com/3682"};
229 
230     Feature allowES3OnFL10_0 = {"allowES3OnFL10_0", FeatureCategory::D3DWorkarounds,
231                                 "Allow ES3 on 10.0 devices", &members};
232 };
233 
// Constructor and destructor of FeaturesD3D, defaulted outside the struct body. They are marked
// `inline` because the definitions live in this header.
inline FeaturesD3D::FeaturesD3D()  = default;
inline FeaturesD3D::~FeaturesD3D() = default;
236 
237 }  // namespace angle
238 
239 #endif  // ANGLE_PLATFORM_FEATURESD3D_H_
240