1 //
2 // Copyright (c) 2012 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // InputLayoutCache.cpp: Defines InputLayoutCache, a class that builds and caches
8 // D3D11 input layouts.
9 
10 #include "libANGLE/renderer/d3d/d3d11/InputLayoutCache.h"
11 
12 #include "common/bitset_utils.h"
13 #include "common/utilities.h"
14 #include "libANGLE/Context.h"
15 #include "libANGLE/Program.h"
16 #include "libANGLE/VertexArray.h"
17 #include "libANGLE/VertexAttribute.h"
18 #include "libANGLE/renderer/d3d/IndexDataManager.h"
19 #include "libANGLE/renderer/d3d/ProgramD3D.h"
20 #include "libANGLE/renderer/d3d/VertexDataManager.h"
21 #include "libANGLE/renderer/d3d/d3d11/Buffer11.h"
22 #include "libANGLE/renderer/d3d/d3d11/Context11.h"
23 #include "libANGLE/renderer/d3d/d3d11/Renderer11.h"
24 #include "libANGLE/renderer/d3d/d3d11/ShaderExecutable11.h"
25 #include "libANGLE/renderer/d3d/d3d11/VertexArray11.h"
26 #include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h"
27 #include "libANGLE/renderer/d3d/d3d11/formatutils11.h"
28 
29 namespace rx
30 {
31 
32 namespace
33 {
34 
// Returns how many leading vertex buffer slots are set aside for point sprite
// emulation: one when emulation is in use, none otherwise.
size_t GetReservedBufferCount(bool usesPointSpriteEmulation)
{
    if (usesPointSpriteEmulation)
    {
        return 1;
    }

    return 0;
}
39 
GetGLSLAttributeType(const std::vector<sh::Attribute> & shaderAttributes,size_t index)40 GLenum GetGLSLAttributeType(const std::vector<sh::Attribute> &shaderAttributes, size_t index)
41 {
42     // Count matrices differently
43     for (const sh::Attribute &attrib : shaderAttributes)
44     {
45         if (attrib.location == -1)
46         {
47             continue;
48         }
49 
50         GLenum transposedType = gl::TransposeMatrixType(attrib.type);
51         int rows              = gl::VariableRowCount(transposedType);
52         int intIndex          = static_cast<int>(index);
53 
54         if (intIndex >= attrib.location && intIndex < attrib.location + rows)
55         {
56             return transposedType;
57         }
58     }
59 
60     UNREACHABLE();
61     return GL_NONE;
62 }
63 
// One vertex attribute squeezed into four bytes so that it can be stored as a single
// 32-bit word of the input layout cache key. Field order determines the packed bit
// pattern, so it must not be changed casually.
struct PackedAttribute
{
    // gl::AttributeType of the shader-side attribute, narrowed to 8 bits.
    uint8_t attribType;
    // D3D semantic index assigned to the attribute, narrowed to 8 bits.
    uint8_t semanticIndex;
    // gl::VertexFormatType of the source vertex data, narrowed to 8 bits.
    uint8_t vertexFormatType;
    // Instancing divisor, narrowed to 8 bits.
    uint8_t divisor;
};
71 
72 } // anonymous namespace
73 
PackedAttributeLayout()74 PackedAttributeLayout::PackedAttributeLayout() : numAttributes(0), flags(0), attributeData({})
75 {
76 }
77 
78 PackedAttributeLayout::PackedAttributeLayout(const PackedAttributeLayout &other) = default;
79 
addAttributeData(GLenum glType,UINT semanticIndex,gl::VertexFormatType vertexFormatType,unsigned int divisor)80 void PackedAttributeLayout::addAttributeData(GLenum glType,
81                                              UINT semanticIndex,
82                                              gl::VertexFormatType vertexFormatType,
83                                              unsigned int divisor)
84 {
85     gl::AttributeType attribType = gl::GetAttributeType(glType);
86 
87     PackedAttribute packedAttrib;
88     packedAttrib.attribType = static_cast<uint8_t>(attribType);
89     packedAttrib.semanticIndex = static_cast<uint8_t>(semanticIndex);
90     packedAttrib.vertexFormatType = static_cast<uint8_t>(vertexFormatType);
91     packedAttrib.divisor = static_cast<uint8_t>(divisor);
92 
93     ASSERT(static_cast<gl::AttributeType>(packedAttrib.attribType) == attribType);
94     ASSERT(static_cast<UINT>(packedAttrib.semanticIndex) == semanticIndex);
95     ASSERT(static_cast<gl::VertexFormatType>(packedAttrib.vertexFormatType) == vertexFormatType);
96     ASSERT(static_cast<unsigned int>(packedAttrib.divisor) == divisor);
97 
98     static_assert(sizeof(uint32_t) == sizeof(PackedAttribute), "PackedAttributes must be 32-bits exactly.");
99 
100     attributeData[numAttributes++] = gl::bitCast<uint32_t>(packedAttrib);
101 }
102 
operator ==(const PackedAttributeLayout & other) const103 bool PackedAttributeLayout::operator==(const PackedAttributeLayout &other) const
104 {
105     return (numAttributes == other.numAttributes) && (flags == other.flags) &&
106            (attributeData == other.attributeData);
107 }
108 
InputLayoutCache()109 InputLayoutCache::InputLayoutCache()
110     : mLayoutCache(kDefaultCacheSize * 2), mPointSpriteVertexBuffer(), mPointSpriteIndexBuffer()
111 {
112 }
113 
~InputLayoutCache()114 InputLayoutCache::~InputLayoutCache()
115 {
116 }
117 
clear()118 void InputLayoutCache::clear()
119 {
120     mLayoutCache.Clear();
121     mPointSpriteVertexBuffer.reset();
122     mPointSpriteIndexBuffer.reset();
123 }
124 
applyVertexBuffers(const gl::Context * context,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,GLint start,bool isIndexedRendering)125 gl::Error InputLayoutCache::applyVertexBuffers(
126     const gl::Context *context,
127     const std::vector<const TranslatedAttribute *> &currentAttributes,
128     GLenum mode,
129     GLint start,
130     bool isIndexedRendering)
131 {
132     Renderer11 *renderer   = GetImplAs<Context11>(context)->getRenderer();
133     const gl::State &state = context->getGLState();
134     auto *stateManager     = renderer->getStateManager();
135     gl::Program *program   = state.getProgram();
136     ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
137 
138     bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
139     bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
140 
141     // Note that if we use instance emulation, we reserve the first buffer slot.
142     size_t reservedBuffers = GetReservedBufferCount(programUsesInstancedPointSprites);
143 
144     for (size_t attribIndex = 0; attribIndex < (gl::MAX_VERTEX_ATTRIBS - reservedBuffers);
145          ++attribIndex)
146     {
147         ID3D11Buffer *buffer = nullptr;
148         UINT vertexStride    = 0;
149         UINT vertexOffset    = 0;
150 
151         if (attribIndex < currentAttributes.size())
152         {
153             const auto &attrib      = *currentAttributes[attribIndex];
154             Buffer11 *bufferStorage = attrib.storage ? GetAs<Buffer11>(attrib.storage) : nullptr;
155 
156             // If indexed pointsprite emulation is active, then we need to take a less efficent code path.
157             // Emulated indexed pointsprite rendering requires that the vertex buffers match exactly to
158             // the indices passed by the caller.  This could expand or shrink the vertex buffer depending
159             // on the number of points indicated by the index list or how many duplicates are found on the index list.
160             if (bufferStorage == nullptr)
161             {
162                 ASSERT(attrib.vertexBuffer.get());
163                 buffer = GetAs<VertexBuffer11>(attrib.vertexBuffer.get())->getBuffer().get();
164             }
165             else if (instancedPointSpritesActive && isIndexedRendering)
166             {
167                 VertexArray11 *vao11 = GetImplAs<VertexArray11>(state.getVertexArray());
168                 ASSERT(vao11->isCachedIndexInfoValid());
169                 TranslatedIndexData *indexInfo = vao11->getCachedIndexInfo();
170                 if (indexInfo->srcIndexData.srcBuffer != nullptr)
171                 {
172                     const uint8_t *bufferData = nullptr;
173                     ANGLE_TRY(indexInfo->srcIndexData.srcBuffer->getData(context, &bufferData));
174                     ASSERT(bufferData != nullptr);
175 
176                     ptrdiff_t offset =
177                         reinterpret_cast<ptrdiff_t>(indexInfo->srcIndexData.srcIndices);
178                     indexInfo->srcIndexData.srcBuffer  = nullptr;
179                     indexInfo->srcIndexData.srcIndices = bufferData + offset;
180                 }
181 
182                 ANGLE_TRY_RESULT(bufferStorage->getEmulatedIndexedBuffer(
183                                      context, &indexInfo->srcIndexData, attrib, start),
184                                  buffer);
185             }
186             else
187             {
188                 ANGLE_TRY_RESULT(
189                     bufferStorage->getBuffer(context, BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK),
190                     buffer);
191             }
192 
193             vertexStride = attrib.stride;
194             ANGLE_TRY_RESULT(attrib.computeOffset(start), vertexOffset);
195         }
196 
197         size_t bufferIndex = reservedBuffers + attribIndex;
198 
199         stateManager->queueVertexBufferChange(bufferIndex, buffer, vertexStride, vertexOffset);
200     }
201 
202     // Instanced PointSprite emulation requires two additional ID3D11Buffers. A vertex buffer needs
203     // to be created and added to the list of current buffers, strides and offsets collections.
204     // This buffer contains the vertices for a single PointSprite quad.
205     // An index buffer also needs to be created and applied because rendering instanced data on
206     // D3D11 FL9_3 requires DrawIndexedInstanced() to be used. Shaders that contain gl_PointSize and
207     // used without the GL_POINTS rendering mode require a vertex buffer because some drivers cannot
208     // handle missing vertex data and will TDR the system.
209     if (programUsesInstancedPointSprites)
210     {
211         const UINT pointSpriteVertexStride = sizeof(float) * 5;
212 
213         if (!mPointSpriteVertexBuffer.valid())
214         {
215             static const float pointSpriteVertices[] =
216             {
217                 // Position        // TexCoord
218                -1.0f, -1.0f, 0.0f, 0.0f, 1.0f,
219                -1.0f,  1.0f, 0.0f, 0.0f, 0.0f,
220                 1.0f,  1.0f, 0.0f, 1.0f, 0.0f,
221                 1.0f, -1.0f, 0.0f, 1.0f, 1.0f,
222                -1.0f, -1.0f, 0.0f, 0.0f, 1.0f,
223                 1.0f,  1.0f, 0.0f, 1.0f, 0.0f,
224             };
225 
226             D3D11_SUBRESOURCE_DATA vertexBufferData = { pointSpriteVertices, 0, 0 };
227             D3D11_BUFFER_DESC vertexBufferDesc;
228             vertexBufferDesc.ByteWidth = sizeof(pointSpriteVertices);
229             vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
230             vertexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
231             vertexBufferDesc.CPUAccessFlags = 0;
232             vertexBufferDesc.MiscFlags = 0;
233             vertexBufferDesc.StructureByteStride = 0;
234 
235             ANGLE_TRY(renderer->allocateResource(vertexBufferDesc, &vertexBufferData,
236                                                  &mPointSpriteVertexBuffer));
237         }
238 
239         // Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver to avoid
240         // indexing into the vertex buffer.
241         UINT stride = instancedPointSpritesActive ? pointSpriteVertexStride : 0;
242         stateManager->queueVertexBufferChange(0, mPointSpriteVertexBuffer.get(), stride, 0);
243 
244         if (!mPointSpriteIndexBuffer.valid())
245         {
246             // Create an index buffer and set it for pointsprite rendering
247             static const unsigned short pointSpriteIndices[] =
248             {
249                 0, 1, 2, 3, 4, 5,
250             };
251 
252             D3D11_SUBRESOURCE_DATA indexBufferData = { pointSpriteIndices, 0, 0 };
253             D3D11_BUFFER_DESC indexBufferDesc;
254             indexBufferDesc.ByteWidth = sizeof(pointSpriteIndices);
255             indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
256             indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
257             indexBufferDesc.CPUAccessFlags = 0;
258             indexBufferDesc.MiscFlags = 0;
259             indexBufferDesc.StructureByteStride = 0;
260 
261             ANGLE_TRY(renderer->allocateResource(indexBufferDesc, &indexBufferData,
262                                                  &mPointSpriteIndexBuffer));
263         }
264 
265         if (instancedPointSpritesActive)
266         {
267             // The index buffer is applied here because Instanced PointSprite emulation uses the a
268             // non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer()
269             // on the renderer will not be called and setting this buffer here ensures that the
270             // rendering path will contain the correct index buffers.
271             stateManager->setIndexBuffer(mPointSpriteIndexBuffer.get(), DXGI_FORMAT_R16_UINT, 0);
272         }
273     }
274 
275     stateManager->applyVertexBufferChanges();
276     return gl::NoError();
277 }
278 
updateVertexOffsetsForPointSpritesEmulation(Renderer11 * renderer,const std::vector<const TranslatedAttribute * > & currentAttributes,GLint startVertex,GLsizei emulatedInstanceId)279 gl::Error InputLayoutCache::updateVertexOffsetsForPointSpritesEmulation(
280     Renderer11 *renderer,
281     const std::vector<const TranslatedAttribute *> &currentAttributes,
282     GLint startVertex,
283     GLsizei emulatedInstanceId)
284 {
285     auto *stateManager = renderer->getStateManager();
286 
287     size_t reservedBuffers = GetReservedBufferCount(true);
288     for (size_t attribIndex = 0; attribIndex < currentAttributes.size(); ++attribIndex)
289     {
290         const auto &attrib = *currentAttributes[attribIndex];
291         size_t bufferIndex = reservedBuffers + attribIndex;
292 
293         if (attrib.divisor > 0)
294         {
295             unsigned int offset = 0;
296             ANGLE_TRY_RESULT(attrib.computeOffset(startVertex), offset);
297             offset += (attrib.stride * (emulatedInstanceId / attrib.divisor));
298             stateManager->queueVertexOffsetChange(bufferIndex, offset);
299         }
300     }
301 
302     stateManager->applyVertexBufferChanges();
303     return gl::NoError();
304 }
305 
updateInputLayout(Renderer11 * renderer,const gl::State & state,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,const AttribIndexArray & sortedSemanticIndices,const DrawCallVertexParams & vertexParams)306 gl::Error InputLayoutCache::updateInputLayout(
307     Renderer11 *renderer,
308     const gl::State &state,
309     const std::vector<const TranslatedAttribute *> &currentAttributes,
310     GLenum mode,
311     const AttribIndexArray &sortedSemanticIndices,
312     const DrawCallVertexParams &vertexParams)
313 {
314     gl::Program *program         = state.getProgram();
315     const auto &shaderAttributes = program->getAttributes();
316     PackedAttributeLayout layout;
317 
318     ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
319     bool programUsesInstancedPointSprites =
320         programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
321     bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
322 
323     if (programUsesInstancedPointSprites)
324     {
325         layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES;
326     }
327 
328     if (instancedPointSpritesActive)
329     {
330         layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE;
331     }
332 
333     if (vertexParams.instances() > 0)
334     {
335         layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_RENDERING_ACTIVE;
336     }
337 
338     const auto &attribs            = state.getVertexArray()->getVertexAttributes();
339     const auto &bindings           = state.getVertexArray()->getVertexBindings();
340     const auto &locationToSemantic = programD3D->getAttribLocationToD3DSemantics();
341     int divisorMultiplier          = program->usesMultiview() ? program->getNumViews() : 1;
342 
343     for (size_t attribIndex : program->getActiveAttribLocationsMask())
344     {
345         // Record the type of the associated vertex shader vector in our key
346         // This will prevent mismatched vertex shaders from using the same input layout
347         GLenum glslElementType = GetGLSLAttributeType(shaderAttributes, attribIndex);
348 
349         const auto &attrib = attribs[attribIndex];
350         const auto &binding = bindings[attrib.bindingIndex];
351         int d3dSemantic    = locationToSemantic[attribIndex];
352 
353         const auto &currentValue =
354             state.getVertexAttribCurrentValue(static_cast<unsigned int>(attribIndex));
355         gl::VertexFormatType vertexFormatType = gl::GetVertexFormatType(attrib, currentValue.Type);
356 
357         layout.addAttributeData(glslElementType, d3dSemantic, vertexFormatType,
358                                 binding.getDivisor() * divisorMultiplier);
359     }
360 
361     const d3d11::InputLayout *inputLayout = nullptr;
362     if (layout.numAttributes > 0 || layout.flags != 0)
363     {
364         auto it = mLayoutCache.Get(layout);
365         if (it != mLayoutCache.end())
366         {
367             inputLayout = &it->second;
368         }
369         else
370         {
371             angle::TrimCache(mLayoutCache.max_size() / 2, kGCLimit, "input layout", &mLayoutCache);
372 
373             d3d11::InputLayout newInputLayout;
374             ANGLE_TRY(createInputLayout(renderer, sortedSemanticIndices, currentAttributes, mode,
375                                         program, vertexParams, &newInputLayout));
376 
377             auto insertIt = mLayoutCache.Put(layout, std::move(newInputLayout));
378             inputLayout   = &insertIt->second;
379         }
380     }
381 
382     renderer->getStateManager()->setInputLayout(inputLayout);
383     return gl::NoError();
384 }
385 
createInputLayout(Renderer11 * renderer,const AttribIndexArray & sortedSemanticIndices,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,gl::Program * program,const DrawCallVertexParams & vertexParams,d3d11::InputLayout * inputLayoutOut)386 gl::Error InputLayoutCache::createInputLayout(
387     Renderer11 *renderer,
388     const AttribIndexArray &sortedSemanticIndices,
389     const std::vector<const TranslatedAttribute *> &currentAttributes,
390     GLenum mode,
391     gl::Program *program,
392     const DrawCallVertexParams &vertexParams,
393     d3d11::InputLayout *inputLayoutOut)
394 {
395     ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
396     auto featureLevel      = renderer->getRenderer11DeviceCaps().featureLevel;
397 
398     bool programUsesInstancedPointSprites =
399         programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
400 
401     unsigned int inputElementCount = 0;
402     std::array<D3D11_INPUT_ELEMENT_DESC, gl::MAX_VERTEX_ATTRIBS> inputElements;
403 
404     for (size_t attribIndex = 0; attribIndex < currentAttributes.size(); ++attribIndex)
405     {
406         const auto &attrib    = *currentAttributes[attribIndex];
407         const int sortedIndex = sortedSemanticIndices[attribIndex];
408 
409         D3D11_INPUT_CLASSIFICATION inputClass =
410             attrib.divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA;
411 
412         const auto &vertexFormatType =
413             gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType);
414         const auto &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, featureLevel);
415 
416         auto *inputElement = &inputElements[inputElementCount];
417 
418         inputElement->SemanticName         = "TEXCOORD";
419         inputElement->SemanticIndex        = sortedIndex;
420         inputElement->Format               = vertexFormatInfo.nativeFormat;
421         inputElement->InputSlot            = static_cast<UINT>(attribIndex);
422         inputElement->AlignedByteOffset    = 0;
423         inputElement->InputSlotClass       = inputClass;
424         inputElement->InstanceDataStepRate = attrib.divisor;
425 
426         inputElementCount++;
427     }
428 
429     // Instanced PointSprite emulation requires additional entries in the
430     // inputlayout to support the vertices that make up the pointsprite quad.
431     // We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the
432     // input layout must match the shader
433     if (programUsesInstancedPointSprites)
434     {
435         // On 9_3, we must ensure that slot 0 contains non-instanced data.
436         // If slot 0 currently contains instanced data then we swap it with a non-instanced element.
437         // Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3
438         // doesn't support OpenGL ES 3.0.
439         // As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced
440         // simultaneously, so a non-instanced element must exist.
441 
442         GLsizei numIndicesPerInstance = 0;
443         if (vertexParams.instances() > 0)
444         {
445             // This may trigger an evaluation of the index range.
446             numIndicesPerInstance = vertexParams.vertexCount();
447         }
448 
449         for (size_t elementIndex = 0; elementIndex < inputElementCount; ++elementIndex)
450         {
451             // If rendering points and instanced pointsprite emulation is being used, the
452             // inputClass is required to be configured as per instance data
453             if (mode == GL_POINTS)
454             {
455                 inputElements[elementIndex].InputSlotClass       = D3D11_INPUT_PER_INSTANCE_DATA;
456                 inputElements[elementIndex].InstanceDataStepRate = 1;
457                 if (numIndicesPerInstance > 0 && currentAttributes[elementIndex]->divisor > 0)
458                 {
459                     inputElements[elementIndex].InstanceDataStepRate = numIndicesPerInstance;
460                 }
461             }
462             inputElements[elementIndex].InputSlot++;
463         }
464 
465         inputElements[inputElementCount].SemanticName         = "SPRITEPOSITION";
466         inputElements[inputElementCount].SemanticIndex        = 0;
467         inputElements[inputElementCount].Format               = DXGI_FORMAT_R32G32B32_FLOAT;
468         inputElements[inputElementCount].InputSlot            = 0;
469         inputElements[inputElementCount].AlignedByteOffset    = 0;
470         inputElements[inputElementCount].InputSlotClass       = D3D11_INPUT_PER_VERTEX_DATA;
471         inputElements[inputElementCount].InstanceDataStepRate = 0;
472         inputElementCount++;
473 
474         inputElements[inputElementCount].SemanticName         = "SPRITETEXCOORD";
475         inputElements[inputElementCount].SemanticIndex        = 0;
476         inputElements[inputElementCount].Format               = DXGI_FORMAT_R32G32_FLOAT;
477         inputElements[inputElementCount].InputSlot            = 0;
478         inputElements[inputElementCount].AlignedByteOffset    = sizeof(float) * 3;
479         inputElements[inputElementCount].InputSlotClass       = D3D11_INPUT_PER_VERTEX_DATA;
480         inputElements[inputElementCount].InstanceDataStepRate = 0;
481         inputElementCount++;
482     }
483 
484     ShaderExecutableD3D *shader = nullptr;
485     ANGLE_TRY(programD3D->getVertexExecutableForCachedInputLayout(&shader, nullptr));
486 
487     ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader);
488 
489     InputElementArray inputElementArray(inputElements.data(), inputElementCount);
490     ShaderData vertexShaderData(shader11->getFunction(), shader11->getLength());
491 
492     ANGLE_TRY(renderer->allocateResource(inputElementArray, &vertexShaderData, inputLayoutOut));
493     return gl::NoError();
494 }
495 
setCacheSize(size_t newCacheSize)496 void InputLayoutCache::setCacheSize(size_t newCacheSize)
497 {
498     // Forces a reset of the cache.
499     LayoutCache newCache(newCacheSize);
500     mLayoutCache.Swap(newCache);
501 }
502 
503 }  // namespace rx
504