1 //
2 // Copyright (c) 2012 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // InputLayoutCache.cpp: Defines InputLayoutCache, a class that builds and caches
8 // D3D11 input layouts.
9
10 #include "libANGLE/renderer/d3d/d3d11/InputLayoutCache.h"
11
12 #include "common/bitset_utils.h"
13 #include "common/utilities.h"
14 #include "libANGLE/Context.h"
15 #include "libANGLE/Program.h"
16 #include "libANGLE/VertexArray.h"
17 #include "libANGLE/VertexAttribute.h"
18 #include "libANGLE/renderer/d3d/IndexDataManager.h"
19 #include "libANGLE/renderer/d3d/ProgramD3D.h"
20 #include "libANGLE/renderer/d3d/VertexDataManager.h"
21 #include "libANGLE/renderer/d3d/d3d11/Buffer11.h"
22 #include "libANGLE/renderer/d3d/d3d11/Context11.h"
23 #include "libANGLE/renderer/d3d/d3d11/Renderer11.h"
24 #include "libANGLE/renderer/d3d/d3d11/ShaderExecutable11.h"
25 #include "libANGLE/renderer/d3d/d3d11/VertexArray11.h"
26 #include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h"
27 #include "libANGLE/renderer/d3d/d3d11/formatutils11.h"
28
29 namespace rx
30 {
31
32 namespace
33 {
34
// Returns the number of leading vertex buffer slots that are set aside for internal use:
// one slot when point sprite emulation is in use, none otherwise.
size_t GetReservedBufferCount(bool usesPointSpriteEmulation)
{
    return usesPointSpriteEmulation ? 1 : 0;
}
39
GetGLSLAttributeType(const std::vector<sh::Attribute> & shaderAttributes,size_t index)40 GLenum GetGLSLAttributeType(const std::vector<sh::Attribute> &shaderAttributes, size_t index)
41 {
42 // Count matrices differently
43 for (const sh::Attribute &attrib : shaderAttributes)
44 {
45 if (attrib.location == -1)
46 {
47 continue;
48 }
49
50 GLenum transposedType = gl::TransposeMatrixType(attrib.type);
51 int rows = gl::VariableRowCount(transposedType);
52 int intIndex = static_cast<int>(index);
53
54 if (intIndex >= attrib.location && intIndex < attrib.location + rows)
55 {
56 return transposedType;
57 }
58 }
59
60 UNREACHABLE();
61 return GL_NONE;
62 }
63
// One attribute's contribution to the input layout cache key. The four byte-sized fields are
// bit-cast into a single uint32_t by PackedAttributeLayout::addAttributeData, so the struct
// must stay exactly 32 bits wide and the field order must not change.
struct PackedAttribute
{
    uint8_t attribType;        // gl::AttributeType narrowed to 8 bits.
    uint8_t semanticIndex;     // D3D semantic index narrowed to 8 bits.
    uint8_t vertexFormatType;  // gl::VertexFormatType narrowed to 8 bits.
    uint8_t divisor;           // Instancing divisor narrowed to 8 bits.
};
71
72 } // anonymous namespace
73
PackedAttributeLayout()74 PackedAttributeLayout::PackedAttributeLayout() : numAttributes(0), flags(0), attributeData({})
75 {
76 }
77
78 PackedAttributeLayout::PackedAttributeLayout(const PackedAttributeLayout &other) = default;
79
addAttributeData(GLenum glType,UINT semanticIndex,gl::VertexFormatType vertexFormatType,unsigned int divisor)80 void PackedAttributeLayout::addAttributeData(GLenum glType,
81 UINT semanticIndex,
82 gl::VertexFormatType vertexFormatType,
83 unsigned int divisor)
84 {
85 gl::AttributeType attribType = gl::GetAttributeType(glType);
86
87 PackedAttribute packedAttrib;
88 packedAttrib.attribType = static_cast<uint8_t>(attribType);
89 packedAttrib.semanticIndex = static_cast<uint8_t>(semanticIndex);
90 packedAttrib.vertexFormatType = static_cast<uint8_t>(vertexFormatType);
91 packedAttrib.divisor = static_cast<uint8_t>(divisor);
92
93 ASSERT(static_cast<gl::AttributeType>(packedAttrib.attribType) == attribType);
94 ASSERT(static_cast<UINT>(packedAttrib.semanticIndex) == semanticIndex);
95 ASSERT(static_cast<gl::VertexFormatType>(packedAttrib.vertexFormatType) == vertexFormatType);
96 ASSERT(static_cast<unsigned int>(packedAttrib.divisor) == divisor);
97
98 static_assert(sizeof(uint32_t) == sizeof(PackedAttribute), "PackedAttributes must be 32-bits exactly.");
99
100 attributeData[numAttributes++] = gl::bitCast<uint32_t>(packedAttrib);
101 }
102
operator ==(const PackedAttributeLayout & other) const103 bool PackedAttributeLayout::operator==(const PackedAttributeLayout &other) const
104 {
105 return (numAttributes == other.numAttributes) && (flags == other.flags) &&
106 (attributeData == other.attributeData);
107 }
108
InputLayoutCache()109 InputLayoutCache::InputLayoutCache()
110 : mLayoutCache(kDefaultCacheSize * 2), mPointSpriteVertexBuffer(), mPointSpriteIndexBuffer()
111 {
112 }
113
~InputLayoutCache()114 InputLayoutCache::~InputLayoutCache()
115 {
116 }
117
clear()118 void InputLayoutCache::clear()
119 {
120 mLayoutCache.Clear();
121 mPointSpriteVertexBuffer.reset();
122 mPointSpriteIndexBuffer.reset();
123 }
124
applyVertexBuffers(const gl::Context * context,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,GLint start,bool isIndexedRendering)125 gl::Error InputLayoutCache::applyVertexBuffers(
126 const gl::Context *context,
127 const std::vector<const TranslatedAttribute *> ¤tAttributes,
128 GLenum mode,
129 GLint start,
130 bool isIndexedRendering)
131 {
132 Renderer11 *renderer = GetImplAs<Context11>(context)->getRenderer();
133 const gl::State &state = context->getGLState();
134 auto *stateManager = renderer->getStateManager();
135 gl::Program *program = state.getProgram();
136 ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
137
138 bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
139 bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
140
141 // Note that if we use instance emulation, we reserve the first buffer slot.
142 size_t reservedBuffers = GetReservedBufferCount(programUsesInstancedPointSprites);
143
144 for (size_t attribIndex = 0; attribIndex < (gl::MAX_VERTEX_ATTRIBS - reservedBuffers);
145 ++attribIndex)
146 {
147 ID3D11Buffer *buffer = nullptr;
148 UINT vertexStride = 0;
149 UINT vertexOffset = 0;
150
151 if (attribIndex < currentAttributes.size())
152 {
153 const auto &attrib = *currentAttributes[attribIndex];
154 Buffer11 *bufferStorage = attrib.storage ? GetAs<Buffer11>(attrib.storage) : nullptr;
155
156 // If indexed pointsprite emulation is active, then we need to take a less efficent code path.
157 // Emulated indexed pointsprite rendering requires that the vertex buffers match exactly to
158 // the indices passed by the caller. This could expand or shrink the vertex buffer depending
159 // on the number of points indicated by the index list or how many duplicates are found on the index list.
160 if (bufferStorage == nullptr)
161 {
162 ASSERT(attrib.vertexBuffer.get());
163 buffer = GetAs<VertexBuffer11>(attrib.vertexBuffer.get())->getBuffer().get();
164 }
165 else if (instancedPointSpritesActive && isIndexedRendering)
166 {
167 VertexArray11 *vao11 = GetImplAs<VertexArray11>(state.getVertexArray());
168 ASSERT(vao11->isCachedIndexInfoValid());
169 TranslatedIndexData *indexInfo = vao11->getCachedIndexInfo();
170 if (indexInfo->srcIndexData.srcBuffer != nullptr)
171 {
172 const uint8_t *bufferData = nullptr;
173 ANGLE_TRY(indexInfo->srcIndexData.srcBuffer->getData(context, &bufferData));
174 ASSERT(bufferData != nullptr);
175
176 ptrdiff_t offset =
177 reinterpret_cast<ptrdiff_t>(indexInfo->srcIndexData.srcIndices);
178 indexInfo->srcIndexData.srcBuffer = nullptr;
179 indexInfo->srcIndexData.srcIndices = bufferData + offset;
180 }
181
182 ANGLE_TRY_RESULT(bufferStorage->getEmulatedIndexedBuffer(
183 context, &indexInfo->srcIndexData, attrib, start),
184 buffer);
185 }
186 else
187 {
188 ANGLE_TRY_RESULT(
189 bufferStorage->getBuffer(context, BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK),
190 buffer);
191 }
192
193 vertexStride = attrib.stride;
194 ANGLE_TRY_RESULT(attrib.computeOffset(start), vertexOffset);
195 }
196
197 size_t bufferIndex = reservedBuffers + attribIndex;
198
199 stateManager->queueVertexBufferChange(bufferIndex, buffer, vertexStride, vertexOffset);
200 }
201
202 // Instanced PointSprite emulation requires two additional ID3D11Buffers. A vertex buffer needs
203 // to be created and added to the list of current buffers, strides and offsets collections.
204 // This buffer contains the vertices for a single PointSprite quad.
205 // An index buffer also needs to be created and applied because rendering instanced data on
206 // D3D11 FL9_3 requires DrawIndexedInstanced() to be used. Shaders that contain gl_PointSize and
207 // used without the GL_POINTS rendering mode require a vertex buffer because some drivers cannot
208 // handle missing vertex data and will TDR the system.
209 if (programUsesInstancedPointSprites)
210 {
211 const UINT pointSpriteVertexStride = sizeof(float) * 5;
212
213 if (!mPointSpriteVertexBuffer.valid())
214 {
215 static const float pointSpriteVertices[] =
216 {
217 // Position // TexCoord
218 -1.0f, -1.0f, 0.0f, 0.0f, 1.0f,
219 -1.0f, 1.0f, 0.0f, 0.0f, 0.0f,
220 1.0f, 1.0f, 0.0f, 1.0f, 0.0f,
221 1.0f, -1.0f, 0.0f, 1.0f, 1.0f,
222 -1.0f, -1.0f, 0.0f, 0.0f, 1.0f,
223 1.0f, 1.0f, 0.0f, 1.0f, 0.0f,
224 };
225
226 D3D11_SUBRESOURCE_DATA vertexBufferData = { pointSpriteVertices, 0, 0 };
227 D3D11_BUFFER_DESC vertexBufferDesc;
228 vertexBufferDesc.ByteWidth = sizeof(pointSpriteVertices);
229 vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
230 vertexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
231 vertexBufferDesc.CPUAccessFlags = 0;
232 vertexBufferDesc.MiscFlags = 0;
233 vertexBufferDesc.StructureByteStride = 0;
234
235 ANGLE_TRY(renderer->allocateResource(vertexBufferDesc, &vertexBufferData,
236 &mPointSpriteVertexBuffer));
237 }
238
239 // Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver to avoid
240 // indexing into the vertex buffer.
241 UINT stride = instancedPointSpritesActive ? pointSpriteVertexStride : 0;
242 stateManager->queueVertexBufferChange(0, mPointSpriteVertexBuffer.get(), stride, 0);
243
244 if (!mPointSpriteIndexBuffer.valid())
245 {
246 // Create an index buffer and set it for pointsprite rendering
247 static const unsigned short pointSpriteIndices[] =
248 {
249 0, 1, 2, 3, 4, 5,
250 };
251
252 D3D11_SUBRESOURCE_DATA indexBufferData = { pointSpriteIndices, 0, 0 };
253 D3D11_BUFFER_DESC indexBufferDesc;
254 indexBufferDesc.ByteWidth = sizeof(pointSpriteIndices);
255 indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
256 indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
257 indexBufferDesc.CPUAccessFlags = 0;
258 indexBufferDesc.MiscFlags = 0;
259 indexBufferDesc.StructureByteStride = 0;
260
261 ANGLE_TRY(renderer->allocateResource(indexBufferDesc, &indexBufferData,
262 &mPointSpriteIndexBuffer));
263 }
264
265 if (instancedPointSpritesActive)
266 {
267 // The index buffer is applied here because Instanced PointSprite emulation uses the a
268 // non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer()
269 // on the renderer will not be called and setting this buffer here ensures that the
270 // rendering path will contain the correct index buffers.
271 stateManager->setIndexBuffer(mPointSpriteIndexBuffer.get(), DXGI_FORMAT_R16_UINT, 0);
272 }
273 }
274
275 stateManager->applyVertexBufferChanges();
276 return gl::NoError();
277 }
278
updateVertexOffsetsForPointSpritesEmulation(Renderer11 * renderer,const std::vector<const TranslatedAttribute * > & currentAttributes,GLint startVertex,GLsizei emulatedInstanceId)279 gl::Error InputLayoutCache::updateVertexOffsetsForPointSpritesEmulation(
280 Renderer11 *renderer,
281 const std::vector<const TranslatedAttribute *> ¤tAttributes,
282 GLint startVertex,
283 GLsizei emulatedInstanceId)
284 {
285 auto *stateManager = renderer->getStateManager();
286
287 size_t reservedBuffers = GetReservedBufferCount(true);
288 for (size_t attribIndex = 0; attribIndex < currentAttributes.size(); ++attribIndex)
289 {
290 const auto &attrib = *currentAttributes[attribIndex];
291 size_t bufferIndex = reservedBuffers + attribIndex;
292
293 if (attrib.divisor > 0)
294 {
295 unsigned int offset = 0;
296 ANGLE_TRY_RESULT(attrib.computeOffset(startVertex), offset);
297 offset += (attrib.stride * (emulatedInstanceId / attrib.divisor));
298 stateManager->queueVertexOffsetChange(bufferIndex, offset);
299 }
300 }
301
302 stateManager->applyVertexBufferChanges();
303 return gl::NoError();
304 }
305
updateInputLayout(Renderer11 * renderer,const gl::State & state,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,const AttribIndexArray & sortedSemanticIndices,const DrawCallVertexParams & vertexParams)306 gl::Error InputLayoutCache::updateInputLayout(
307 Renderer11 *renderer,
308 const gl::State &state,
309 const std::vector<const TranslatedAttribute *> ¤tAttributes,
310 GLenum mode,
311 const AttribIndexArray &sortedSemanticIndices,
312 const DrawCallVertexParams &vertexParams)
313 {
314 gl::Program *program = state.getProgram();
315 const auto &shaderAttributes = program->getAttributes();
316 PackedAttributeLayout layout;
317
318 ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
319 bool programUsesInstancedPointSprites =
320 programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
321 bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
322
323 if (programUsesInstancedPointSprites)
324 {
325 layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES;
326 }
327
328 if (instancedPointSpritesActive)
329 {
330 layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE;
331 }
332
333 if (vertexParams.instances() > 0)
334 {
335 layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_RENDERING_ACTIVE;
336 }
337
338 const auto &attribs = state.getVertexArray()->getVertexAttributes();
339 const auto &bindings = state.getVertexArray()->getVertexBindings();
340 const auto &locationToSemantic = programD3D->getAttribLocationToD3DSemantics();
341 int divisorMultiplier = program->usesMultiview() ? program->getNumViews() : 1;
342
343 for (size_t attribIndex : program->getActiveAttribLocationsMask())
344 {
345 // Record the type of the associated vertex shader vector in our key
346 // This will prevent mismatched vertex shaders from using the same input layout
347 GLenum glslElementType = GetGLSLAttributeType(shaderAttributes, attribIndex);
348
349 const auto &attrib = attribs[attribIndex];
350 const auto &binding = bindings[attrib.bindingIndex];
351 int d3dSemantic = locationToSemantic[attribIndex];
352
353 const auto ¤tValue =
354 state.getVertexAttribCurrentValue(static_cast<unsigned int>(attribIndex));
355 gl::VertexFormatType vertexFormatType = gl::GetVertexFormatType(attrib, currentValue.Type);
356
357 layout.addAttributeData(glslElementType, d3dSemantic, vertexFormatType,
358 binding.getDivisor() * divisorMultiplier);
359 }
360
361 const d3d11::InputLayout *inputLayout = nullptr;
362 if (layout.numAttributes > 0 || layout.flags != 0)
363 {
364 auto it = mLayoutCache.Get(layout);
365 if (it != mLayoutCache.end())
366 {
367 inputLayout = &it->second;
368 }
369 else
370 {
371 angle::TrimCache(mLayoutCache.max_size() / 2, kGCLimit, "input layout", &mLayoutCache);
372
373 d3d11::InputLayout newInputLayout;
374 ANGLE_TRY(createInputLayout(renderer, sortedSemanticIndices, currentAttributes, mode,
375 program, vertexParams, &newInputLayout));
376
377 auto insertIt = mLayoutCache.Put(layout, std::move(newInputLayout));
378 inputLayout = &insertIt->second;
379 }
380 }
381
382 renderer->getStateManager()->setInputLayout(inputLayout);
383 return gl::NoError();
384 }
385
createInputLayout(Renderer11 * renderer,const AttribIndexArray & sortedSemanticIndices,const std::vector<const TranslatedAttribute * > & currentAttributes,GLenum mode,gl::Program * program,const DrawCallVertexParams & vertexParams,d3d11::InputLayout * inputLayoutOut)386 gl::Error InputLayoutCache::createInputLayout(
387 Renderer11 *renderer,
388 const AttribIndexArray &sortedSemanticIndices,
389 const std::vector<const TranslatedAttribute *> ¤tAttributes,
390 GLenum mode,
391 gl::Program *program,
392 const DrawCallVertexParams &vertexParams,
393 d3d11::InputLayout *inputLayoutOut)
394 {
395 ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
396 auto featureLevel = renderer->getRenderer11DeviceCaps().featureLevel;
397
398 bool programUsesInstancedPointSprites =
399 programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
400
401 unsigned int inputElementCount = 0;
402 std::array<D3D11_INPUT_ELEMENT_DESC, gl::MAX_VERTEX_ATTRIBS> inputElements;
403
404 for (size_t attribIndex = 0; attribIndex < currentAttributes.size(); ++attribIndex)
405 {
406 const auto &attrib = *currentAttributes[attribIndex];
407 const int sortedIndex = sortedSemanticIndices[attribIndex];
408
409 D3D11_INPUT_CLASSIFICATION inputClass =
410 attrib.divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA;
411
412 const auto &vertexFormatType =
413 gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType);
414 const auto &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, featureLevel);
415
416 auto *inputElement = &inputElements[inputElementCount];
417
418 inputElement->SemanticName = "TEXCOORD";
419 inputElement->SemanticIndex = sortedIndex;
420 inputElement->Format = vertexFormatInfo.nativeFormat;
421 inputElement->InputSlot = static_cast<UINT>(attribIndex);
422 inputElement->AlignedByteOffset = 0;
423 inputElement->InputSlotClass = inputClass;
424 inputElement->InstanceDataStepRate = attrib.divisor;
425
426 inputElementCount++;
427 }
428
429 // Instanced PointSprite emulation requires additional entries in the
430 // inputlayout to support the vertices that make up the pointsprite quad.
431 // We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the
432 // input layout must match the shader
433 if (programUsesInstancedPointSprites)
434 {
435 // On 9_3, we must ensure that slot 0 contains non-instanced data.
436 // If slot 0 currently contains instanced data then we swap it with a non-instanced element.
437 // Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3
438 // doesn't support OpenGL ES 3.0.
439 // As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced
440 // simultaneously, so a non-instanced element must exist.
441
442 GLsizei numIndicesPerInstance = 0;
443 if (vertexParams.instances() > 0)
444 {
445 // This may trigger an evaluation of the index range.
446 numIndicesPerInstance = vertexParams.vertexCount();
447 }
448
449 for (size_t elementIndex = 0; elementIndex < inputElementCount; ++elementIndex)
450 {
451 // If rendering points and instanced pointsprite emulation is being used, the
452 // inputClass is required to be configured as per instance data
453 if (mode == GL_POINTS)
454 {
455 inputElements[elementIndex].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
456 inputElements[elementIndex].InstanceDataStepRate = 1;
457 if (numIndicesPerInstance > 0 && currentAttributes[elementIndex]->divisor > 0)
458 {
459 inputElements[elementIndex].InstanceDataStepRate = numIndicesPerInstance;
460 }
461 }
462 inputElements[elementIndex].InputSlot++;
463 }
464
465 inputElements[inputElementCount].SemanticName = "SPRITEPOSITION";
466 inputElements[inputElementCount].SemanticIndex = 0;
467 inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32B32_FLOAT;
468 inputElements[inputElementCount].InputSlot = 0;
469 inputElements[inputElementCount].AlignedByteOffset = 0;
470 inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
471 inputElements[inputElementCount].InstanceDataStepRate = 0;
472 inputElementCount++;
473
474 inputElements[inputElementCount].SemanticName = "SPRITETEXCOORD";
475 inputElements[inputElementCount].SemanticIndex = 0;
476 inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32_FLOAT;
477 inputElements[inputElementCount].InputSlot = 0;
478 inputElements[inputElementCount].AlignedByteOffset = sizeof(float) * 3;
479 inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
480 inputElements[inputElementCount].InstanceDataStepRate = 0;
481 inputElementCount++;
482 }
483
484 ShaderExecutableD3D *shader = nullptr;
485 ANGLE_TRY(programD3D->getVertexExecutableForCachedInputLayout(&shader, nullptr));
486
487 ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader);
488
489 InputElementArray inputElementArray(inputElements.data(), inputElementCount);
490 ShaderData vertexShaderData(shader11->getFunction(), shader11->getLength());
491
492 ANGLE_TRY(renderer->allocateResource(inputElementArray, &vertexShaderData, inputLayoutOut));
493 return gl::NoError();
494 }
495
setCacheSize(size_t newCacheSize)496 void InputLayoutCache::setCacheSize(size_t newCacheSize)
497 {
498 // Forces a reset of the cache.
499 LayoutCache newCache(newCacheSize);
500 mLayoutCache.Swap(newCache);
501 }
502
503 } // namespace rx
504