1 //
2 // Copyright 2018 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // vk_cache_utils.h:
7 // Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
8 // Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
9 //
10
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/vk_utils.h"
17
18 namespace rx
19 {
20
21 // Some descriptor set and pipeline layout constants.
22 //
// The set/binding assignment is done as follows:
24 //
25 // - Set 0 contains the ANGLE driver uniforms at binding 0. Note that driver uniforms are updated
26 // only under rare circumstances, such as viewport or depth range change. However, there is only
27 // one binding in this set. This set is placed before Set 1 containing transform feedback
28 // buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
29 // Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers), and all
30 // subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
31 // have needed to invalidateGraphicsDriverUniforms().
32 // - Set 1 contains uniform blocks created to encompass default uniforms. 1 binding is used per
33 // pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
34 // - Set 2 contains all textures.
35 // - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
36 // buffers and images.
37
// Descriptor set indices used by the pipeline layouts.  Every enumerator is given an explicit
// value so that the set numbering can be read off directly.
enum DescriptorSetIndex : uint32_t
{
    // Internal shaders only ever use one descriptor set; it shares index 0.
    InternalShader = 0,

    DriverUniforms = 0,  // ANGLE driver uniforms set index
    UniformsAndXfb = 1,  // Uniforms set index
    Texture        = 2,  // Textures set index
    ShaderResource = 3,  // Other shader resources set index

    InvalidEnum = 4,
    EnumCount   = InvalidEnum,
};
52
53 namespace vk
54 {
// Forward declarations; the full definitions live elsewhere.
class DynamicDescriptorPool;
class ImageHelper;
enum class ImageLayout;

// Shorthand for the ObjectAndSerial<> instantiation holding a Pipeline.
using PipelineAndSerial = ObjectAndSerial<Pipeline>;

// Shorthand aliases for RefCounted<> instantiations.
using RefCountedDescriptorSetLayout    = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout         = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;
64
// Helper macro that casts to a bitfield type then verifies no bits were dropped.
//
// |rhs| is evaluated once into a local variable, cast to the decayed type of |lhs| and assigned.
// The ASSERT then casts |lhs| back to the type of |rhs| and checks that the round trip preserved
// the value.  Note that |lhs| appears more than once in the expansion, so it should be free of
// side effects.
#define SetBitField(lhs, rhs)                                                         \
    do                                                                                \
    {                                                                                 \
        auto ANGLE_LOCAL_VAR = rhs;                                                   \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
        ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR);       \
    } while (0)
73
74 // Packed Vk resource descriptions.
75 // Most Vk types use many more bits than required to represent the underlying data.
76 // Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
77 // hashing (and also needs to check equality) we can optimize these operations by
78 // using fewer bits. Hence the packed types.
79 //
80 // One implementation note: these types could potentially be improved by using even
81 // fewer bits. For example, boolean values could be represented by a single bit instead
82 // of a uint8_t. However at the current time there are concerns about the portability
83 // of bitfield operators, and complexity issues with using bit mask operations. This is
84 // something we will likely want to investigate as the Vulkan implementation progresses.
85 //
86 // Second implementation note: the struct packing is also a bit fragile, and some of the
87 // packing requirements depend on using alignas and field ordering to get the result of
88 // packing nicely into the desired space. This is something we could also potentially fix
89 // with a redesign to use bitfields or bit mask operations.
90
91 // Enable struct padding warnings for the code below since it is used in caches.
92 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
93
// Kind of access made to a resource.  The enumerators are ordered from weakest to strongest so
// that two accesses can be compared numerically.
enum ResourceAccess
{
    Unused   = 0,
    ReadOnly = 1,
    Write    = 2,
};

// Raises |*oldAccess| to |newAccess| when the new access is stronger than the recorded one;
// otherwise |*oldAccess| is left untouched.
inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    const bool alreadyCovered = *oldAccess >= newAccess;
    if (alreadyCovered)
    {
        return;
    }

    *oldAccess = newAccess;
}
108
// Store operations for render pass attachments.  Store and DontCare take the values of the
// corresponding VkAttachmentStoreOp enumerators.  NoneQCOM takes the next value after DontCare and
// is translated to a Vulkan value by ConvertRenderPassStoreOpToVkStoreOp.
enum RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    NoneQCOM,
};
115 // ConvertRenderPassStoreOpToVkStoreOp rely on the fact that only NoneQCOM is different from VK
116 // enums.
117 static_assert(RenderPassStoreOp::NoneQCOM == 2, "ConvertRenderPassStoreOpToVkStoreOp must updated");
118
ConvertRenderPassStoreOpToVkStoreOp(RenderPassStoreOp storeOp)119 inline VkAttachmentStoreOp ConvertRenderPassStoreOpToVkStoreOp(RenderPassStoreOp storeOp)
120 {
121 return storeOp == RenderPassStoreOp::NoneQCOM ? VK_ATTACHMENT_STORE_OP_NONE_QCOM
122 : static_cast<VkAttachmentStoreOp>(storeOp);
123 }
124
// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
// std::array with one element per possible framebuffer attachment (resolve attachments included).
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
// angle::FixedVector parameterized by kMaxFramebufferAttachments.
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
// angle::BitSet parameterized by kMaxFramebufferAttachments.
using FramebufferAttachmentMask = angle::BitSet<kMaxFramebufferAttachments>;

// Same as above but without the resolve attachments: IMPLEMENTATION_MAX_DRAW_BUFFERS color
// attachments plus one depth/stencil attachment.
constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
// std::array with one element per possible non-resolve attachment.
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
// angle::BitSet16 parameterized by kMaxFramebufferNonResolveAttachments.
using FramebufferNonResolveAttachmentMask = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;
138
// Packed description of a render pass: sample count, attachment formats and resolve/unresolve
// information.  The class is 4-byte aligned and its size is pinned by a static_assert following
// the class, so members must not be added or reordered without revisiting that check.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment(bool resolveDepth, bool resolveStencil);
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    // The depth/stencil attachment slot directly follows the color attachment range.
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }
    // Whether the given GL color attachment index has its bit set in the resolve mask.
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    // Whether the given GL color attachment index has its bit set in the unresolve mask.
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }

    // The depth/stencil resolve and unresolve queries below all read the flag bits kept in the
    // last element of mAttachmentFormats.
    bool hasDepthStencilResolveAttachment() const
    {
        return (mAttachmentFormats.back() & (kResolveDepthFlag | kResolveStencilFlag)) != 0;
    }
    bool hasDepthResolveAttachment() const
    {
        return (mAttachmentFormats.back() & kResolveDepthFlag) != 0;
    }
    bool hasStencilResolveAttachment() const
    {
        return (mAttachmentFormats.back() & kResolveStencilFlag) != 0;
    }
    bool hasDepthStencilUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & (kUnresolveDepthFlag | kUnresolveStencilFlag)) != 0;
    }
    bool hasDepthUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & kUnresolveDepthFlag) != 0;
    }
    bool hasStencilUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & kUnresolveStencilFlag) != 0;
    }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    // Reconstructs the sample count from its stored logarithm (1 << mLogSamples).
    uint8_t samples() const { return 1u << mLogSamples; }

    // Returns the format stored in the given attachment slot.  For slots at or past the
    // depth/stencil index, only the low bits selected by kDepthStencilFormatStorageMask are
    // returned so that the flag bits sharing the byte are not interpreted as part of the format.
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);

        uint8_t format = mAttachmentFormats[index];
        if (index >= depthStencilAttachmentIndex())
        {
            format &= kDepthStencilFormatStorageMask;
        }
        return static_cast<angle::FormatID>(format);
    }

  private:
    // Store log(samples), to be able to store it in 3 bits.
    uint8_t mLogSamples : 3;
    uint8_t mColorAttachmentRange : 4;
    uint8_t mHasDepthStencilAttachment : 1;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    //
    // Note that depth/stencil resolve attachments require VK_KHR_depth_stencil_resolve which is
    // currently not well supported, so ANGLE always takes a fallback path for them.  When a resolve
    // path is implemented for depth/stencil attachments, another bit must be made free
    // (mAttachmentFormats is one element too large, so there are 8 bits there to take).
    //
    // NOTE(review): depth/stencil resolve flags are already declared below and stored in
    // mAttachmentFormats.back(); confirm whether the paragraph above is still current.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    //
    // The depth/stencil angle::FormatID values are in the range [1, 7], and therefore require only
    // 3 bits to be stored.  As a result, the upper 5 bits of mAttachmentFormats.back() is free to
    // use for other purposes.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;

    // Depth/stencil format is stored in 3 bits.
    static constexpr uint8_t kDepthStencilFormatStorageMask = 0x7;

    // Flags stored in the upper 5 bits of mAttachmentFormats.back().  The four flags below occupy
    // bits 4 through 7; bit 3 (0x08) is not assigned here.
    static constexpr uint8_t kResolveDepthFlag     = 0x80;
    static constexpr uint8_t kResolveStencilFlag   = 0x40;
    static constexpr uint8_t kUnresolveDepthFlag   = 0x20;
    static constexpr uint8_t kUnresolveStencilFlag = 0x10;
};
294
// Equality comparison for RenderPassDesc; declared here and defined out of line.
bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

// RenderPassDesc must stay exactly 12 bytes.  This size is also one of the terms of
// kGraphicsPipelineDescSumOfSizes.
constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
299
// Packed load/store operations, invalidation state and image layouts of one render pass
// attachment.  The bitfields form two groups of 16 bits each, 4 bytes in total.
struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    // Fills the first 16-bit group (2 + 2 + 2 + 2 + 1 + 1 + 6 = 16).
    uint16_t padding1 : 6;

    // Each layout is stored in 4 bits, and padding2 fills the remaining 8 bits of the second 16-bit
    // group so that the whole structure is exactly 4 bytes (see the static_assert below).  Note
    // that we currently don't support any of the extension layouts, whose values start at
    // 1'000'000'000.
    uint16_t initialLayout : 4;
    uint16_t finalLayout : 4;
    uint16_t padding2 : 8;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");
325
// Index type used to address entries of AttachmentOpsArray; defined elsewhere.
class PackedAttachmentIndex;

// Array of PackedAttachmentOpsDesc entries addressed by PackedAttachmentIndex.  The object is
// hashable and comparable, and its size is pinned by the static_assert following the class.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    // Read-only and mutable access to the ops of a single attachment.
    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    // Setters for the individual fields of the entry at |index|.
    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, VkAttachmentLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       VkAttachmentLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

// Equality comparison for AttachmentOpsArray; declared here and defined out of line.
bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

// The whole array must stay exactly 40 bytes.
static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");
364
// Packed description of one vertex attribute.  The size is pinned to 6 bytes by the static_assert
// below.
struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Desktop drivers support
    // NOTE(review): the sentence above is incomplete in the original source.  The width of
    // |offset| is kAttributeOffsetMaxBits, which is defined outside this header.
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful.  Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");
383
// One PackedAttribDesc per vertex attribute slot (gl::MAX_VERTEX_ATTRIBS entries).
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

// Pinned to 96 bytes; this size is one of the terms of kGraphicsPipelineDescSumOfSizes.
constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
391
// Rasterization and multisample state packed into a single 32-bit word
// (6 + 1 + 1 + 4 + 4 + 4 + 1 + 1 + 1 + 1 + 8 = 32 bits).
struct RasterizationStateBits final
{
    // Note: Currently only 2 subpasses possible, so there are 5 bits in subpass that can be
    // repurposed.
    uint32_t subpass : 6;
    uint32_t depthClampEnable : 1;
    uint32_t rasterizationDiscardEnable : 1;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");
411
// Rasterization and multisample state: the packed bit flags followed by the values that do not
// fit in a few bits.  Every member is 4 bytes wide (or an array of 4-byte words).
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there's no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

// Pinned to 32 bytes; this size is one of the terms of kGraphicsPipelineDescSumOfSizes.
constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
428
// Stencil operations and compare function for one face, 4 bits each (2 bytes in total).
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");
439
// Stencil state for one face: the packed ops followed by the 8-bit compare and write masks
// (4 bytes in total).
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
449
// Enable flags for the depth and stencil tests, packed into a single byte.
struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // these only need one bit each. the extra is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");
460
// We are borrowing three bits here for surface rotation, even though it has nothing to do with
// depth stencil.  The depth compare op takes 4 bits, the rotation 3 bits and the last bit is
// padding, for one byte in total.
struct DepthCompareOpAndSurfaceRotation final
{
    uint8_t depthCompareOp : 4;
    uint8_t surfaceRotation : 3;
    uint8_t padding : 1;
};
constexpr size_t kDepthCompareOpAndSurfaceRotationSize = sizeof(DepthCompareOpAndSurfaceRotation);
static_assert(kDepthCompareOpAndSurfaceRotationSize == 1, "Size check failed");
471
// Depth/stencil state.  The first four members are one byte each, so the floats that follow
// start at a 4-byte offset.
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    DepthCompareOpAndSurfaceRotation depthCompareOpAndSurfaceRotation;

    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

// Pinned to 20 bytes; this size is one of the terms of kGraphicsPipelineDescSumOfSizes.
constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
// All SurfaceRotation values must be representable in the 3-bit surfaceRotation field.
static_assert(static_cast<int>(SurfaceRotation::EnumCount) <= 8, "Size check failed");
488
// Logic op enable flag (1 bit) and the logic op itself (7 bits), packed into one byte.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");
497
// Blend factors and blend op for one color attachment.  The color fields fill the first 16 bits
// (5 + 5 + 6) and the alpha fields the second 16 bits, for 4 bytes in total.
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
510
// Input assembly state: primitive topology (15 bits) and the primitive restart flag (1 bit).
struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");
519
// Input assembly and color blend state.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    // Half a byte per draw buffer.
    // NOTE(review): presumably two 4-bit color write masks per byte; confirm in the .cpp.
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    // NOTE(review): presumably one enable bit per draw buffer; confirm against
    // updateBlendEnabled().
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

// Pinned to 56 bytes; this size is one of the terms of kGraphicsPipelineDescSumOfSizes.
constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
533
// Sum of the sizes of the individual pieces of state stored in GraphicsPipelineDesc.  It is
// compared against sizeof(GraphicsPipelineDesc) further below to detect padding.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set.
// Each dirty bit covers kGraphicsPipelineDirtyBitBytes bytes of the description, so the number of
// bits is the total size divided by that granularity.  The bit count is capped at 64.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits.  Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
547
// Packed description of all the state that goes into a graphics pipeline.
//
// State changes are applied through the update methods.  Each update method can also have a
// sibling method that applies the update without marking a state transition.  The non-transition
// update methods are used for internal shader pipelines.  Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
//
// Naming convention visible in this class: update*() methods take a
// GraphicsPipelineTransitionBits pointer as their first argument, while set*() methods do not.
// The data members must stay free of padding; this is verified by a static_assert after the
// class.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    // Reinterprets the address of this object as a pointer to |T|.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    // Creates the pipeline described by this object and returns it through |pipelineOut|.
    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     const vk::SpecializationConstants specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state.  For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setDynamicScissor();
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               const SurfaceRotation surfaceRotation);
    // Reads the rotation back from the bits borrowed in the depth/stencil state.
    SurfaceRotation getSurfaceRotation() const
    {
        return static_cast<SurfaceRotation>(
            mDepthStencilStateInfo.depthCompareOpAndSurfaceRotation.surfaceRotation);
    }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    // The members below are listed in the same order as the terms of
    // kGraphicsPipelineDescSumOfSizes.
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    // The special value of .offset.x == INT_MIN for scissor implies dynamic scissor that needs to
    // be set through vkCmdSetScissor.
    VkRect2D mScissor;
};
732
733 // Verify the packed pipeline description has no gaps in the packing.
734 // This is not guaranteed by the spec, but is validated by a compile-time check.
735 // No gaps or padding at the end ensures that hashing and memcmp checks will not run
736 // into uninitialized memory regions.
737 constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
738 static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
739
740 constexpr uint32_t kMaxDescriptorSetLayoutBindings =
741 std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
742 gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);
743
744 using DescriptorSetLayoutBindingVector =
745 angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;
746
747 // A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
748 // GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
749 // usage. In the future we could generalize this.
750 class DescriptorSetLayoutDesc final
751 {
752 public:
753 DescriptorSetLayoutDesc();
754 ~DescriptorSetLayoutDesc();
755 DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
756 DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);
757
758 size_t hash() const;
759 bool operator==(const DescriptorSetLayoutDesc &other) const;
760
761 void update(uint32_t bindingIndex,
762 VkDescriptorType type,
763 uint32_t count,
764 VkShaderStageFlags stages,
765 const vk::Sampler *immutableSampler);
766
767 void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
768 std::vector<VkSampler> *immutableSamplers) const;
769
770 private:
771 // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
772 // cache we would have an invalid handle here. Thus propose follow-up work:
773 // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
774 struct PackedDescriptorSetBinding
775 {
776 uint8_t type; // Stores a packed VkDescriptorType descriptorType.
777 uint8_t stages; // Stores a packed VkShaderStageFlags.
778 uint16_t count; // Stores a packed uint32_t descriptorCount.
779 uint32_t pad;
780 VkSampler immutableSampler;
781 };
782
783 // 4x 32bit
784 static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");
785
786 // This is a compact representation of a descriptor set layout.
787 std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
788 mPackedDescriptorSetLayout;
789 };
790
// Used for caching descriptor set layouts.  At most four descriptor set layouts are supported;
// this can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts{4};
794
// One push constant range, stored as two 32-bit values.
struct PackedPushConstantRange
{
    // Offset of the range.
    uint32_t offset;
    // Size of the range.
    uint32_t size;
};
800
801 template <typename T>
802 using DescriptorSetLayoutArray = std::array<T, static_cast<size_t>(DescriptorSetIndex::EnumCount)>;
803 using DescriptorSetLayoutPointerArray =
804 DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
805 template <typename T>
806 using PushConstantRangeArray = gl::ShaderMap<T>;
807
808 class PipelineLayoutDesc final
809 {
810 public:
811 PipelineLayoutDesc();
812 ~PipelineLayoutDesc();
813 PipelineLayoutDesc(const PipelineLayoutDesc &other);
814 PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);
815
816 size_t hash() const;
817 bool operator==(const PipelineLayoutDesc &other) const;
818
819 void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
820 const DescriptorSetLayoutDesc &desc);
821 void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);
822
823 const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;
824
825 private:
826 DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
827 PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;
828
829 // Verify the arrays are properly packed.
830 static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
831 (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
832 "Unexpected size");
833 static_assert(sizeof(decltype(mPushConstantRanges)) ==
834 (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
835 "Unexpected size");
836 };
837
838 // Verify the structure is properly packed.
839 static_assert(sizeof(PipelineLayoutDesc) ==
840 (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
841 sizeof(gl::ShaderMap<PackedPushConstantRange>)),
842 "Unexpected Size");
843
844 // Packed sampler description for the sampler cache.
845 class SamplerDesc final
846 {
847 public:
848 SamplerDesc();
849 SamplerDesc(const angle::FeaturesVk &featuresVk,
850 const gl::SamplerState &samplerState,
851 bool stencilMode,
852 uint64_t externalFormat);
853 ~SamplerDesc();
854
855 SamplerDesc(const SamplerDesc &other);
856 SamplerDesc &operator=(const SamplerDesc &rhs);
857
858 void update(const angle::FeaturesVk &featuresVk,
859 const gl::SamplerState &samplerState,
860 bool stencilMode,
861 uint64_t externalFormat);
862 void reset();
863 angle::Result init(ContextVk *contextVk, vk::Sampler *sampler) const;
864
865 size_t hash() const;
866 bool operator==(const SamplerDesc &other) const;
867
868 private:
869 // 32*4 bits for floating point data.
870 // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
871 float mMipLodBias;
872 float mMaxAnisotropy;
873 float mMinLod;
874 float mMaxLod;
875
876 // If the sampler needs to convert the image content (e.g. from YUV to RGB) then mExternalFormat
877 // will be non-zero and match the external format as returned from
878 // vkGetAndroidHardwareBufferPropertiesANDROID.
879 // The externalFormat is guaranteed to be unique and any image with the same externalFormat can
880 // use the same conversion sampler. Thus externalFormat works as a Serial() used elsewhere in
881 // ANGLE.
882 uint64_t mExternalFormat;
883
884 // 16 bits for modes + states.
885 // 1 bit per filter (only 2 possible values in GL: linear/nearest)
886 uint16_t mMagFilter : 1;
887 uint16_t mMinFilter : 1;
888 uint16_t mMipmapMode : 1;
889
890 // 3 bits per address mode (5 possible values)
891 uint16_t mAddressModeU : 3;
892 uint16_t mAddressModeV : 3;
893 uint16_t mAddressModeW : 3;
894
895 // 1 bit for compare enabled (2 possible values)
896 uint16_t mCompareEnabled : 1;
897
898 // 3 bits for compare op. (8 possible values)
899 uint16_t mCompareOp : 3;
900
901 // Border color and unnormalized coordinates implicitly set to contants.
902
903 // 48 extra bits reserved for future use.
904 uint16_t mReserved[3];
905 };
906
907 static_assert(sizeof(SamplerDesc) == 32, "Unexpected SamplerDesc size");
908
909 // Disable warnings about struct padding.
910 ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
911
912 class PipelineHelper;
913
914 struct GraphicsPipelineTransition
915 {
916 GraphicsPipelineTransition();
917 GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
918 GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
919 const GraphicsPipelineDesc *desc,
920 PipelineHelper *pipeline);
921
922 GraphicsPipelineTransitionBits bits;
923 const GraphicsPipelineDesc *desc;
924 PipelineHelper *target;
925 };
926
927 ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;
928
929 ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
930 const GraphicsPipelineTransition &other) = default;
931
GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,const GraphicsPipelineDesc * desc,PipelineHelper * pipeline)932 ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
933 GraphicsPipelineTransitionBits bits,
934 const GraphicsPipelineDesc *desc,
935 PipelineHelper *pipeline)
936 : bits(bits), desc(desc), target(pipeline)
937 {}
938
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)939 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
940 GraphicsPipelineTransitionBits bitsB,
941 const GraphicsPipelineDesc &descA,
942 const GraphicsPipelineDesc &descB)
943 {
944 if (bitsA != bitsB)
945 return false;
946
947 // We currently mask over 4 bytes of the pipeline description with each dirty bit.
948 // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
949 // of the code faster. The for loop below would scan over twice as many bits per iteration.
950 // But there may be more collisions between the same dirty bit masks leading to different
951 // transitions. Thus there may be additional cost when applications use many transitions.
952 // We should revisit this in the future and investigate using different bit widths.
953 static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
954
955 const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
956 const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
957
958 for (size_t dirtyBit : bitsA)
959 {
960 if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
961 return false;
962 }
963
964 return true;
965 }
966
967 class PipelineHelper final : angle::NonCopyable
968 {
969 public:
970 PipelineHelper();
971 ~PipelineHelper();
972 inline explicit PipelineHelper(Pipeline &&pipeline);
973
974 void destroy(VkDevice device);
975
updateSerial(Serial serial)976 void updateSerial(Serial serial) { mSerial = serial; }
valid()977 bool valid() const { return mPipeline.valid(); }
getSerial()978 Serial getSerial() const { return mSerial; }
getPipeline()979 Pipeline &getPipeline() { return mPipeline; }
980
findTransition(GraphicsPipelineTransitionBits bits,const GraphicsPipelineDesc & desc,PipelineHelper ** pipelineOut)981 ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
982 const GraphicsPipelineDesc &desc,
983 PipelineHelper **pipelineOut) const
984 {
985 // Search could be improved using sorting or hashing.
986 for (const GraphicsPipelineTransition &transition : mTransitions)
987 {
988 if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
989 {
990 *pipelineOut = transition.target;
991 return true;
992 }
993 }
994
995 return false;
996 }
997
998 void addTransition(GraphicsPipelineTransitionBits bits,
999 const GraphicsPipelineDesc *desc,
1000 PipelineHelper *pipeline);
1001
1002 private:
1003 std::vector<GraphicsPipelineTransition> mTransitions;
1004 Serial mSerial;
1005 Pipeline mPipeline;
1006 };
1007
PipelineHelper(Pipeline && pipeline)1008 ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
1009
// Image subresource range packed into 32 bits.
struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint16_t level : 10;
    // Max 63 levels (2 ** 6 - 1).  If we need more, take from layer.
    uint16_t levelCount : 6;
    // Implementation max is 2048 (11 bits).
    uint16_t layer : 13;
    // true/false only.  Not possible to use sub-slices of levels.
    uint16_t singleLayer : 1;
    // Values from vk::SrgbDecodeMode.
    uint16_t srgbDecodeMode : 1;
    // Values from gl::SrgbOverride, either Default or SRGB.
    uint16_t srgbOverrideMode : 1;
};

static_assert(sizeof(uint32_t) == sizeof(ImageSubresourceRange), "Size mismatch");

// The invalid range has every field set to zero.
constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {};
1023
1024 struct ImageViewSubresourceSerial
1025 {
1026 ImageViewSerial imageViewSerial;
1027 ImageSubresourceRange subresource;
1028 };
1029
1030 static_assert(sizeof(ImageViewSubresourceSerial) == sizeof(uint64_t), "Size mismatch");
1031
1032 constexpr ImageViewSubresourceSerial kInvalidImageViewSubresourceSerial = {
1033 kInvalidImageViewSerial, kInvalidImageSubresourceRange};
1034
1035 class TextureDescriptorDesc
1036 {
1037 public:
1038 TextureDescriptorDesc();
1039 ~TextureDescriptorDesc();
1040
1041 TextureDescriptorDesc(const TextureDescriptorDesc &other);
1042 TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);
1043
1044 void update(size_t index,
1045 ImageViewSubresourceSerial imageViewSerial,
1046 SamplerSerial samplerSerial);
1047 size_t hash() const;
1048 void reset();
1049
1050 bool operator==(const TextureDescriptorDesc &other) const;
1051
1052 // Note: this is an exclusive index. If there is one index it will return "1".
getMaxIndex()1053 uint32_t getMaxIndex() const { return mMaxIndex; }
1054
1055 private:
1056 uint32_t mMaxIndex;
1057
1058 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
1059 struct TexUnitSerials
1060 {
1061 ImageViewSubresourceSerial imageView;
1062 SamplerSerial sampler;
1063 };
1064 gl::ActiveTextureArray<TexUnitSerials> mSerials;
1065 ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
1066 };
1067
1068 class UniformsAndXfbDesc
1069 {
1070 public:
1071 UniformsAndXfbDesc();
1072 ~UniformsAndXfbDesc();
1073
1074 UniformsAndXfbDesc(const UniformsAndXfbDesc &other);
1075 UniformsAndXfbDesc &operator=(const UniformsAndXfbDesc &other);
1076
getDefaultUniformBufferSerial()1077 BufferSerial getDefaultUniformBufferSerial() const
1078 {
1079 return mBufferSerials[kDefaultUniformBufferIndex];
1080 }
updateDefaultUniformBuffer(BufferSerial bufferSerial)1081 void updateDefaultUniformBuffer(BufferSerial bufferSerial)
1082 {
1083 mBufferSerials[kDefaultUniformBufferIndex] = bufferSerial;
1084 mBufferCount = std::max(mBufferCount, static_cast<uint32_t>(1));
1085 }
updateTransformFeedbackBuffer(size_t xfbIndex,BufferSerial bufferSerial)1086 void updateTransformFeedbackBuffer(size_t xfbIndex, BufferSerial bufferSerial)
1087 {
1088 uint32_t bufferIndex = static_cast<uint32_t>(xfbIndex) + 1;
1089 mBufferSerials[bufferIndex] = bufferSerial;
1090 mBufferCount = std::max(mBufferCount, (bufferIndex + 1));
1091 }
1092 size_t hash() const;
1093 void reset();
1094
1095 bool operator==(const UniformsAndXfbDesc &other) const;
1096
1097 private:
1098 uint32_t mBufferCount;
1099 // The array index 0 is used for default uniform buffer
1100 static constexpr size_t kDefaultUniformBufferIndex = 0;
1101 static constexpr size_t kMaxBufferCount = 1 + gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_BUFFERS;
1102 std::array<BufferSerial, kMaxBufferCount> mBufferSerials;
1103 };
1104
1105 // In the FramebufferDesc object:
1106 // - Depth/stencil serial is at index 0
1107 // - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
1108 // - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
1109 // - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
1110 // gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
1111 constexpr size_t kFramebufferDescDepthStencilIndex = 0;
1112 constexpr size_t kFramebufferDescColorIndexOffset = kFramebufferDescDepthStencilIndex + 1;
1113 constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
1114 kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
1115 constexpr size_t kFramebufferDescColorResolveIndexOffset =
1116 kFramebufferDescDepthStencilResolveIndexOffset + 1;
1117
1118 // Enable struct padding warnings for the code below since it is used in caches.
1119 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
1120
1121 class FramebufferDesc
1122 {
1123 public:
1124 FramebufferDesc();
1125 ~FramebufferDesc();
1126
1127 FramebufferDesc(const FramebufferDesc &other);
1128 FramebufferDesc &operator=(const FramebufferDesc &other);
1129
1130 void updateColor(uint32_t index, ImageViewSubresourceSerial serial);
1131 void updateColorResolve(uint32_t index, ImageViewSubresourceSerial serial);
1132 void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
1133 void updateDepthStencil(ImageViewSubresourceSerial serial);
1134 void updateDepthStencilResolve(ImageViewSubresourceSerial serial);
1135 size_t hash() const;
1136
1137 bool operator==(const FramebufferDesc &other) const;
1138
1139 uint32_t attachmentCount() const;
1140
getColorImageViewSerial(uint32_t index)1141 ImageViewSubresourceSerial getColorImageViewSerial(uint32_t index)
1142 {
1143 ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
1144 return mSerials[kFramebufferDescColorIndexOffset + index];
1145 }
1146
1147 FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
1148
1149 private:
1150 void reset();
1151 void update(uint32_t index, ImageViewSubresourceSerial serial);
1152
1153 // Note: this is an exclusive index. If there is one index it will be "1".
1154 uint16_t mMaxIndex;
1155
1156 // If the render pass contains an initial subpass to unresolve a number of attachments, the
1157 // subpass description is derived from the following mask, specifying which attachments need
1158 // to be unresolved. Includes both color and depth/stencil attachments.
1159 FramebufferNonResolveAttachmentMask mUnresolveAttachmentMask;
1160
1161 FramebufferAttachmentArray<ImageViewSubresourceSerial> mSerials;
1162 };
1163
1164 // Disable warnings about struct padding.
1165 ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
1166
1167 // The SamplerHelper allows a Sampler to be coupled with a serial.
1168 // Must be included before we declare SamplerCache.
1169 class SamplerHelper final : angle::NonCopyable
1170 {
1171 public:
1172 SamplerHelper(ContextVk *contextVk);
1173 ~SamplerHelper();
1174
1175 explicit SamplerHelper(SamplerHelper &&samplerHelper);
1176 SamplerHelper &operator=(SamplerHelper &&rhs);
1177
valid()1178 bool valid() const { return mSampler.valid(); }
get()1179 const Sampler &get() const { return mSampler; }
get()1180 Sampler &get() { return mSampler; }
getSamplerSerial()1181 SamplerSerial getSamplerSerial() const { return mSamplerSerial; }
1182
1183 private:
1184 Sampler mSampler;
1185 SamplerSerial mSamplerSerial;
1186 };
1187
1188 using RefCountedSampler = RefCounted<SamplerHelper>;
1189 using SamplerBinding = BindingPointer<SamplerHelper>;
1190
1191 class RenderPassHelper final : angle::NonCopyable
1192 {
1193 public:
1194 RenderPassHelper();
1195 ~RenderPassHelper();
1196
1197 RenderPassHelper(RenderPassHelper &&other);
1198 RenderPassHelper &operator=(RenderPassHelper &&other);
1199
1200 void destroy(VkDevice device);
1201
1202 const RenderPass &getRenderPass() const;
1203 RenderPass &getRenderPass();
1204
1205 const RenderPassPerfCounters &getPerfCounters() const;
1206 RenderPassPerfCounters &getPerfCounters();
1207
1208 private:
1209 RenderPass mRenderPass;
1210 RenderPassPerfCounters mPerfCounters;
1211 };
1212 } // namespace vk
1213 } // namespace rx
1214
1215 // Introduce std::hash for the above classes.
1216 namespace std
1217 {
1218 template <>
1219 struct hash<rx::vk::RenderPassDesc>
1220 {
1221 size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
1222 };
1223
1224 template <>
1225 struct hash<rx::vk::AttachmentOpsArray>
1226 {
1227 size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
1228 };
1229
1230 template <>
1231 struct hash<rx::vk::GraphicsPipelineDesc>
1232 {
1233 size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
1234 };
1235
1236 template <>
1237 struct hash<rx::vk::DescriptorSetLayoutDesc>
1238 {
1239 size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
1240 };
1241
1242 template <>
1243 struct hash<rx::vk::PipelineLayoutDesc>
1244 {
1245 size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
1246 };
1247
1248 template <>
1249 struct hash<rx::vk::TextureDescriptorDesc>
1250 {
1251 size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
1252 };
1253
1254 template <>
1255 struct hash<rx::vk::UniformsAndXfbDesc>
1256 {
1257 size_t operator()(const rx::vk::UniformsAndXfbDesc &key) const { return key.hash(); }
1258 };
1259
1260 template <>
1261 struct hash<rx::vk::FramebufferDesc>
1262 {
1263 size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
1264 };
1265
1266 template <>
1267 struct hash<rx::vk::SamplerDesc>
1268 {
1269 size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
1270 };
1271
1272 // See Resource Serial types defined in vk_utils.h.
1273 #define ANGLE_HASH_VK_SERIAL(Type) \
1274 template <> \
1275 struct hash<rx::vk::Type##Serial> \
1276 { \
1277 size_t operator()(const rx::vk::Type##Serial &key) const { return key.getValue(); } \
1278 };
1279
1280 ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)
1281
1282 } // namespace std
1283
1284 namespace rx
1285 {
1286 // TODO(jmadill): Add cache trimming/eviction.
1287 class RenderPassCache final : angle::NonCopyable
1288 {
1289 public:
1290 RenderPassCache();
1291 ~RenderPassCache();
1292
1293 void destroy(VkDevice device);
1294
1295 ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
1296 const vk::RenderPassDesc &desc,
1297 vk::RenderPass **renderPassOut)
1298 {
1299 auto outerIt = mPayload.find(desc);
1300 if (outerIt != mPayload.end())
1301 {
1302 InnerCache &innerCache = outerIt->second;
1303 ASSERT(!innerCache.empty());
1304
1305 // Find the first element and return it.
1306 *renderPassOut = &innerCache.begin()->second.getRenderPass();
1307 return angle::Result::Continue;
1308 }
1309
1310 return addRenderPass(contextVk, desc, renderPassOut);
1311 }
1312
1313 angle::Result getRenderPassWithOps(ContextVk *contextVk,
1314 const vk::RenderPassDesc &desc,
1315 const vk::AttachmentOpsArray &attachmentOps,
1316 vk::RenderPass **renderPassOut);
1317
1318 private:
1319 angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
1320 const vk::RenderPassDesc &desc,
1321 const vk::AttachmentOpsArray &attachmentOps,
1322 bool updatePerfCounters,
1323 vk::RenderPass **renderPassOut);
1324
1325 angle::Result addRenderPass(ContextVk *contextVk,
1326 const vk::RenderPassDesc &desc,
1327 vk::RenderPass **renderPassOut);
1328
1329 // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
1330 // The second layer caches the attachment load/store ops and initial/final layout.
1331 using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
1332 using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;
1333
1334 OuterCache mPayload;
1335 };
1336
1337 // TODO(jmadill): Add cache trimming/eviction.
1338 class GraphicsPipelineCache final : angle::NonCopyable
1339 {
1340 public:
1341 GraphicsPipelineCache();
1342 ~GraphicsPipelineCache();
1343
1344 void destroy(VkDevice device);
1345 void release(ContextVk *context);
1346
1347 void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);
1348
1349 ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
1350 const vk::PipelineCache &pipelineCacheVk,
1351 const vk::RenderPass &compatibleRenderPass,
1352 const vk::PipelineLayout &pipelineLayout,
1353 const gl::AttributesMask &activeAttribLocationsMask,
1354 const gl::ComponentTypeMask &programAttribsTypeMask,
1355 const vk::ShaderModule *vertexModule,
1356 const vk::ShaderModule *fragmentModule,
1357 const vk::ShaderModule *geometryModule,
1358 const vk::SpecializationConstants specConsts,
1359 const vk::GraphicsPipelineDesc &desc,
1360 const vk::GraphicsPipelineDesc **descPtrOut,
1361 vk::PipelineHelper **pipelineOut)
1362 {
1363 auto item = mPayload.find(desc);
1364 if (item != mPayload.end())
1365 {
1366 *descPtrOut = &item->first;
1367 *pipelineOut = &item->second;
1368 return angle::Result::Continue;
1369 }
1370
1371 return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
1372 activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
1373 fragmentModule, geometryModule, specConsts, desc, descPtrOut,
1374 pipelineOut);
1375 }
1376
1377 private:
1378 angle::Result insertPipeline(ContextVk *contextVk,
1379 const vk::PipelineCache &pipelineCacheVk,
1380 const vk::RenderPass &compatibleRenderPass,
1381 const vk::PipelineLayout &pipelineLayout,
1382 const gl::AttributesMask &activeAttribLocationsMask,
1383 const gl::ComponentTypeMask &programAttribsTypeMask,
1384 const vk::ShaderModule *vertexModule,
1385 const vk::ShaderModule *fragmentModule,
1386 const vk::ShaderModule *geometryModule,
1387 const vk::SpecializationConstants specConsts,
1388 const vk::GraphicsPipelineDesc &desc,
1389 const vk::GraphicsPipelineDesc **descPtrOut,
1390 vk::PipelineHelper **pipelineOut);
1391
1392 std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
1393 };
1394
1395 class DescriptorSetLayoutCache final : angle::NonCopyable
1396 {
1397 public:
1398 DescriptorSetLayoutCache();
1399 ~DescriptorSetLayoutCache();
1400
1401 void destroy(VkDevice device);
1402
1403 angle::Result getDescriptorSetLayout(
1404 vk::Context *context,
1405 const vk::DescriptorSetLayoutDesc &desc,
1406 vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);
1407
1408 private:
1409 std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
1410 };
1411
1412 class PipelineLayoutCache final : angle::NonCopyable
1413 {
1414 public:
1415 PipelineLayoutCache();
1416 ~PipelineLayoutCache();
1417
1418 void destroy(VkDevice device);
1419
1420 angle::Result getPipelineLayout(vk::Context *context,
1421 const vk::PipelineLayoutDesc &desc,
1422 const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
1423 vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);
1424
1425 private:
1426 std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
1427 };
1428
1429 class SamplerCache final : angle::NonCopyable
1430 {
1431 public:
1432 SamplerCache();
1433 ~SamplerCache();
1434
1435 void destroy(RendererVk *renderer);
1436
1437 angle::Result getSampler(ContextVk *contextVk,
1438 const vk::SamplerDesc &desc,
1439 vk::SamplerBinding *samplerOut);
1440
1441 private:
1442 std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
1443 };
1444
1445 // YuvConversion Cache
1446 class SamplerYcbcrConversionCache final : angle::NonCopyable
1447 {
1448 public:
1449 SamplerYcbcrConversionCache();
1450 ~SamplerYcbcrConversionCache();
1451
1452 void destroy(RendererVk *render);
1453
1454 angle::Result getYuvConversion(
1455 vk::Context *context,
1456 uint64_t externalFormat,
1457 const VkSamplerYcbcrConversionCreateInfo &yuvConversionCreateInfo,
1458 vk::BindingPointer<vk::SamplerYcbcrConversion> *yuvConversionOut);
1459 VkSamplerYcbcrConversion getYuvConversionFromExternalFormat(uint64_t externalFormat) const;
1460
1461 private:
1462 std::unordered_map<uint64_t, vk::RefCountedSamplerYcbcrConversion> mPayload;
1463 };
1464
// Driver uniforms use a single binding.
constexpr uint32_t kReservedDriverUniformBindingCount{1};
// Default uniforms use 1 binding per stage.  Currently, a maximum of three stages are supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount{1};
constexpr uint32_t kReservedDefaultUniformBindingCount{3};
1471 } // namespace rx
1472
1473 #endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
1474