//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace rx
{

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is only
//   one binding in this set.  This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
//   Otherwise, whenever the layout of Set 1 changed (due to addition and removal of xfb buffers),
//   all subsequent sets would need to be rebound (due to Vulkan pipeline layout validation rules),
//   which in turn would have required a call to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms.  One binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures.
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.

// ANGLE driver uniforms set index (binding is always 0):
enum DescriptorSetIndex : uint32_t
{
    // All internal shaders assume there is only one descriptor set, indexed at 0.
    InternalShader = 0,

    DriverUniforms = 0,  // ANGLE driver uniforms set index
    UniformsAndXfb,      // Uniforms set index
    Texture,             // Textures set index
    ShaderResource,      // Other shader resources set index

    InvalidEnum,
    EnumCount = InvalidEnum,
};
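
// Illustrative sketch (not part of this header; variable names are hypothetical): the enum values
// above are what end up as |firstSet| when a descriptor set is bound with the raw Vulkan API, e.g.
// for the texture set:
//
//     vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
//                             DescriptorSetIndex::Texture, 1, &textureDescriptorSet, 0, nullptr);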

namespace vk
{
class DynamicDescriptorPool;
class ImageHelper;
enum class ImageLayout;

using PipelineAndSerial = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout    = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout         = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;

// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs)                                                         \
    do                                                                                \
    {                                                                                 \
        auto ANGLE_LOCAL_VAR = rhs;                                                   \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
        ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR);       \
    } while (0)
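
// Example usage (illustrative only; |ops| and |index| are hypothetical): packing a
// VkAttachmentLoadOp value into the 2-bit loadOp field of PackedAttachmentOpsDesc below.  The
// ASSERT inside the macro catches any value that does not fit in the destination bitfield:
//
//     SetBitField(ops[index].loadOp, VK_ATTACHMENT_LOAD_OP_CLEAR);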

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

enum ResourceAccess
{
    Unused,
    ReadOnly,
    Write,
};

inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    if (newAccess > *oldAccess)
    {
        *oldAccess = newAccess;
    }
}

enum RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    NoneQCOM,
};
// ConvertRenderPassStoreOpToVkStoreOp relies on the fact that only NoneQCOM is different from the
// VK enums.
static_assert(RenderPassStoreOp::NoneQCOM == 2,
              "ConvertRenderPassStoreOpToVkStoreOp must be updated");

inline VkAttachmentStoreOp ConvertRenderPassStoreOpToVkStoreOp(RenderPassStoreOp storeOp)
{
    return storeOp == RenderPassStoreOp::NoneQCOM ? VK_ATTACHMENT_STORE_OP_NONE_QCOM
                                                  : static_cast<VkAttachmentStoreOp>(storeOp);
}

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask    = angle::BitSet<kMaxFramebufferAttachments>;

constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask  = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment(bool resolveDepth, bool resolveStencil);
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const
    {
        return (mAttachmentFormats.back() & (kResolveDepthFlag | kResolveStencilFlag)) != 0;
    }
    bool hasDepthResolveAttachment() const
    {
        return (mAttachmentFormats.back() & kResolveDepthFlag) != 0;
    }
    bool hasStencilResolveAttachment() const
    {
        return (mAttachmentFormats.back() & kResolveStencilFlag) != 0;
    }
    bool hasDepthStencilUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & (kUnresolveDepthFlag | kUnresolveStencilFlag)) != 0;
    }
    bool hasDepthUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & kUnresolveDepthFlag) != 0;
    }
    bool hasStencilUnresolveAttachment() const
    {
        return (mAttachmentFormats.back() & kUnresolveStencilFlag) != 0;
    }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return 1u << mLogSamples; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);

        uint8_t format = mAttachmentFormats[index];
        if (index >= depthStencilAttachmentIndex())
        {
            format &= kDepthStencilFormatStorageMask;
        }
        return static_cast<angle::FormatID>(format);
    }

  private:
    // Store log(samples), to be able to store it in 3 bits.
    uint8_t mLogSamples : 3;
    uint8_t mColorAttachmentRange : 4;
    uint8_t mHasDepthStencilAttachment : 1;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    //
    // Note that depth/stencil resolve attachments require VK_KHR_depth_stencil_resolve which is
    // currently not well supported, so ANGLE always takes a fallback path for them.  When a resolve
    // path is implemented for depth/stencil attachments, another bit must be made free
    // (mAttachmentFormats is one element too large, so there are 8 bits there to take).
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which correspond to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    //
    // The depth/stencil angle::FormatID values are in the range [1, 7], and therefore require only
    // 3 bits to be stored.  As a result, the upper 5 bits of mAttachmentFormats.back() are free to
    // use for other purposes.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;

    // Depth/stencil format is stored in 3 bits.
    static constexpr uint8_t kDepthStencilFormatStorageMask = 0x7;

    // Flags stored in the upper 5 bits of mAttachmentFormats.back().
    static constexpr uint8_t kResolveDepthFlag     = 0x80;
    static constexpr uint8_t kResolveStencilFlag   = 0x40;
    static constexpr uint8_t kUnresolveDepthFlag   = 0x20;
    static constexpr uint8_t kUnresolveStencilFlag = 0x10;
};
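
// Illustrative sketch of packing a RenderPassDesc (the attachment formats chosen here are
// hypothetical): color attachment 1 is disabled, and the depth/stencil attachment is packed last
// as required by the comments above:
//
//     RenderPassDesc desc;
//     desc.setSamples(4);
//     desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packColorAttachmentGap(1);
//     desc.packColorAttachment(2, angle::FormatID::R16G16B16A16_FLOAT);
//     desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);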

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");

struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // 4-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 4;
    uint16_t finalLayout : 4;
    uint16_t padding2 : 8;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");
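
// For reference, the bit fields above add up as follows: 2 + 2 + 2 + 2 + 1 + 1 + 6 = 16 bits for
// the ops and invalidate flags, plus 4 + 4 + 8 = 16 bits for the layouts and padding, i.e. 32 bits
// total, which is what the static_assert above verifies.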

class PackedAttachmentIndex;

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, VkAttachmentLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       VkAttachmentLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Desktop drivers support
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");

struct RasterizationStateBits final
{
    // Note: Currently only 2 subpasses possible, so there are 5 bits in subpass that can be
    // repurposed.
    uint32_t subpass : 6;
    uint32_t depthClampEnable : 1;
    uint32_t rasterizationDiscardEnable : 1;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or the structures that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");

struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");

struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // These only need one bit each; the extra bit is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

// We are borrowing three bits here for surface rotation, even though it has nothing to do with
// depth stencil.
struct DepthCompareOpAndSurfaceRotation final
{
    uint8_t depthCompareOp : 4;
    uint8_t surfaceRotation : 3;
    uint8_t padding : 1;
};
constexpr size_t kDepthCompareOpAndSurfaceRotationSize = sizeof(DepthCompareOpAndSurfaceRotation);
static_assert(kDepthCompareOpAndSurfaceRotationSize == 1, "Size check failed");

struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    DepthCompareOpAndSurfaceRotation depthCompareOpAndSurfaceRotation;

    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
static_assert(static_cast<int>(SurfaceRotation::EnumCount) <= 8, "Size check failed");

struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
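
// For reference (using the sizes asserted above plus the fixed 24-byte VkViewport and 16-byte
// VkRect2D): 96 + 12 + 32 + 20 + 56 + 24 + 16 = 256 bytes, i.e. 64 dirty bits at 4 bytes per bit,
// which is exactly the limit checked by the static_assert above.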

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     const vk::SpecializationConstants specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setDynamicScissor();
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               const SurfaceRotation surfaceRotation);
    SurfaceRotation getSurfaceRotation() const
    {
        return static_cast<SurfaceRotation>(
            mDepthStencilStateInfo.depthCompareOpAndSurfaceRotation.surfaceRotation);
    }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    // The special value of .offset.x == INT_MIN for scissor implies dynamic scissor that needs to
    // be set through vkCmdSetScissor.
    VkRect2D mScissor;
};
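
// Illustrative sketch (not part of this header; the state values are hypothetical) of the
// transition-tracking pattern described above: each update*() call marks the 4-byte chunks of the
// description that changed, and the resulting bit set is later used to walk the pipeline
// transition graph (see GraphicsPipelineTransitionMatch below):
//
//     GraphicsPipelineTransitionBits transition;
//     desc.updateTopology(&transition, gl::PrimitiveMode::Triangles);
//     desc.updateLineWidth(&transition, 2.0f);
//     desc.updateBlendColor(&transition, gl::ColorF(0.0f, 0.0f, 0.0f, 1.0f));
//     // |transition| now has bits set only for the chunks touched by these updates.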

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages,
                const vk::Sampler *immutableSampler);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
                        std::vector<VkSampler> *immutableSamplers) const;

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache: we would end up with an invalid handle here.  Hence the follow-up work proposed below:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
        uint32_t pad;
        VkSampler immutableSampler;
    };

    // 4x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
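
// Illustrative sketch (binding numbers, descriptor types and stage flags are hypothetical):
// describing a layout with a uniform buffer for the vertex stage and a combined image sampler for
// the fragment stage, then unpacking it into the form vkCreateDescriptorSetLayout consumes:
//
//     DescriptorSetLayoutDesc desc;
//     desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT, nullptr);
//     desc.update(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT,
//                 nullptr);
//
//     DescriptorSetLayoutBindingVector bindings;
//     std::vector<VkSampler> immutableSamplers;
//     desc.unpackBindings(&bindings, &immutableSamplers);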

// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, static_cast<size_t>(DescriptorSetIndex::EnumCount)>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(const angle::FeaturesVk &featuresVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                uint64_t externalFormat);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(const angle::FeaturesVk &featuresVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                uint64_t externalFormat);
    void reset();
    angle::Result init(ContextVk *contextVk, vk::Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then mExternalFormat
    // will be non-zero and match the external format as returned from
    // vkGetAndroidHardwareBufferPropertiesANDROID.
    // The externalFormat is guaranteed to be unique, and any image with the same externalFormat can
    // use the same conversion sampler. Thus mExternalFormat works like the Serial() values used
    // elsewhere in ANGLE.
    uint64_t mExternalFormat;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Border color and unnormalized coordinates are implicitly set to constants.

    // 48 extra bits reserved for future use.
    uint16_t mReserved[3];
};

static_assert(sizeof(SamplerDesc) == 32, "Unexpected SamplerDesc size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}

class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}

struct ImageSubresourceRange
{
    uint16_t level : 10;            // GL max is 1000 (fits in 10 bits).
    uint16_t levelCount : 6;        // Max 63 levels (2 ** 6 - 1). If we need more, take from layer.
    uint16_t layer : 13;            // Implementation max is 2048 (11 bits).
    uint16_t singleLayer : 1;       // true/false only. Not possible to use sub-slices of levels.
    uint16_t srgbDecodeMode : 1;    // Values from vk::SrgbDecodeMode.
    uint16_t srgbOverrideMode : 1;  // Values from gl::SrgbOverride, either Default or SRGB.
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");
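
// For reference, the bit fields above sum to 10 + 6 + 13 + 1 + 1 + 1 = 32 bits, matching the
// uint32_t size checked by the static_assert above.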

constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};

struct ImageViewSubresourceSerial
{
    ImageViewSerial imageViewSerial;
    ImageSubresourceRange subresource;
};

static_assert(sizeof(ImageViewSubresourceSerial) == sizeof(uint64_t), "Size mismatch");

constexpr ImageViewSubresourceSerial kInvalidImageViewSubresourceSerial = {
    kInvalidImageViewSerial, kInvalidImageSubresourceRange};

class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    void update(size_t index,
                ImageViewSubresourceSerial imageViewSerial,
                SamplerSerial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index. If there is one index it will return "1".
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;

    ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
    struct TexUnitSerials
    {
        ImageViewSubresourceSerial imageView;
        SamplerSerial sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
    ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
};

class UniformsAndXfbDesc
{
  public:
    UniformsAndXfbDesc();
    ~UniformsAndXfbDesc();

    UniformsAndXfbDesc(const UniformsAndXfbDesc &other);
    UniformsAndXfbDesc &operator=(const UniformsAndXfbDesc &other);

    BufferSerial getDefaultUniformBufferSerial() const
    {
        return mBufferSerials[kDefaultUniformBufferIndex];
    }
    void updateDefaultUniformBuffer(BufferSerial bufferSerial)
    {
        mBufferSerials[kDefaultUniformBufferIndex] = bufferSerial;
        mBufferCount = std::max(mBufferCount, static_cast<uint32_t>(1));
    }
    void updateTransformFeedbackBuffer(size_t xfbIndex, BufferSerial bufferSerial)
    {
        uint32_t bufferIndex        = static_cast<uint32_t>(xfbIndex) + 1;
        mBufferSerials[bufferIndex] = bufferSerial;
        mBufferCount                = std::max(mBufferCount, (bufferIndex + 1));
    }
    size_t hash() const;
    void reset();

    bool operator==(const UniformsAndXfbDesc &other) const;

  private:
    uint32_t mBufferCount;
    // The array index 0 is used for the default uniform buffer.
    static constexpr size_t kDefaultUniformBufferIndex = 0;
    static constexpr size_t kMaxBufferCount = 1 + gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_BUFFERS;
    std::array<BufferSerial, kMaxBufferCount> mBufferSerials;
};

// In the FramebufferDesc object:
//  - Depth/stencil serial is at index 0
//  - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
//  - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
//  - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//                                        gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;
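
// As a concrete example, with gl::IMPLEMENTATION_MAX_DRAW_BUFFERS equal to 8 (for illustration
// only; the constants above do not assume a specific value), the serial array is laid out as:
//
//  - Index 0:       depth/stencil attachment
//  - Indices 1-8:   color attachments
//  - Index 9:       depth/stencil resolve attachment
//  - Indices 10-17: color resolve attachments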

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageViewSubresourceSerial serial);
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;

  private:
    void reset();
    void update(uint32_t index, ImageViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    uint16_t mMaxIndex;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved.  Includes both color and depth/stencil attachments.
    FramebufferNonResolveAttachmentMask mUnresolveAttachmentMask;

    FramebufferAttachmentArray<ImageViewSubresourceSerial> mSerials;
};

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper(ContextVk *contextVk);
    ~SamplerHelper();

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    Sampler &get() { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using RefCountedSampler = RefCounted<SamplerHelper>;
using SamplerBinding    = BindingPointer<SamplerHelper>;

1191 class RenderPassHelper final : angle::NonCopyable
1192 {
1193   public:
1194     RenderPassHelper();
1195     ~RenderPassHelper();
1196 
1197     RenderPassHelper(RenderPassHelper &&other);
1198     RenderPassHelper &operator=(RenderPassHelper &&other);
1199 
1200     void destroy(VkDevice device);
1201 
1202     const RenderPass &getRenderPass() const;
1203     RenderPass &getRenderPass();
1204 
1205     const RenderPassPerfCounters &getPerfCounters() const;
1206     RenderPassPerfCounters &getPerfCounters();
1207 
1208   private:
1209     RenderPass mRenderPass;
1210     RenderPassPerfCounters mPerfCounters;
1211 };
1212 }  // namespace vk
1213 }  // namespace rx
1214 
1215 // Introduce std::hash for the above classes.
1216 namespace std
1217 {
1218 template <>
1219 struct hash<rx::vk::RenderPassDesc>
1220 {
1221     size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
1222 };
1223 
1224 template <>
1225 struct hash<rx::vk::AttachmentOpsArray>
1226 {
1227     size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
1228 };
1229 
1230 template <>
1231 struct hash<rx::vk::GraphicsPipelineDesc>
1232 {
1233     size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
1234 };
1235 
1236 template <>
1237 struct hash<rx::vk::DescriptorSetLayoutDesc>
1238 {
1239     size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
1240 };
1241 
1242 template <>
1243 struct hash<rx::vk::PipelineLayoutDesc>
1244 {
1245     size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
1246 };
1247 
1248 template <>
1249 struct hash<rx::vk::TextureDescriptorDesc>
1250 {
1251     size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
1252 };
1253 
1254 template <>
1255 struct hash<rx::vk::UniformsAndXfbDesc>
1256 {
1257     size_t operator()(const rx::vk::UniformsAndXfbDesc &key) const { return key.hash(); }
1258 };
1259 
1260 template <>
1261 struct hash<rx::vk::FramebufferDesc>
1262 {
1263     size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
1264 };
1265 
1266 template <>
1267 struct hash<rx::vk::SamplerDesc>
1268 {
1269     size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
1270 };
1271 
1272 // See Resource Serial types defined in vk_utils.h.
1273 #define ANGLE_HASH_VK_SERIAL(Type)                                                          \
1274     template <>                                                                             \
1275     struct hash<rx::vk::Type##Serial>                                                       \
1276     {                                                                                       \
1277         size_t operator()(const rx::vk::Type##Serial &key) const { return key.getValue(); } \
1278     };
1279 
1280 ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)
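
// As an illustration, assuming Sampler is among the types enumerated by ANGLE_VK_SERIAL_OP (the
// serial types are defined in vk_utils.h), the invocation above generates a specialization
// equivalent to:
//
//     template <>
//     struct hash<rx::vk::SamplerSerial>
//     {
//         size_t operator()(const rx::vk::SamplerSerial &key) const { return key.getValue(); }
//     };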

}  // namespace std

namespace rx
{
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(VkDevice device);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
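        // First layer of the cache: find any render pass that was created with a compatible
        // description, regardless of which attachment ops it was created with.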
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            return angle::Result::Continue;
        }

        return addRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           vk::RenderPass **renderPassOut);

    angle::Result addRenderPass(ContextVk *contextVk,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
};
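
// Illustrative sketch of the two-layer lookup performed inside RenderPassCache (hypothetical
// local names; the real logic lives in getRenderPassWithOps/getRenderPassWithOpsImpl):
//
//     // Layer 1: match the "compatible" portion of the render pass (attachment formats, etc.).
//     InnerCache &innerCache = mPayload[desc];
//
//     // Layer 2: match the exact attachment load/store ops and initial/final layouts.
//     auto innerIt = innerCache.find(attachmentOps);
//     if (innerIt == innerCache.end())
//     {
//         // Create a new VkRenderPass for this ops combination and add it to the inner cache.
//     }
//     *renderPassOut = &innerIt->second.getRenderPass();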

// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : angle::NonCopyable
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache();

    void destroy(VkDevice device);
    void release(ContextVk *context);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::ShaderModule *geometryModule,
                                           const vk::SpecializationConstants specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
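        // Fast path: return a pipeline that was previously created with an identical description.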
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            return angle::Result::Continue;
        }

        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, geometryModule, specConsts, desc, descPtrOut,
                              pipelineOut);
    }

  private:
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::ShaderModule *geometryModule,
                                 const vk::SpecializationConstants specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};
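
// Hypothetical call-site sketch for getPipeline() (every local variable below is assumed to be
// set up elsewhere; mGraphicsPipelineCache is an illustrative member name):
//
//     const vk::GraphicsPipelineDesc *descPtr = nullptr;
//     vk::PipelineHelper *pipeline            = nullptr;
//     ANGLE_TRY(mGraphicsPipelineCache.getPipeline(
//         contextVk, pipelineCache, compatibleRenderPass, pipelineLayout,
//         activeAttribLocationsMask, programAttribsTypeMask, vertexModule, fragmentModule,
//         geometryModule, specConsts, desc, &descPtr, &pipeline));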

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};

class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

class SamplerCache final : angle::NonCopyable
{
  public:
    SamplerCache();
    ~SamplerCache();

    void destroy(RendererVk *renderer);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SamplerBinding *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};

// YUV conversion cache: SamplerYcbcrConversion objects keyed by the 64-bit external format.
class SamplerYcbcrConversionCache final : angle::NonCopyable
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache();

    void destroy(RendererVk *renderer);

    angle::Result getYuvConversion(
        vk::Context *context,
        uint64_t externalFormat,
        const VkSamplerYcbcrConversionCreateInfo &yuvConversionCreateInfo,
        vk::BindingPointer<vk::SamplerYcbcrConversion> *yuvConversionOut);
    VkSamplerYcbcrConversion getYuvConversionFromExternalFormat(uint64_t externalFormat) const;

  private:
    std::unordered_map<uint64_t, vk::RefCountedSamplerYcbcrConversion> mPayload;
};

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages is
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount         = 3;
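
// Relationship between the two constants above under the three-stage assumption (a sketch; this
// static_assert is illustrative and not claimed to exist elsewhere in ANGLE):
//
//     static_assert(kReservedDefaultUniformBindingCount ==
//                       3 * kReservedPerStageDefaultUniformBindingCount,
//                   "One default uniform binding is reserved per supported shader stage.");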
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_