/* Copyright (c) 2015-2021 The Khronos Group Inc.
 * Copyright (c) 2015-2021 Valve Corporation
 * Copyright (c) 2015-2021 LunarG, Inc.
 * Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author: Camden Stocker <camden@lunarg.com>
 * Author: Nadav Geva <nadav.geva@amd.com>
 */

#include "best_practices_validation.h"
#include "layer_chassis_dispatch.h"
#include "best_practices_error_enums.h"
#include "shader_validation.h"
#include "sync_utils.h"
#include "cmd_buffer_state.h"
#include "device_state.h"
#include "render_pass_state.h"

#include <string>
#include <bitset>
#include <memory>

struct VendorSpecificInfo {
    EnableFlags vendor_id;
    std::string name;
};

const std::map<BPVendorFlagBits, VendorSpecificInfo> kVendorInfo = {
    {kBPVendorArm, {vendor_specific_arm, "Arm"}},
    {kBPVendorAMD, {vendor_specific_amd, "AMD"}},
};

const SpecialUseVUIDs kSpecialUseInstanceVUIDs {
    kVUID_BestPractices_CreateInstance_SpecialUseExtension_CADSupport,
    kVUID_BestPractices_CreateInstance_SpecialUseExtension_D3DEmulation,
    kVUID_BestPractices_CreateInstance_SpecialUseExtension_DevTools,
    kVUID_BestPractices_CreateInstance_SpecialUseExtension_Debugging,
    kVUID_BestPractices_CreateInstance_SpecialUseExtension_GLEmulation,
};

const SpecialUseVUIDs kSpecialUseDeviceVUIDs {
    kVUID_BestPractices_CreateDevice_SpecialUseExtension_CADSupport,
    kVUID_BestPractices_CreateDevice_SpecialUseExtension_D3DEmulation,
    kVUID_BestPractices_CreateDevice_SpecialUseExtension_DevTools,
    kVUID_BestPractices_CreateDevice_SpecialUseExtension_Debugging,
    kVUID_BestPractices_CreateDevice_SpecialUseExtension_GLEmulation,
};

std::shared_ptr<CMD_BUFFER_STATE> BestPractices::CreateCmdBufferState(VkCommandBuffer cb,
                                                                      const VkCommandBufferAllocateInfo* pCreateInfo,
                                                                      const COMMAND_POOL_STATE* pool) {
    return std::static_pointer_cast<CMD_BUFFER_STATE>(std::make_shared<CMD_BUFFER_STATE_BP>(this, cb, pCreateInfo, pool));
}

CMD_BUFFER_STATE_BP::CMD_BUFFER_STATE_BP(BestPractices* bp, VkCommandBuffer cb, const VkCommandBufferAllocateInfo* pCreateInfo,
                                         const COMMAND_POOL_STATE* pool)
    : CMD_BUFFER_STATE(bp, cb, pCreateInfo, pool) {}

bool BestPractices::VendorCheckEnabled(BPVendorFlags vendors) const {
    for (const auto& vendor : kVendorInfo) {
        if (vendors & vendor.first && enabled[vendor.second.vendor_id]) {
            return true;
        }
    }
    return false;
}

const char* VendorSpecificTag(BPVendorFlags vendors) {
    // Cache built vendor tags in a map
    static layer_data::unordered_map<BPVendorFlags, std::string> tag_map;

    auto res = tag_map.find(vendors);
    if (res == tag_map.end()) {
        // Build the vendor tag string
        std::stringstream vendor_tag;

        vendor_tag << "[";
        bool first_vendor = true;
        for (const auto& vendor : kVendorInfo) {
            if (vendors & vendor.first) {
                if (!first_vendor) {
                    vendor_tag << ", ";
                }
                vendor_tag << vendor.second.name;
                first_vendor = false;
            }
        }
        vendor_tag << "]";

        tag_map[vendors] = vendor_tag.str();
        res = tag_map.find(vendors);
    }

    return res->second.c_str();
}

const char* DepReasonToString(ExtDeprecationReason reason) {
    switch (reason) {
        case kExtPromoted:
            return "promoted to";
            break;
        case kExtObsoleted:
            return "obsoleted by";
            break;
        case kExtDeprecated:
            return "deprecated by";
            break;
        default:
            return "";
            break;
    }
}

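// Warn when an application enables an extension that has been deprecated, obsoleted, or promoted to core.
// Enabling it is redundant either when the target core version is already being requested, or when the
// extension was deprecated/obsoleted by another extension (or removed with no replacement).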
bool BestPractices::ValidateDeprecatedExtensions(const char* api_name, const char* extension_name, uint32_t version,
                                                 const char* vuid) const {
    bool skip = false;
    auto dep_info_it = deprecated_extensions.find(extension_name);
    if (dep_info_it != deprecated_extensions.end()) {
        auto dep_info = dep_info_it->second;
        if (((dep_info.target.compare("VK_VERSION_1_1") == 0) && (version >= VK_API_VERSION_1_1)) ||
            ((dep_info.target.compare("VK_VERSION_1_2") == 0) && (version >= VK_API_VERSION_1_2))) {
            skip |=
                LogWarning(instance, vuid, "%s(): Attempting to enable deprecated extension %s, but this extension has been %s %s.",
                           api_name, extension_name, DepReasonToString(dep_info.reason), (dep_info.target).c_str());
        } else if (dep_info.target.find("VK_VERSION") == std::string::npos) {
            if (dep_info.target.length() == 0) {
                skip |= LogWarning(instance, vuid,
                                   "%s(): Attempting to enable deprecated extension %s, but this extension has been deprecated "
                                   "without replacement.",
                                   api_name, extension_name);
            } else {
                skip |= LogWarning(instance, vuid,
                                   "%s(): Attempting to enable deprecated extension %s, but this extension has been %s %s.",
                                   api_name, extension_name, DepReasonToString(dep_info.reason), (dep_info.target).c_str());
            }
        }
    }
    return skip;
}

bool BestPractices::ValidateSpecialUseExtensions(const char* api_name, const char* extension_name, const SpecialUseVUIDs& special_use_vuids) const
{
    bool skip = false;
    auto dep_info_it = special_use_extensions.find(extension_name);

    if (dep_info_it != special_use_extensions.end()) {
        const char* const format = "%s(): Attempting to enable extension %s, but this extension is intended to support %s "
                                   "and it is strongly recommended that it be otherwise avoided.";
        auto& special_uses = dep_info_it->second;

        if (special_uses.find("cadsupport") != std::string::npos) {
            skip |= LogWarning(instance, special_use_vuids.cadsupport, format, api_name, extension_name,
                "specialized functionality used by CAD/CAM applications");
        }
        if (special_uses.find("d3demulation") != std::string::npos) {
            skip |= LogWarning(instance, special_use_vuids.d3demulation, format, api_name, extension_name,
                "D3D emulation layers, and applications ported from D3D, by adding functionality specific to D3D");
        }
        if (special_uses.find("devtools") != std::string::npos) {
            skip |= LogWarning(instance, special_use_vuids.devtools, format, api_name, extension_name,
                "developer tools such as capture-replay libraries");
        }
        if (special_uses.find("debugging") != std::string::npos) {
            skip |= LogWarning(instance, special_use_vuids.debugging, format, api_name, extension_name,
                "use by applications when debugging");
        }
        if (special_uses.find("glemulation") != std::string::npos) {
            skip |= LogWarning(instance, special_use_vuids.glemulation, format, api_name, extension_name,
                "OpenGL and/or OpenGL ES emulation layers, and applications ported from those APIs, by adding functionality "
                "specific to those APIs");
        }
    }
    return skip;
}

void BestPractices::InitDeviceValidationObject(bool add_obj, ValidationObject* inst_obj, ValidationObject* dev_obj) {
    if (add_obj) {
        ValidationStateTracker::InitDeviceValidationObject(add_obj, inst_obj, dev_obj);
    }
}


bool BestPractices::PreCallValidateCreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator,
                                                  VkInstance* pInstance) const {
    bool skip = false;

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (white_list(pCreateInfo->ppEnabledExtensionNames[i], kDeviceExtensionNames)) {
            skip |= LogWarning(instance, kVUID_BestPractices_CreateInstance_ExtensionMismatch,
                               "vkCreateInstance(): Attempting to enable Device Extension %s at CreateInstance time.",
                               pCreateInfo->ppEnabledExtensionNames[i]);
        }
        uint32_t specified_version =
            (pCreateInfo->pApplicationInfo ? pCreateInfo->pApplicationInfo->apiVersion : VK_API_VERSION_1_0);
        skip |= ValidateDeprecatedExtensions("CreateInstance", pCreateInfo->ppEnabledExtensionNames[i], specified_version,
                                             kVUID_BestPractices_CreateInstance_DeprecatedExtension);
        skip |= ValidateSpecialUseExtensions("CreateInstance", pCreateInfo->ppEnabledExtensionNames[i], kSpecialUseInstanceVUIDs);
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo,
                                                const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) const {
    bool skip = false;

    // Get the API version of the physical device passed when creating the device.
    VkPhysicalDeviceProperties physical_device_properties{};
    DispatchGetPhysicalDeviceProperties(physicalDevice, &physical_device_properties);
    auto device_api_version = physical_device_properties.apiVersion;

    // Check API versions and warn if the instance API version is higher than the version on the device.
    if (api_version > device_api_version) {
        std::string inst_api_name = StringAPIVersion(api_version);
        std::string dev_api_name = StringAPIVersion(device_api_version);

        skip |= LogWarning(device, kVUID_BestPractices_CreateDevice_API_Mismatch,
                           "vkCreateDevice(): API Version of current instance, %s, is higher than API Version on device, %s.",
                           inst_api_name.c_str(), dev_api_name.c_str());
    }

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (white_list(pCreateInfo->ppEnabledExtensionNames[i], kInstanceExtensionNames)) {
            skip |= LogWarning(instance, kVUID_BestPractices_CreateDevice_ExtensionMismatch,
                               "vkCreateDevice(): Attempting to enable Instance Extension %s at CreateDevice time.",
                               pCreateInfo->ppEnabledExtensionNames[i]);
        }
        skip |= ValidateDeprecatedExtensions("CreateDevice", pCreateInfo->ppEnabledExtensionNames[i], api_version,
                                             kVUID_BestPractices_CreateDevice_DeprecatedExtension);
        skip |= ValidateSpecialUseExtensions("CreateDevice", pCreateInfo->ppEnabledExtensionNames[i], kSpecialUseDeviceVUIDs);
    }

    const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if ((bp_pd_state->vkGetPhysicalDeviceFeaturesState == UNCALLED) && (pCreateInfo->pEnabledFeatures != NULL)) {
        skip |= LogWarning(device, kVUID_BestPractices_CreateDevice_PDFeaturesNotCalled,
                           "vkCreateDevice() called before getting physical device features from vkGetPhysicalDeviceFeatures().");
    }

    if ((VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorAMD)) && (pCreateInfo->pEnabledFeatures != nullptr) &&
        (pCreateInfo->pEnabledFeatures->robustBufferAccess == VK_TRUE)) {
        skip |= LogPerformanceWarning(
            device, kVUID_BestPractices_CreateDevice_RobustBufferAccess,
            "%s %s vkCreateDevice() called with enabled robustBufferAccess. Use robustBufferAccess as a debugging tool during "
            "development. Enabling it causes loss in performance for accesses to uniform buffers and shader storage "
            "buffers. Disable robustBufferAccess in release builds. Only leave it enabled if the application use-case "
            "requires the additional level of reliability due to the use of unverified user-supplied draw parameters.",
            VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorAMD));
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateBuffer(VkDevice device, const VkBufferCreateInfo* pCreateInfo,
                                                const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer) const {
    bool skip = false;

    if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
        std::stringstream buffer_hex;
        buffer_hex << "0x" << std::hex << HandleToUint64(pBuffer);

        skip |= LogWarning(
            device, kVUID_BestPractices_SharingModeExclusive,
            "Warning: Buffer (%s) specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple queues "
            "(queueFamilyIndexCount of %" PRIu32 ").",
            buffer_hex.str().c_str(), pCreateInfo->queueFamilyIndexCount);
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateImage(VkDevice device, const VkImageCreateInfo* pCreateInfo,
                                               const VkAllocationCallbacks* pAllocator, VkImage* pImage) const {
    bool skip = false;

    if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
        std::stringstream image_hex;
        image_hex << "0x" << std::hex << HandleToUint64(pImage);

        skip |=
            LogWarning(device, kVUID_BestPractices_SharingModeExclusive,
                       "Warning: Image (%s) specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple queues "
                       "(queueFamilyIndexCount of %" PRIu32 ").",
                       image_hex.str().c_str(), pCreateInfo->queueFamilyIndexCount);
    }

    if (VendorCheckEnabled(kBPVendorArm)) {
        if (pCreateInfo->samples > kMaxEfficientSamplesArm) {
            skip |= LogPerformanceWarning(
                device, kVUID_BestPractices_CreateImage_TooLargeSampleCount,
                "%s vkCreateImage(): Trying to create an image with %u samples. "
                "The hardware revision may not have full throughput for framebuffers with more than %u samples.",
                VendorSpecificTag(kBPVendorArm), static_cast<uint32_t>(pCreateInfo->samples), kMaxEfficientSamplesArm);
        }

        if (pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT && !(pCreateInfo->usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT)) {
            skip |= LogPerformanceWarning(
                device, kVUID_BestPractices_CreateImage_NonTransientMSImage,
                "%s vkCreateImage(): Trying to create a multisampled image, but createInfo.usage did not have "
                "VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. Multisampled images may be resolved on-chip, "
                "and do not need to be backed by physical storage. "
                "TRANSIENT_ATTACHMENT allows tiled GPUs to not back the multisampled image with physical memory.",
                VendorSpecificTag(kBPVendorArm));
        }
    }

    if (VendorCheckEnabled(kBPVendorAMD)) {
        std::stringstream image_hex;
        image_hex << "0x" << std::hex << HandleToUint64(pImage);

        if ((pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
            (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT)) {
            skip |= LogPerformanceWarning(device,
                            kVUID_BestPractices_vkImage_AvoidConcurrentRenderTargets,
                            "%s Performance warning: image (%s) is created as a render target with VK_SHARING_MODE_CONCURRENT. "
                            "Using a SHARING_MODE_CONCURRENT "
                            "is not recommended with color and depth targets",
                            VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
        }

        if ((pCreateInfo->usage &
             (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
            (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
            skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_DontUseMutableRenderTargets,
                        "%s Performance warning: image (%s) is created as a render target with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT. "
                        "Using a MUTABLE_FORMAT is not recommended with color, depth, and storage targets",
                        VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
        }

        if ((pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
                (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
            skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_DontUseStorageRenderTargets,
                        "%s Performance warning: image (%s) is created as a render target with VK_IMAGE_USAGE_STORAGE_BIT. Using a "
                        "VK_IMAGE_USAGE_STORAGE_BIT is not recommended with color and depth targets",
                        VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
        }
    }

    return skip;
}

void BestPractices::PreCallRecordDestroyImage(VkDevice device, VkImage image, const VkAllocationCallbacks *pAllocator) {
    ValidationStateTracker::PreCallRecordDestroyImage(device, image, pAllocator);
    ReleaseImageUsageState(image);
}

void BestPractices::PreCallRecordDestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator) {
    if (VK_NULL_HANDLE != swapchain) {
        auto chain = Get<SWAPCHAIN_NODE>(swapchain);
        for (auto& image : chain->images) {
            if (image.image_state) {
                ReleaseImageUsageState(image.image_state->image());
            }
        }
    }
    ValidationStateTracker::PreCallRecordDestroySwapchainKHR(device, swapchain, pAllocator);
}

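// Returns the cached usage-tracking state for an image, creating it on first use. The per-image state holds one
// IMAGE_SUBRESOURCE_USAGE_BP entry per (array layer, mip level) pair, initialized to UNDEFINED, so later commands
// can record how each subresource was last used.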
IMAGE_STATE_BP* BestPractices::GetImageUsageState(VkImage vk_image) {
    auto itr = imageUsageMap.find(vk_image);
    if (itr != imageUsageMap.end()) {
        return &itr->second;
    } else {
        auto& state = imageUsageMap[vk_image];
        auto image = Get<IMAGE_STATE>(vk_image);
        state.image = image.get();
        state.usages.resize(image->createInfo.arrayLayers);
        for (auto& mips : state.usages) {
            mips.resize(image->createInfo.mipLevels, IMAGE_SUBRESOURCE_USAGE_BP::UNDEFINED);
        }
        return &state;
    }
}

void BestPractices::ReleaseImageUsageState(VkImage image) {
    auto itr = imageUsageMap.find(image);
    if (itr != imageUsageMap.end()) {
        imageUsageMap.erase(itr);
    }
}

bool BestPractices::PreCallValidateCreateSwapchainKHR(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo,
                                                      const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) const {
    bool skip = false;

    const auto* bp_pd_state = GetPhysicalDeviceState();
    if (bp_pd_state) {
        if (bp_pd_state->vkGetPhysicalDeviceSurfaceCapabilitiesKHRState == UNCALLED) {
            skip |= LogWarning(device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
                               "vkCreateSwapchainKHR() called before getting surface capabilities from "
                               "vkGetPhysicalDeviceSurfaceCapabilitiesKHR().");
        }

        if ((pCreateInfo->presentMode != VK_PRESENT_MODE_FIFO_KHR) &&
            (bp_pd_state->vkGetPhysicalDeviceSurfacePresentModesKHRState != QUERY_DETAILS)) {
            skip |= LogWarning(device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
                               "vkCreateSwapchainKHR() called before getting surface present mode(s) from "
                               "vkGetPhysicalDeviceSurfacePresentModesKHR().");
        }

        if (bp_pd_state->vkGetPhysicalDeviceSurfaceFormatsKHRState != QUERY_DETAILS) {
            skip |= LogWarning(
                device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
                "vkCreateSwapchainKHR() called before getting surface format(s) from vkGetPhysicalDeviceSurfaceFormatsKHR().");
        }
    }

    if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->imageSharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
        skip |=
            LogWarning(device, kVUID_BestPractices_SharingModeExclusive,
                       "Warning: A Swapchain is being created which specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while "
                       "specifying multiple queues (queueFamilyIndexCount of %" PRIu32 ").",
                       pCreateInfo->queueFamilyIndexCount);
    }

    if (pCreateInfo->minImageCount == 2) {
        skip |= LogPerformanceWarning(
            device, kVUID_BestPractices_SuboptimalSwapchainImageCount,
            "Warning: A Swapchain is being created with minImageCount set to %" PRIu32
            ", which means double buffering is going "
            "to be used. Using double buffering and vsync locks rendering to an integer fraction of the vsync rate, which in "
            "turn reduces the performance of the application if rendering is slower than vsync. Consider setting minImageCount "
            "to 3 to use triple buffering to maximize performance in such cases.",
            pCreateInfo->minImageCount);
    }

    if (VendorCheckEnabled(kBPVendorArm) && (pCreateInfo->presentMode != VK_PRESENT_MODE_FIFO_KHR)) {
        skip |= LogWarning(device, kVUID_BestPractices_CreateSwapchain_PresentMode,
                           "%s Warning: Swapchain is not being created with presentation mode \"VK_PRESENT_MODE_FIFO_KHR\". "
                           "Prefer using \"VK_PRESENT_MODE_FIFO_KHR\" to avoid unnecessary CPU and GPU load and save power. "
                           "Presentation modes which are not FIFO will present the latest available frame and discard other "
                           "frame(s) if any.",
                           VendorSpecificTag(kBPVendorArm));
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount,
                                                             const VkSwapchainCreateInfoKHR* pCreateInfos,
                                                             const VkAllocationCallbacks* pAllocator,
                                                             VkSwapchainKHR* pSwapchains) const {
    bool skip = false;

    for (uint32_t i = 0; i < swapchainCount; i++) {
        if ((pCreateInfos[i].queueFamilyIndexCount > 1) && (pCreateInfos[i].imageSharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
            skip |= LogWarning(
                device, kVUID_BestPractices_SharingModeExclusive,
                "Warning: A shared swapchain (index %" PRIu32
                ") is being created which specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple "
                "queues (queueFamilyIndexCount of %" PRIu32 ").",
                i, pCreateInfos[i].queueFamilyIndexCount);
        }
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateRenderPass(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo,
                                                    const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass) const {
    bool skip = false;

    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; ++i) {
        VkFormat format = pCreateInfo->pAttachments[i].format;
        if (pCreateInfo->pAttachments[i].initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
            if ((FormatIsColor(format) || FormatHasDepth(format)) &&
                pCreateInfo->pAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                skip |= LogWarning(device, kVUID_BestPractices_RenderPass_Attatchment,
                                   "Render pass has an attachment with loadOp == VK_ATTACHMENT_LOAD_OP_LOAD and "
                                   "initialLayout == VK_IMAGE_LAYOUT_UNDEFINED.  This is probably not what you "
                                   "intended.  Consider using VK_ATTACHMENT_LOAD_OP_DONT_CARE instead if the "
                                   "image truly is undefined at the start of the render pass.");
            }
            if (FormatHasStencil(format) && pCreateInfo->pAttachments[i].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                skip |= LogWarning(device, kVUID_BestPractices_RenderPass_Attatchment,
                                   "Render pass has an attachment with stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD "
                                   "and initialLayout == VK_IMAGE_LAYOUT_UNDEFINED.  This is probably not what you "
                                   "intended.  Consider using VK_ATTACHMENT_LOAD_OP_DONT_CARE instead if the "
                                   "image truly is undefined at the start of the render pass.");
            }
        }

        const auto& attachment = pCreateInfo->pAttachments[i];
        if (attachment.samples > VK_SAMPLE_COUNT_1_BIT) {
            bool access_requires_memory =
                attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || attachment.storeOp == VK_ATTACHMENT_STORE_OP_STORE;

            if (FormatHasStencil(format)) {
                access_requires_memory |= attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
                                          attachment.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE;
            }

            if (access_requires_memory) {
                skip |= LogPerformanceWarning(
                    device, kVUID_BestPractices_CreateRenderPass_ImageRequiresMemory,
                    "Attachment %u in the VkRenderPass is a multisampled image with %u samples, but it uses loadOp/storeOp "
                    "which requires accessing data from memory. Multisampled images should always be loadOp = CLEAR or DONT_CARE, "
                    "storeOp = DONT_CARE. This allows the implementation to use lazily allocated memory effectively.",
                    i, static_cast<uint32_t>(attachment.samples));
            }
        }
    }

    for (uint32_t dependency = 0; dependency < pCreateInfo->dependencyCount; dependency++) {
        skip |= CheckPipelineStageFlags("vkCreateRenderPass", pCreateInfo->pDependencies[dependency].srcStageMask);
        skip |= CheckPipelineStageFlags("vkCreateRenderPass", pCreateInfo->pDependencies[dependency].dstStageMask);
    }

    return skip;
}

bool BestPractices::ValidateAttachments(const VkRenderPassCreateInfo2* rpci, uint32_t attachmentCount,
                                        const VkImageView* image_views) const {
    bool skip = false;

    // Check for non-transient attachments that should be transient and vice versa
    for (uint32_t i = 0; i < attachmentCount; ++i) {
        const auto& attachment = rpci->pAttachments[i];
        bool attachment_should_be_transient =
            (attachment.loadOp != VK_ATTACHMENT_LOAD_OP_LOAD && attachment.storeOp != VK_ATTACHMENT_STORE_OP_STORE);

        if (FormatHasStencil(attachment.format)) {
            attachment_should_be_transient &= (attachment.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_LOAD &&
                                               attachment.stencilStoreOp != VK_ATTACHMENT_STORE_OP_STORE);
        }

        auto view_state = Get<IMAGE_VIEW_STATE>(image_views[i]);
        if (view_state) {
            const auto& ici = view_state->image_state->createInfo;

            bool image_is_transient = (ici.usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) != 0;

            // The check for an image that should not be transient applies to all GPUs
            if (!attachment_should_be_transient && image_is_transient) {
                skip |= LogPerformanceWarning(
                    device, kVUID_BestPractices_CreateFramebuffer_AttachmentShouldNotBeTransient,
                    "Attachment %u in VkFramebuffer uses loadOp/storeOps which need to access physical memory, "
                    "but the image backing the image view has VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. "
                    "Physical memory will need to be backed lazily to this image, potentially causing stalls.",
                    i);
            }

            bool supports_lazy = false;
            for (uint32_t j = 0; j < phys_dev_mem_props.memoryTypeCount; j++) {
                if (phys_dev_mem_props.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
                    supports_lazy = true;
                }
            }

            // The check for an image that should be transient only applies to GPUs supporting
            // lazily allocated memory
            if (supports_lazy && attachment_should_be_transient && !image_is_transient) {
                skip |= LogPerformanceWarning(
                    device, kVUID_BestPractices_CreateFramebuffer_AttachmentShouldBeTransient,
                    "Attachment %u in VkFramebuffer uses loadOp/storeOps which never have to be backed by physical memory, "
                    "but the image backing the image view does not have VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. "
                    "You can save physical memory by using transient attachment backed by lazily allocated memory here.",
                    i);
            }
        }
    }
    return skip;
}

bool BestPractices::PreCallValidateCreateFramebuffer(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo,
                                                     const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer) const {
    bool skip = false;

    auto rp_state = Get<RENDER_PASS_STATE>(pCreateInfo->renderPass);
    if (rp_state && !(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) {
        skip = ValidateAttachments(rp_state->createInfo.ptr(), pCreateInfo->attachmentCount, pCreateInfo->pAttachments);
    }

    return skip;
}

bool BestPractices::PreCallValidateAllocateDescriptorSets(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo,
                                                          VkDescriptorSet* pDescriptorSets, void* ads_state_data) const {
    bool skip = false;
    skip |= ValidationStateTracker::PreCallValidateAllocateDescriptorSets(device, pAllocateInfo, pDescriptorSets, ads_state_data);

    if (!skip) {
        const auto& pool_handle = pAllocateInfo->descriptorPool;
        auto iter = descriptor_pool_freed_count.find(pool_handle);
        // if the number of freed sets > 0, it implies they could be recycled instead if desirable
        // this warning is specific to Arm
        if (VendorCheckEnabled(kBPVendorArm) && iter != descriptor_pool_freed_count.end() && iter->second > 0) {
            skip |= LogPerformanceWarning(
                device, kVUID_BestPractices_AllocateDescriptorSets_SuboptimalReuse,
                "%s Descriptor set memory was allocated via vkAllocateDescriptorSets() for sets which were previously freed in the "
                "same logical device. On some drivers or architectures it may be most optimal to re-use existing descriptor sets.",
                VendorSpecificTag(kBPVendorArm));
        }
    }

    return skip;
}

void BestPractices::ManualPostCallRecordAllocateDescriptorSets(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo,
                                                               VkDescriptorSet* pDescriptorSets, VkResult result, void* ads_state) {
    if (result == VK_SUCCESS) {
        // find the free count for the pool we allocated into
        auto iter = descriptor_pool_freed_count.find(pAllocateInfo->descriptorPool);
        if (iter != descriptor_pool_freed_count.end()) {
            // we record successful allocations by subtracting the allocation count from the last recorded free count
            const auto alloc_count = pAllocateInfo->descriptorSetCount;
            // clamp the unsigned subtraction to the range [0, last_free_count]
            if (iter->second > alloc_count) {
                iter->second -= alloc_count;
            } else {
                iter->second = 0;
            }
        }
    }
}

void BestPractices::PostCallRecordFreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount,
                                                     const VkDescriptorSet* pDescriptorSets, VkResult result) {
    ValidationStateTracker::PostCallRecordFreeDescriptorSets(device, descriptorPool, descriptorSetCount, pDescriptorSets, result);
    if (result == VK_SUCCESS) {
        // we want to track frees because we're interested in suggesting re-use
        auto iter = descriptor_pool_freed_count.find(descriptorPool);
        if (iter == descriptor_pool_freed_count.end()) {
            descriptor_pool_freed_count.emplace(descriptorPool, descriptorSetCount);
        } else {
            iter->second += descriptorSetCount;
        }
    }
}

bool BestPractices::PreCallValidateAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
                                                  const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) const {
    bool skip = false;

    if (num_mem_objects + 1 > kMemoryObjectWarningLimit) {
        skip |= LogPerformanceWarning(device, kVUID_BestPractices_AllocateMemory_TooManyObjects,
                                      "Performance Warning: This app has > %" PRIu32 " memory objects.", kMemoryObjectWarningLimit);
    }

    if (pAllocateInfo->allocationSize < kMinDeviceAllocationSize) {
        skip |= LogPerformanceWarning(
            device, kVUID_BestPractices_AllocateMemory_SmallAllocation,
            "vkAllocateMemory(): Allocating a VkDeviceMemory of size %" PRIu64 ". This is a very small allocation (current "
            "threshold is %" PRIu64 " bytes). "
            "You should make large allocations and sub-allocate from one large VkDeviceMemory.",
            pAllocateInfo->allocationSize, kMinDeviceAllocationSize);
    }

    // TODO: Insert get check for GetPhysicalDeviceMemoryProperties once the state is tracked in the StateTracker

    return skip;
}

void BestPractices::ManualPostCallRecordAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
                                                       const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory,
                                                       VkResult result) {
    if (result != VK_SUCCESS) {
        static std::vector<VkResult> error_codes = {VK_ERROR_OUT_OF_HOST_MEMORY, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                                                    VK_ERROR_TOO_MANY_OBJECTS, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                                                    VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS};
        static std::vector<VkResult> success_codes = {};
        ValidateReturnCodes("vkAllocateMemory", result, error_codes, success_codes);
        return;
    }
    num_mem_objects++;
}

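// Classify a non-ideal VkResult for the given API call: error codes that commonly occur in normal operation
// (e.g. VK_ERROR_OUT_OF_DATE_KHR) are reported at info level, other listed error codes as warnings, and
// listed non-success success codes as info.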
void BestPractices::ValidateReturnCodes(const char* api_name, VkResult result, const std::vector<VkResult>& error_codes,
                                        const std::vector<VkResult>& success_codes) const {
    auto error = std::find(error_codes.begin(), error_codes.end(), result);
    if (error != error_codes.end()) {
        static const std::vector<VkResult> common_failure_codes = {VK_ERROR_OUT_OF_DATE_KHR,
                                                                   VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT};

        auto common_failure = std::find(common_failure_codes.begin(), common_failure_codes.end(), result);
        if (common_failure != common_failure_codes.end()) {
            LogInfo(instance, kVUID_BestPractices_Failure_Result, "%s(): Returned error %s.", api_name, string_VkResult(result));
        } else {
            LogWarning(instance, kVUID_BestPractices_Error_Result, "%s(): Returned error %s.", api_name, string_VkResult(result));
        }
        return;
    }
    auto success = std::find(success_codes.begin(), success_codes.end(), result);
    if (success != success_codes.end()) {
        LogInfo(instance, kVUID_BestPractices_NonSuccess_Result, "%s(): Returned non-success return code %s.", api_name,
                string_VkResult(result));
    }
}

bool BestPractices::PreCallValidateFreeMemory(VkDevice device, VkDeviceMemory memory,
                                              const VkAllocationCallbacks* pAllocator) const {
    if (memory == VK_NULL_HANDLE) return false;
    bool skip = false;

    const auto mem_info = Get<DEVICE_MEMORY_STATE>(memory);

    for (const auto& node: mem_info->ObjectBindings()) {
        const auto& obj = node->Handle();
        LogObjectList objlist(device);
        objlist.add(obj);
        objlist.add(mem_info->mem());
        skip |= LogWarning(objlist, layer_name.c_str(), "VK Object %s still has a reference to mem obj %s.",
                           report_data->FormatHandle(obj).c_str(), report_data->FormatHandle(mem_info->mem()).c_str());
    }

    return skip;
}

void BestPractices::PreCallRecordFreeMemory(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks* pAllocator) {
    ValidationStateTracker::PreCallRecordFreeMemory(device, memory, pAllocator);
    if (memory != VK_NULL_HANDLE) {
        num_mem_objects--;
    }
}

bool BestPractices::ValidateBindBufferMemory(VkBuffer buffer, VkDeviceMemory memory, const char* api_name) const {
    bool skip = false;
    const auto buffer_state = Get<BUFFER_STATE>(buffer);

    if (!buffer_state->memory_requirements_checked && !buffer_state->external_memory_handle) {
        skip |= LogWarning(device, kVUID_BestPractices_BufferMemReqNotCalled,
                           "%s: Binding memory to %s but vkGetBufferMemoryRequirements() has not been called on that buffer.",
                           api_name, report_data->FormatHandle(buffer).c_str());
    }

    const auto mem_state = Get<DEVICE_MEMORY_STATE>(memory);

    if (mem_state && mem_state->alloc_info.allocationSize == buffer_state->createInfo.size &&
        mem_state->alloc_info.allocationSize < kMinDedicatedAllocationSize) {
        skip |= LogPerformanceWarning(
            device, kVUID_BestPractices_SmallDedicatedAllocation,
            "%s: Trying to bind %s to a memory block which is fully consumed by the buffer. "
            "The required size of the allocation is %" PRIu64 ", but smaller buffers like this should be sub-allocated from "
            "larger memory blocks. (Current threshold is %" PRIu64 " bytes.)",
            api_name, report_data->FormatHandle(buffer).c_str(), mem_state->alloc_info.allocationSize, kMinDedicatedAllocationSize);
    }

    return skip;
}

bool BestPractices::PreCallValidateBindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
                                                    VkDeviceSize memoryOffset) const {
    bool skip = false;
    const char* api_name = "BindBufferMemory()";

    skip |= ValidateBindBufferMemory(buffer, memory, api_name);

    return skip;
}

bool BestPractices::PreCallValidateBindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
                                                     const VkBindBufferMemoryInfo* pBindInfos) const {
    char api_name[64];
    bool skip = false;

    for (uint32_t i = 0; i < bindInfoCount; i++) {
        sprintf(api_name, "vkBindBufferMemory2() pBindInfos[%u]", i);
        skip |= ValidateBindBufferMemory(pBindInfos[i].buffer, pBindInfos[i].memory, api_name);
    }

    return skip;
}

bool BestPractices::PreCallValidateBindBufferMemory2KHR(VkDevice device, uint32_t bindInfoCount,
                                                        const VkBindBufferMemoryInfo* pBindInfos) const {
    char api_name[64];
    bool skip = false;

    for (uint32_t i = 0; i < bindInfoCount; i++) {
        sprintf(api_name, "vkBindBufferMemory2KHR() pBindInfos[%u]", i);
        skip |= ValidateBindBufferMemory(pBindInfos[i].buffer, pBindInfos[i].memory, api_name);
    }

    return skip;
}

bool BestPractices::ValidateBindImageMemory(VkImage image, VkDeviceMemory memory, const char* api_name) const {
    bool skip = false;
    const auto image_state = Get<IMAGE_STATE>(image);

    if (image_state->disjoint == false) {
        if (!image_state->memory_requirements_checked[0] && !image_state->external_memory_handle) {
            skip |= LogWarning(device, kVUID_BestPractices_ImageMemReqNotCalled,
                               "%s: Binding memory to %s but vkGetImageMemoryRequirements() has not been called on that image.",
                               api_name, report_data->FormatHandle(image).c_str());
        }
    } else {
        // TODO If binding disjoint image then this needs to check that VkImagePlaneMemoryRequirementsInfo was called for each
        // plane.
    }

    const auto mem_state = Get<DEVICE_MEMORY_STATE>(memory);

    if (mem_state->alloc_info.allocationSize == image_state->requirements[0].size &&
        mem_state->alloc_info.allocationSize < kMinDedicatedAllocationSize) {
        skip |= LogPerformanceWarning(
            device, kVUID_BestPractices_SmallDedicatedAllocation,
            "%s: Trying to bind %s to a memory block which is fully consumed by the image. "
            "The required size of the allocation is %" PRIu64 ", but smaller images like this should be sub-allocated from "
            "larger memory blocks. (Current threshold is %" PRIu64 " bytes.)",
            api_name, report_data->FormatHandle(image).c_str(), mem_state->alloc_info.allocationSize, kMinDedicatedAllocationSize);
    }

    // If we're binding memory to an image which was created as TRANSIENT and the image supports LAZY allocation,
    // make sure this type is actually used.
    // This warning will only trigger if this layer is run on a platform that supports LAZILY_ALLOCATED_BIT
    // (i.e. most tile-based renderers).
    if (image_state->createInfo.usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) {
        bool supports_lazy = false;
        uint32_t suggested_type = 0;

        for (uint32_t i = 0; i < phys_dev_mem_props.memoryTypeCount; i++) {
            if ((1u << i) & image_state->requirements[0].memoryTypeBits) {
                if (phys_dev_mem_props.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
                    supports_lazy = true;
                    suggested_type = i;
                    break;
                }
            }
        }

        uint32_t allocated_properties = phys_dev_mem_props.memoryTypes[mem_state->alloc_info.memoryTypeIndex].propertyFlags;

        if (supports_lazy && (allocated_properties & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) == 0) {
            skip |= LogPerformanceWarning(
                device, kVUID_BestPractices_NonLazyTransientImage,
                "%s: Attempting to bind memory type %u to VkImage which was created with TRANSIENT_ATTACHMENT_BIT, "
                "but this memory type is not LAZILY_ALLOCATED_BIT. You should use memory type %u here instead to save "
                "%" PRIu64 " bytes of physical memory.",
                api_name, mem_state->alloc_info.memoryTypeIndex, suggested_type, image_state->requirements[0].size);
        }
    }

    return skip;
}

bool BestPractices::PreCallValidateBindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,
                                                   VkDeviceSize memoryOffset) const {
    bool skip = false;
    const char* api_name = "vkBindImageMemory()";

    skip |= ValidateBindImageMemory(image, memory, api_name);

    return skip;
}

bool BestPractices::PreCallValidateBindImageMemory2(VkDevice device, uint32_t bindInfoCount,
                                                    const VkBindImageMemoryInfo* pBindInfos) const {
    char api_name[64];
    bool skip = false;

    for (uint32_t i = 0; i < bindInfoCount; i++) {
        sprintf(api_name, "vkBindImageMemory2() pBindInfos[%u]", i);
        if (!LvlFindInChain<VkBindImageMemorySwapchainInfoKHR>(pBindInfos[i].pNext)) {
            skip |= ValidateBindImageMemory(pBindInfos[i].image, pBindInfos[i].memory, api_name);
        }
    }

    return skip;
}

bool BestPractices::PreCallValidateBindImageMemory2KHR(VkDevice device, uint32_t bindInfoCount,
                                                       const VkBindImageMemoryInfo* pBindInfos) const {
    char api_name[64];
    bool skip = false;

    for (uint32_t i = 0; i < bindInfoCount; i++) {
        sprintf(api_name, "vkBindImageMemory2KHR() pBindInfos[%u]", i);
        skip |= ValidateBindImageMemory(pBindInfos[i].image, pBindInfos[i].memory, api_name);
    }

    return skip;
}

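// The formats listed below are treated as unable to blend at full throughput on Arm GPUs when multisampling is
// enabled; every other format is assumed to blend at full rate. Used by ValidateMultisampledBlendingArm to decide
// whether to emit a performance warning.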
static inline bool FormatHasFullThroughputBlendingArm(VkFormat format) {
    switch (format) {
        case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
        case VK_FORMAT_R16_SFLOAT:
        case VK_FORMAT_R16G16_SFLOAT:
        case VK_FORMAT_R16G16B16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
        case VK_FORMAT_R32_SFLOAT:
        case VK_FORMAT_R32G32_SFLOAT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            return false;

        default:
            return true;
    }
}

bool BestPractices::ValidateMultisampledBlendingArm(uint32_t createInfoCount,
                                                    const VkGraphicsPipelineCreateInfo* pCreateInfos) const {
    bool skip = false;

    for (uint32_t i = 0; i < createInfoCount; i++) {
        auto create_info = &pCreateInfos[i];

        if (!create_info->pColorBlendState || !create_info->pMultisampleState ||
            create_info->pMultisampleState->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
            create_info->pMultisampleState->sampleShadingEnable) {
            return skip;
        }

        auto rp_state = Get<RENDER_PASS_STATE>(create_info->renderPass);
        const auto& subpass = rp_state->createInfo.pSubpasses[create_info->subpass];

        // According to spec, pColorBlendState must be ignored if subpass does not have color attachments.
        uint32_t num_color_attachments = std::min(subpass.colorAttachmentCount, create_info->pColorBlendState->attachmentCount);

        for (uint32_t j = 0; j < num_color_attachments; j++) {
            const auto& blend_att = create_info->pColorBlendState->pAttachments[j];
            uint32_t att = subpass.pColorAttachments[j].attachment;

            if (att != VK_ATTACHMENT_UNUSED && blend_att.blendEnable && blend_att.colorWriteMask) {
                if (!FormatHasFullThroughputBlendingArm(rp_state->createInfo.pAttachments[att].format)) {
                    skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MultisampledBlending,
                                                  "%s vkCreateGraphicsPipelines() - createInfo #%u: Pipeline is multisampled and "
                                                  "color attachment #%u makes use "
                                                  "of a format which cannot be blended at full throughput when using MSAA.",
                                                  VendorSpecificTag(kBPVendorArm), i, j);
                }
            }
        }
    }

    return skip;
}

ManualPostCallRecordCreateComputePipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t createInfoCount,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * pipe_state)942 void BestPractices::ManualPostCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
943                                                          const VkComputePipelineCreateInfo* pCreateInfos,
944                                                          const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
945                                                          VkResult result, void* pipe_state) {
946     // AMD best practice
947     pipeline_cache = pipelineCache;
948 }
949 
950 bool BestPractices::PreCallValidateCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
951                                                            const VkGraphicsPipelineCreateInfo* pCreateInfos,
952                                                            const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
953                                                            void* cgpl_state_data) const {
954     bool skip = StateTracker::PreCallValidateCreateGraphicsPipelines(device, pipelineCache, createInfoCount, pCreateInfos,
955                                                                      pAllocator, pPipelines, cgpl_state_data);
956     create_graphics_pipeline_api_state* cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state*>(cgpl_state_data);
957 
958     if ((createInfoCount > 1) && (!pipelineCache)) {
959         skip |= LogPerformanceWarning(
960             device, kVUID_BestPractices_CreatePipelines_MultiplePipelines,
961             "Performance Warning: This vkCreateGraphicsPipelines call is creating multiple pipelines but is not using a "
962             "pipeline cache. Using a pipeline cache may improve performance.");
963     }
964 
965     for (uint32_t i = 0; i < createInfoCount; i++) {
966         const auto& create_info = pCreateInfos[i];
967 
968         if (!(cgpl_state->pipe_state[i]->active_shaders & VK_SHADER_STAGE_MESH_BIT_NV)) {
969             const auto& vertex_input = *create_info.pVertexInputState;
970             uint32_t count = 0;
971             for (uint32_t j = 0; j < vertex_input.vertexBindingDescriptionCount; j++) {
972                 if (vertex_input.pVertexBindingDescriptions[j].inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
973                     count++;
974                 }
975             }
976             if (count > kMaxInstancedVertexBuffers) {
977                 skip |= LogPerformanceWarning(
978                     device, kVUID_BestPractices_CreatePipelines_TooManyInstancedVertexBuffers,
979                     "The pipeline is using %u instanced vertex buffers (current limit: %u), but this can be inefficient on the "
980                     "GPU. If using instanced vertex attributes, prefer interleaving them in a single buffer.",
981                     count, kMaxInstancedVertexBuffers);
982             }
983         }
984 
985         if ((pCreateInfos[i].pRasterizationState->depthBiasEnable) &&
986             (pCreateInfos[i].pRasterizationState->depthBiasConstantFactor == 0.0f) &&
987             (pCreateInfos[i].pRasterizationState->depthBiasSlopeFactor == 0.0f) &&
988             VendorCheckEnabled(kBPVendorArm)) {
989             skip |= LogPerformanceWarning(
990                 device, kVUID_BestPractices_CreatePipelines_DepthBias_Zero,
991                 "%s Performance Warning: This vkCreateGraphicsPipelines call creates a pipeline with depthBiasEnable set to "
992                 "VK_TRUE while both depthBiasConstantFactor and depthBiasSlopeFactor are set to 0. This can cause reduced "
993                 "efficiency during rasterization. Consider disabling depthBias or increasing either "
994                 "depthBiasConstantFactor or depthBiasSlopeFactor.",
995                 VendorSpecificTag(kBPVendorArm));
996         }
997 
998         skip |= VendorCheckEnabled(kBPVendorArm) && ValidateMultisampledBlendingArm(createInfoCount, pCreateInfos);
999     }
1000     if (VendorCheckEnabled(kBPVendorAMD)) {
1001         if (pipelineCache && pipeline_cache && pipelineCache != pipeline_cache) {
1002             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MultiplePipelineCaches,
1003                             "%s Performance Warning: A second pipeline cache is in use. Consider using only one pipeline cache to "
1004                             "improve cache hit rate.", VendorSpecificTag(kBPVendorAMD));
1005         }
1006 
1007         if (num_pso > kMaxRecommendedNumberOfPSOAMD) {
1008             skip |=
1009                 LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_TooManyPipelines,
1010                                           "%s Performance warning: Too many pipelines created, consider consolidation",
1011                                           VendorSpecificTag(kBPVendorAMD));
1012         }
1013 
1014         if (pCreateInfos->pInputAssemblyState && pCreateInfos->pInputAssemblyState->primitiveRestartEnable) {
1015             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_AvoidPrimitiveRestart,
1016                                           "%s Performance warning: Use of primitive restart is not recommended",
1017                                           VendorSpecificTag(kBPVendorAMD));
1018         }
1019 
1020         // TODO: this might be too aggressive of a check
1021         if (pCreateInfos->pDynamicState && pCreateInfos->pDynamicState->dynamicStateCount > kDynamicStatesWarningLimitAMD) {
1022             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MinimizeNumDynamicStates,
1023                                           "%s Performance warning: Dynamic state usage incurs a performance cost. Ensure that it is truly needed.",
1024                                           VendorSpecificTag(kBPVendorAMD));
1025         }
1026     }
1027 
1028     return skip;
1029 }
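/* Illustrative application-side sketch (not part of this layer): a hypothetical way to create
   several pipelines through a single VkPipelineCache, which is what the MultiplePipelines and
   MultiplePipelineCaches warnings above suggest. The `device`, `create_infos`, and `pipelines`
   variables are assumed to exist in the calling application.

    VkPipelineCacheCreateInfo cache_info = {};
    cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
    VkPipelineCache cache = VK_NULL_HANDLE;
    vkCreatePipelineCache(device, &cache_info, nullptr, &cache);

    // Route every pipeline build through the same cache so identical state is compiled only once.
    vkCreateGraphicsPipelines(device, cache, static_cast<uint32_t>(create_infos.size()),
                              create_infos.data(), nullptr, pipelines.data());
*/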
1030 
1031 void BestPractices::PreCallRecordDestroyPipeline(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator)
1032 {
1033     auto itr = graphicsPipelineCIs.find(pipeline);
1034     if (itr != graphicsPipelineCIs.end()) {
1035         graphicsPipelineCIs.erase(itr);
1036     }
1037     ValidationStateTracker::PreCallRecordDestroyPipeline(device, pipeline, pAllocator);
1038 }
1039 
1040 void BestPractices::ManualPostCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1041                                                                 const VkGraphicsPipelineCreateInfo* pCreateInfos,
1042                                                                 const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
1043                                                                 VkResult result, void* cgpl_state_data) {
1044     for (size_t i = 0; i < count; i++) {
1045         const auto* cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state*>(cgpl_state_data);
1046         const VkPipeline pipeline_handle = pPipelines[i];
1047 
1048         // record depth stencil state and color blend states for depth pre-pass tracking purposes
1049         GraphicsPipelineCIs& cis = graphicsPipelineCIs[pipeline_handle];
1050 
1051         auto& create_info = cgpl_state->pCreateInfos[i];
1052 
1053         if (create_info.pColorBlendState) {
1054             cis.colorBlendStateCI.emplace(create_info.pColorBlendState);
1055         }
1056 
1057         if (create_info.pDepthStencilState) {
1058             cis.depthStencilStateCI.emplace(create_info.pDepthStencilState);
1059         }
1060         if (create_info.renderPass == VK_NULL_HANDLE) {
1061             // TODO: dynamic rendering (renderPass == VK_NULL_HANDLE) is not handled yet; skip to avoid dereferencing a null render pass state
1062             LogWarning(device, kVUID_BestPractices_DynamicRendering_NotSupported,
1063                        "vkCreateGraphicsPipelines: pCreateInfos[%" PRIu32 "].renderPass is VK_NULL_HANDLE; VK_KHR_dynamic_rendering is not supported.\n",
1064                        static_cast<uint32_t>(i));
1065             continue;
1066         }
1067         // Record which frame buffer attachments we should consider to be accessed when a draw call is performed.
1068         auto rp = Get<RENDER_PASS_STATE>(create_info.renderPass);
1069         auto& subpass = rp->createInfo.pSubpasses[create_info.subpass];
1070         cis.accessFramebufferAttachments.clear();
1071 
1072         if (cis.colorBlendStateCI) {
1073             // According to spec, pColorBlendState must be ignored if subpass does not have color attachments.
1074             uint32_t num_color_attachments = std::min(subpass.colorAttachmentCount, cis.colorBlendStateCI->attachmentCount);
1075             for (uint32_t j = 0; j < num_color_attachments; j++) {
1076                 if (cis.colorBlendStateCI->pAttachments[j].colorWriteMask != 0) {
1077                     uint32_t attachment = subpass.pColorAttachments[j].attachment;
1078                     if (attachment != VK_ATTACHMENT_UNUSED) {
1079                         cis.accessFramebufferAttachments.push_back({ attachment, VK_IMAGE_ASPECT_COLOR_BIT });
1080                     }
1081                 }
1082             }
1083         }
1084 
1085         if (cis.depthStencilStateCI && (cis.depthStencilStateCI->depthTestEnable ||
1086                                         cis.depthStencilStateCI->depthBoundsTestEnable ||
1087                                         cis.depthStencilStateCI->stencilTestEnable)) {
1088             uint32_t attachment = subpass.pDepthStencilAttachment ?
1089                                   subpass.pDepthStencilAttachment->attachment :
1090                                   VK_ATTACHMENT_UNUSED;
1091             if (attachment != VK_ATTACHMENT_UNUSED) {
1092                 VkImageAspectFlags aspects = 0;
1093                 if (cis.depthStencilStateCI->depthTestEnable || cis.depthStencilStateCI->depthBoundsTestEnable) {
1094                     aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1095                 }
1096                 if (cis.depthStencilStateCI->stencilTestEnable) {
1097                     aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1098                 }
1099                 cis.accessFramebufferAttachments.push_back({ attachment, aspects });
1100             }
1101         }
1102     }
1103 
1104     // AMD best practice
1105     pipeline_cache = pipelineCache;
1106 }
1107 
1108 bool BestPractices::PreCallValidateCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
1109                                                           const VkComputePipelineCreateInfo* pCreateInfos,
1110                                                           const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
1111                                                           void* ccpl_state_data) const {
1112     bool skip = StateTracker::PreCallValidateCreateComputePipelines(device, pipelineCache, createInfoCount, pCreateInfos,
1113                                                                     pAllocator, pPipelines, ccpl_state_data);
1114 
1115     if ((createInfoCount > 1) && (!pipelineCache)) {
1116         skip |= LogPerformanceWarning(
1117             device, kVUID_BestPractices_CreatePipelines_MultiplePipelines,
1118             "Performance Warning: This vkCreateComputePipelines call is creating multiple pipelines but is not using a "
1119             "pipeline cache. Using a pipeline cache may improve performance.");
1120     }
1121 
1122     if (VendorCheckEnabled(kBPVendorAMD)) {
1123         if (pipelineCache && pipeline_cache && pipelineCache != pipeline_cache) {
1124             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MultiplePipelineCaches,
1125                                           "%s Performance Warning: A second pipeline cache is in use. Consider using only one "
1126                                           "pipeline cache to improve cache hit rate.",
1127                                           VendorSpecificTag(kBPVendorAMD));
1128         }
1129     }
1130 
1131     if (VendorCheckEnabled(kBPVendorArm)) {
1132         for (size_t i = 0; i < createInfoCount; i++) {
1133             skip |= ValidateCreateComputePipelineArm(pCreateInfos[i]);
1134         }
1135     }
1136 
1137     return skip;
1138 }
1139 
1140 bool BestPractices::ValidateCreateComputePipelineArm(const VkComputePipelineCreateInfo& createInfo) const {
1141     bool skip = false;
1142     auto module = Get<SHADER_MODULE_STATE>(createInfo.stage.module);
1143     // Generate warnings about work group sizes based on active resources.
1144     auto entrypoint = module->FindEntrypoint(createInfo.stage.pName, createInfo.stage.stage);
1145     if (entrypoint == module->end()) return false;
1146 
1147     uint32_t x = 1, y = 1, z = 1;
1148     module->FindLocalSize(entrypoint, x, y, z);
1149 
1150     uint32_t thread_count = x * y * z;
1151 
1152     // Generate a priori warnings about work group sizes.
1153     if (thread_count > kMaxEfficientWorkGroupThreadCountArm) {
1154         skip |= LogPerformanceWarning(
1155             device, kVUID_BestPractices_CreateComputePipelines_ComputeWorkGroupSize,
1156             "%s vkCreateComputePipelines(): compute shader with work group dimensions (%u, %u, "
1157             "%u) (%u threads total) has more threads than advised in a single work group. It is advised to use work "
1158             "groups with fewer than %u threads, especially when using barrier() or shared memory.",
1159             VendorSpecificTag(kBPVendorArm), x, y, z, thread_count, kMaxEfficientWorkGroupThreadCountArm);
1160     }
1161 
1162     if (thread_count == 1 || ((x > 1) && (x & (kThreadGroupDispatchCountAlignmentArm - 1))) ||
1163         ((y > 1) && (y & (kThreadGroupDispatchCountAlignmentArm - 1))) ||
1164         ((z > 1) && (z & (kThreadGroupDispatchCountAlignmentArm - 1)))) {
1165         skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreateComputePipelines_ComputeThreadGroupAlignment,
1166                                       "%s vkCreateComputePipelines(): compute shader with work group dimensions (%u, "
1167                                       "%u, %u) is not aligned to %u "
1168                                       "threads. On Arm Mali architectures, not aligning work group sizes to %u may "
1169                                       "leave threads idle on the shader "
1170                                       "core.",
1171                                       VendorSpecificTag(kBPVendorArm), x, y, z, kThreadGroupDispatchCountAlignmentArm,
1172                                       kThreadGroupDispatchCountAlignmentArm);
1173     }
1174 
1175     auto accessible_ids = module->MarkAccessibleIds(entrypoint);
1176     auto descriptor_uses = module->CollectInterfaceByDescriptorSlot(accessible_ids);
1177 
1178     unsigned dimensions = 0;
1179     if (x > 1) dimensions++;
1180     if (y > 1) dimensions++;
1181     if (z > 1) dimensions++;
1182     // Here the dimension will really depend on the dispatch grid, but assume it's 1D.
1183     dimensions = std::max(dimensions, 1u);
1184 
1185     // If we're accessing images, we almost certainly want to have a 2D workgroup for cache reasons.
1186     // There are some false positives here. We could simply have a shader that does this within a 1D grid,
1187     // or we may have a linearly tiled image, but these cases are quite unlikely in practice.
1188     bool accesses_2d = false;
1189     for (const auto& usage : descriptor_uses) {
1190         auto dim = module->GetShaderResourceDimensionality(usage.second);
1191         if (dim < 0) continue;
1192         auto spvdim = spv::Dim(dim);
1193         if (spvdim != spv::Dim1D && spvdim != spv::DimBuffer) accesses_2d = true;
1194     }
1195 
1196     if (accesses_2d && dimensions < 2) {
1197         skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreateComputePipelines_ComputeSpatialLocality,
1198                                       "%s vkCreateComputePipelines(): compute shader has work group dimensions (%u, %u, %u), which "
1199                                       "suggests a 1D dispatch, but the shader is accessing 2D or 3D images. The shader may be "
1200                                       "exhibiting poor spatial locality with respect to one or more shader resources.",
1201                                       VendorSpecificTag(kBPVendorArm), x, y, z);
1202     }
1203 
1204     return skip;
1205 }
1206 
1207 bool BestPractices::CheckPipelineStageFlags(const std::string& api_name, VkPipelineStageFlags flags) const {
1208     bool skip = false;
1209 
1210     if (flags & VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT) {
1211         skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
1212                            "You are using VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT when %s is called\n", api_name.c_str());
1213     } else if (flags & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) {
1214         skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
1215                            "You are using VK_PIPELINE_STAGE_ALL_COMMANDS_BIT when %s is called\n", api_name.c_str());
1216     }
1217 
1218     return skip;
1219 }
1220 
1221 bool BestPractices::CheckPipelineStageFlags(const std::string& api_name, VkPipelineStageFlags2KHR flags) const {
1222     bool skip = false;
1223 
1224     if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR) {
1225         skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
1226                            "You are using VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR when %s is called\n", api_name.c_str());
1227     } else if (flags & VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR) {
1228         skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
1229                            "You are using VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR when %s is called\n", api_name.c_str());
1230     }
1231 
1232     return skip;
1233 }
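/* Illustrative application-side sketch (not part of this layer): expressing a dependency with
   precise stage masks rather than VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT or
   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, which the two checks above flag. The `cmd` command buffer
   and the `barrier` image memory barrier are assumed to be set up by the calling application.

    // Wait only for color-attachment writes before subsequent fragment-shader reads,
    // instead of stalling every stage with ALL_COMMANDS/ALL_GRAPHICS.
    vkCmdPipelineBarrier(cmd,
                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,  // srcStageMask
                         VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,          // dstStageMask
                         0,                                              // dependencyFlags
                         0, nullptr, 0, nullptr, 1, &barrier);
*/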
1234 
1235 bool BestPractices::CheckDependencyInfo(const std::string& api_name, const VkDependencyInfoKHR& dep_info) const {
1236     bool skip = false;
1237     auto stage_masks = sync_utils::GetGlobalStageMasks(dep_info);
1238 
1239     skip |= CheckPipelineStageFlags(api_name, stage_masks.src);
1240     skip |= CheckPipelineStageFlags(api_name, stage_masks.dst);
1241 
1242     return skip;
1243 }
1244 
1245 void BestPractices::ManualPostCallRecordQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo, VkResult result) {
1246     for (uint32_t i = 0; i < pPresentInfo->swapchainCount; ++i) {
1247         auto swapchains_result = pPresentInfo->pResults ? pPresentInfo->pResults[i] : result;
1248         if (swapchains_result == VK_SUBOPTIMAL_KHR) {
1249             LogPerformanceWarning(
1250                 pPresentInfo->pSwapchains[i], kVUID_BestPractices_SuboptimalSwapchain,
1251                 "vkQueuePresentKHR: %s: VK_SUBOPTIMAL_KHR was returned. Presentation will still succeed, "
1252                 "subject to the window resize behavior, but the swapchain is no longer configured optimally for the surface it "
1253                 "targets. Applications should query updated surface information and recreate their swapchain at the next "
1254                 "convenient opportunity.",
1255                 report_data->FormatHandle(pPresentInfo->pSwapchains[i]).c_str());
1256         }
1257     }
1258 
1259     // AMD best practice
1260     // end-of-frame cleanup
1261     num_queue_submissions = 0;
1262     num_barriers_objects = 0;
1263     pipelines_used_in_frame.clear();
1264 }
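/* Illustrative application-side sketch (not part of this layer): one way an application might
   react to VK_SUBOPTIMAL_KHR, as the warning above recommends. The `queue`, `present_info`, and
   `recreate_swapchain()` names are hypothetical.

    VkResult present_result = vkQueuePresentKHR(queue, &present_info);
    if (present_result == VK_SUBOPTIMAL_KHR || present_result == VK_ERROR_OUT_OF_DATE_KHR) {
        // Query the surface capabilities again and rebuild the swapchain before the next frame.
        recreate_swapchain();
    }
*/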
1265 
1266 bool BestPractices::PreCallValidateQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits,
1267                                                VkFence fence) const {
1268     bool skip = false;
1269 
1270     for (uint32_t submit = 0; submit < submitCount; submit++) {
1271         for (uint32_t semaphore = 0; semaphore < pSubmits[submit].waitSemaphoreCount; semaphore++) {
1272             skip |= CheckPipelineStageFlags("vkQueueSubmit", pSubmits[submit].pWaitDstStageMask[semaphore]);
1273         }
1274     }
1275 
1276     return skip;
1277 }
1278 
1279 bool BestPractices::PreCallValidateQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits,
1280                                                    VkFence fence) const {
1281     bool skip = false;
1282 
1283     for (uint32_t submit = 0; submit < submitCount; submit++) {
1284         for (uint32_t semaphore = 0; semaphore < pSubmits[submit].waitSemaphoreInfoCount; semaphore++) {
1285             skip |= CheckPipelineStageFlags("vkQueueSubmit2KHR", pSubmits[submit].pWaitSemaphoreInfos[semaphore].stageMask);
1286         }
1287     }
1288 
1289     return skip;
1290 }
1291 
1292 bool BestPractices::PreCallValidateCreateCommandPool(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo,
1293                                                      const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool) const {
1294     bool skip = false;
1295 
1296     if (pCreateInfo->flags & VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT) {
1297         skip |= LogPerformanceWarning(
1298             device, kVUID_BestPractices_CreateCommandPool_CommandBufferReset,
1299             "vkCreateCommandPool(): VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT is set. Consider resetting entire "
1300             "pool instead.");
1301     }
1302 
1303     return skip;
1304 }
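/* Illustrative application-side sketch (not part of this layer): recycling command buffers by
   resetting the whole pool once per frame instead of creating the pool with
   VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT and resetting buffers individually. The
   `device` and `frame_pool` handles are assumed to exist in the calling application.

    // At the start of a frame (after its previous submission has completed),
    // reset every command buffer allocated from the pool in one call.
    vkResetCommandPool(device, frame_pool, 0);
*/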
1305 
1306 bool BestPractices::PreCallValidateBeginCommandBuffer(VkCommandBuffer commandBuffer,
1307                                                       const VkCommandBufferBeginInfo* pBeginInfo) const {
1308     bool skip = false;
1309 
1310     if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
1311         skip |= LogPerformanceWarning(device, kVUID_BestPractices_BeginCommandBuffer_SimultaneousUse,
1312                                       "vkBeginCommandBuffer(): VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT is set.");
1313     }
1314 
1315     if (!(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) && VendorCheckEnabled(kBPVendorArm)) {
1316         skip |= LogPerformanceWarning(device, kVUID_BestPractices_BeginCommandBuffer_OneTimeSubmit,
1317                                       "%s vkBeginCommandBuffer(): VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT is not set. "
1318                                       "For best performance on Mali GPUs, consider setting ONE_TIME_SUBMIT by default.",
1319                                       VendorSpecificTag(kBPVendorArm));
1320     }
1321 
1322     return skip;
1323 }
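/* Illustrative application-side sketch (not part of this layer): beginning a command buffer with
   ONE_TIME_SUBMIT, as the Arm-specific check above suggests. The `cmd` handle is assumed to exist
   in the calling application.

    VkCommandBufferBeginInfo begin_info = {};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;  // recorded once, submitted once
    vkBeginCommandBuffer(cmd, &begin_info);
*/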
1324 
1325 bool BestPractices::PreCallValidateCmdSetEvent(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) const {
1326     bool skip = false;
1327 
1328     skip |= CheckPipelineStageFlags("vkCmdSetEvent", stageMask);
1329 
1330     return skip;
1331 }
1332 
1333 bool BestPractices::PreCallValidateCmdSetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event,
1334                                                    const VkDependencyInfoKHR* pDependencyInfo) const {
1335     return CheckDependencyInfo("vkCmdSetEvent2KHR", *pDependencyInfo);
1336 }
1337 
1338 bool BestPractices::PreCallValidateCmdResetEvent(VkCommandBuffer commandBuffer, VkEvent event,
1339                                                  VkPipelineStageFlags stageMask) const {
1340     bool skip = false;
1341 
1342     skip |= CheckPipelineStageFlags("vkCmdResetEvent", stageMask);
1343 
1344     return skip;
1345 }
1346 
1347 bool BestPractices::PreCallValidateCmdResetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event,
1348                                                      VkPipelineStageFlags2KHR stageMask) const {
1349     bool skip = false;
1350 
1351     skip |= CheckPipelineStageFlags("vkCmdResetEvent2KHR", stageMask);
1352 
1353     return skip;
1354 }
1355 
1356 bool BestPractices::PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
1357                                                  VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
1358                                                  uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
1359                                                  uint32_t bufferMemoryBarrierCount,
1360                                                  const VkBufferMemoryBarrier* pBufferMemoryBarriers,
1361                                                  uint32_t imageMemoryBarrierCount,
1362                                                  const VkImageMemoryBarrier* pImageMemoryBarriers) const {
1363     bool skip = false;
1364 
1365     skip |= CheckPipelineStageFlags("vkCmdWaitEvents", srcStageMask);
1366     skip |= CheckPipelineStageFlags("vkCmdWaitEvents", dstStageMask);
1367 
1368     return skip;
1369 }
1370 
1371 bool BestPractices::PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
1372                                                      const VkDependencyInfoKHR* pDependencyInfos) const {
1373     bool skip = false;
1374     for (uint32_t i = 0; i < eventCount; i++) {
1375         skip |= CheckDependencyInfo("vkCmdWaitEvents2KHR", pDependencyInfos[i]);
1376     }
1377 
1378     return skip;
1379 }
1380 
1381 bool BestPractices::PreCallValidateCmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
1382                                                       VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags,
1383                                                       uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
1384                                                       uint32_t bufferMemoryBarrierCount,
1385                                                       const VkBufferMemoryBarrier* pBufferMemoryBarriers,
1386                                                       uint32_t imageMemoryBarrierCount,
1387                                                       const VkImageMemoryBarrier* pImageMemoryBarriers) const {
1388     bool skip = false;
1389 
1390     skip |= CheckPipelineStageFlags("vkCmdPipelineBarrier", srcStageMask);
1391     skip |= CheckPipelineStageFlags("vkCmdPipelineBarrier", dstStageMask);
1392 
1393     if (VendorCheckEnabled(kBPVendorAMD)) {
1394         if (num_barriers_objects + imageMemoryBarrierCount + bufferMemoryBarrierCount > kMaxRecommendedBarriersSizeAMD) {
1395             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CmdBuffer_highBarrierCount,
1396                         "%s Performance warning: In this frame, %" PRIu32 " barriers were already submitted. Barriers have a high cost and can "
1397                         "stall the GPU. "
1398                         "Consider consolidating and re-organizing the frame to use fewer barriers.",
1399                         VendorSpecificTag(kBPVendorAMD), num_barriers_objects);
1400         }
1401 
1402         std::array<VkImageLayout, 3> read_layouts = {
1403             VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1404             VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1405             VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1406         };
1407 
1408         for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
1409             // read to read barriers
1410             auto found = std::find(read_layouts.begin(), read_layouts.end(), pImageMemoryBarriers[i].oldLayout);
1411             bool old_is_read_layout = found != read_layouts.end();
1412             found = std::find(read_layouts.begin(), read_layouts.end(), pImageMemoryBarriers[i].newLayout);
1413             bool new_is_read_layout = found != read_layouts.end();
1414             if (old_is_read_layout && new_is_read_layout) {
1415                 skip |= LogPerformanceWarning(device, kVUID_BestPractices_PipelineBarrier_readToReadBarrier,
1416                                               "%s Performance warning: Don't issue read-to-read barriers. Get the resource in "
1417                                               "the right state the first time you use it.",
1418                                               VendorSpecificTag(kBPVendorAMD));
1419             }
1420 
1421             // general with no storage
1422             if (pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_GENERAL) {
1423                 auto image_state = Get<IMAGE_STATE>(pImageMemoryBarriers[i].image);
1424                 if (!(image_state->createInfo.usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1425                     skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_AvoidGeneral,
1426                                                   "%s Performance warning: VK_IMAGE_LAYOUT_GENERAL should only be used with "
1427                                                   "VK_IMAGE_USAGE_STORAGE_BIT images.",
1428                                                   VendorSpecificTag(kBPVendorAMD));
1429                 }
1430             }
1431         }
1432     }
1433 
1434     return skip;
1435 }
1436 
1437 bool BestPractices::PreCallValidateCmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
1438                                                           const VkDependencyInfoKHR* pDependencyInfo) const {
1439     return CheckDependencyInfo("vkCmdPipelineBarrier2KHR", *pDependencyInfo);
1440 }
1441 
1442 bool BestPractices::PreCallValidateCmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
1443                                                      VkQueryPool queryPool, uint32_t query) const {
1444     bool skip = false;
1445 
1446     skip |= CheckPipelineStageFlags("vkCmdWriteTimestamp", static_cast<VkPipelineStageFlags>(pipelineStage));
1447 
1448     return skip;
1449 }
1450 
1451 bool BestPractices::PreCallValidateCmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer, VkPipelineStageFlags2KHR pipelineStage,
1452                                                          VkQueryPool queryPool, uint32_t query) const {
1453     bool skip = false;
1454 
1455     skip |= CheckPipelineStageFlags("vkCmdWriteTimestamp2KHR", pipelineStage);
1456 
1457     return skip;
1458 }
1459 
1460 void BestPractices::PostCallRecordCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
1461                                                   VkPipeline pipeline) {
1462     StateTracker::PostCallRecordCmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
1463 
1464     // AMD best practice
1465     pipelines_used_in_frame.emplace(pipeline);
1466 
1467     if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
1468         // check for depth/blend state tracking
1469         auto gp_cis = graphicsPipelineCIs.find(pipeline);
1470         if (gp_cis != graphicsPipelineCIs.end()) {
1471             auto cb_node = GetCBState(commandBuffer);
1472             assert(cb_node);
1473             auto& render_pass_state = cb_node->render_pass_state;
1474 
1475             render_pass_state.nextDrawTouchesAttachments = gp_cis->second.accessFramebufferAttachments;
1476             render_pass_state.drawTouchAttachments = true;
1477 
1478             const auto& blend_state = gp_cis->second.colorBlendStateCI;
1479             const auto& stencil_state = gp_cis->second.depthStencilStateCI;
1480 
1481             if (blend_state) {
1482                 // assume the pipeline is depth-only unless any of the attachments have color writes enabled
1483                 render_pass_state.depthOnly = true;
1484                 for (size_t i = 0; i < blend_state->attachmentCount; i++) {
1485                     if (blend_state->pAttachments[i].colorWriteMask != 0) {
1486                         render_pass_state.depthOnly = false;
1487                     }
1488                 }
1489             }
1490 
1491             // check for depth value usage
1492             render_pass_state.depthEqualComparison = false;
1493 
1494             if (stencil_state && stencil_state->depthTestEnable) {
1495                 switch (stencil_state->depthCompareOp) {
1496                     case VK_COMPARE_OP_EQUAL:
1497                     case VK_COMPARE_OP_GREATER_OR_EQUAL:
1498                     case VK_COMPARE_OP_LESS_OR_EQUAL:
1499                         render_pass_state.depthEqualComparison = true;
1500                         break;
1501                     default:
1502                         break;
1503                 }
1504             }
1505         }
1506     }
1507 }
1508 
1509 static inline bool RenderPassUsesAttachmentAsResolve(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
1510     for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
1511         const auto& subpass_info = createInfo.pSubpasses[subpass];
1512         if (subpass_info.pResolveAttachments) {
1513             for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
1514                 if (subpass_info.pResolveAttachments[i].attachment == attachment) return true;
1515             }
1516         }
1517     }
1518 
1519     return false;
1520 }
1521 
1522 static inline bool RenderPassUsesAttachmentOnTile(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
1523     for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
1524         const auto& subpass_info = createInfo.pSubpasses[subpass];
1525 
1526         // If an attachment is ever used as a color attachment,
1527         // resolve attachment or depth stencil attachment,
1528         // it needs to exist on tile at some point.
1529 
1530         for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
1531             if (subpass_info.pColorAttachments[i].attachment == attachment) return true;
1532         }
1533 
1534         if (subpass_info.pResolveAttachments) {
1535             for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
1536                 if (subpass_info.pResolveAttachments[i].attachment == attachment) return true;
1537             }
1538         }
1539 
1540         if (subpass_info.pDepthStencilAttachment && subpass_info.pDepthStencilAttachment->attachment == attachment) return true;
1541     }
1542 
1543     return false;
1544 }
1545 
1546 static inline bool RenderPassUsesAttachmentAsImageOnly(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
1547     if (RenderPassUsesAttachmentOnTile(createInfo, attachment)) {
1548         return false;
1549     }
1550 
1551     for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
1552         const auto& subpassInfo = createInfo.pSubpasses[subpass];
1553 
1554         for (uint32_t i = 0; i < subpassInfo.inputAttachmentCount; i++) {
1555             if (subpassInfo.pInputAttachments[i].attachment == attachment) {
1556                 return true;
1557             }
1558         }
1559     }
1560 
1561     return false;
1562 }
1563 
1564 bool BestPractices::ValidateCmdBeginRenderPass(VkCommandBuffer commandBuffer, RenderPassCreateVersion rp_version,
1565                                                const VkRenderPassBeginInfo* pRenderPassBegin) const {
1566     bool skip = false;
1567 
1568     if (!pRenderPassBegin) {
1569         return skip;
1570     }
1571 
1572     if (pRenderPassBegin->renderArea.extent.width == 0 || pRenderPassBegin->renderArea.extent.height == 0) {
1573         skip |= LogWarning(device, kVUID_BestPractices_BeginRenderPass_ZeroSizeRenderArea,
1574                            "This render pass has a zero-size render area. It cannot write to any attachments, "
1575                            "and can only be used for side effects such as layout transitions.");
1576     }
1577 
1578     auto rp_state = Get<RENDER_PASS_STATE>(pRenderPassBegin->renderPass);
1579     if (rp_state) {
1580         if (rp_state->createInfo.flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT) {
1581             const VkRenderPassAttachmentBeginInfo* rpabi = LvlFindInChain<VkRenderPassAttachmentBeginInfo>(pRenderPassBegin->pNext);
1582             if (rpabi) {
1583                 skip |= ValidateAttachments(rp_state->createInfo.ptr(), rpabi->attachmentCount, rpabi->pAttachments);
1584             }
1585         }
1586         // Check if any attachments have LOAD operation on them
1587         for (uint32_t att = 0; att < rp_state->createInfo.attachmentCount; att++) {
1588             const auto& attachment = rp_state->createInfo.pAttachments[att];
1589 
1590             bool attachment_has_readback = false;
1591             if (!FormatIsStencilOnly(attachment.format) && attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
1592                 attachment_has_readback = true;
1593             }
1594 
1595             if (FormatHasStencil(attachment.format) && attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
1596                 attachment_has_readback = true;
1597             }
1598 
1599             bool attachment_needs_readback = false;
1600 
1601             // Check if the attachment is actually used in any subpass on-tile
1602             if (attachment_has_readback && RenderPassUsesAttachmentOnTile(rp_state->createInfo, att)) {
1603                 attachment_needs_readback = true;
1604             }
1605 
1606             // Using LOAD_OP_LOAD is expensive on tiled GPUs, so flag it as a potential improvement
1607             if (attachment_needs_readback && VendorCheckEnabled(kBPVendorArm)) {
1608                 skip |= LogPerformanceWarning(
1609                     device, kVUID_BestPractices_BeginRenderPass_AttachmentNeedsReadback,
1610                     "%s Attachment #%u in render pass has begun with VK_ATTACHMENT_LOAD_OP_LOAD.\n"
1611                     "Submitting this render pass will cause the driver to inject a readback of the attachment, "
1612                     "which will copy a total of %u pixels (renderArea = "
1613                     "{ %" PRId32 ", %" PRId32 ", %" PRIu32 ", %" PRIu32 " }) to the tile buffer.",
1614                     VendorSpecificTag(kBPVendorArm), att,
1615                     pRenderPassBegin->renderArea.extent.width * pRenderPassBegin->renderArea.extent.height,
1616                     pRenderPassBegin->renderArea.offset.x, pRenderPassBegin->renderArea.offset.y,
1617                     pRenderPassBegin->renderArea.extent.width, pRenderPassBegin->renderArea.extent.height);
1618             }
1619         }
1620     }
1621 
1622     return skip;
1623 }
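/* Illustrative application-side sketch (not part of this layer): a hypothetical attachment
   description that avoids the LOAD_OP_LOAD readback flagged above when the previous contents of
   the attachment are not needed.

    VkAttachmentDescription color_attachment = {};
    color_attachment.format = VK_FORMAT_R8G8B8A8_UNORM;
    color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
    color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;   // or DONT_CARE; both avoid the readback
    color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
*/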
1624 
1625 void BestPractices::QueueValidateImageView(QueueCallbacks &funcs, const char* function_name,
1626                                            IMAGE_VIEW_STATE* view, IMAGE_SUBRESOURCE_USAGE_BP usage) {
1627     if (view) {
1628         QueueValidateImage(funcs, function_name, GetImageUsageState(view->create_info.image), usage,
1629                            view->normalized_subresource_range);
1630     }
1631 }
1632 
1633 void BestPractices::QueueValidateImage(QueueCallbacks &funcs, const char* function_name,
1634                                        IMAGE_STATE_BP* state, IMAGE_SUBRESOURCE_USAGE_BP usage,
1635                                        const VkImageSubresourceRange& subresource_range) {
1636     IMAGE_STATE* image = state->image;
1637 
1638     // If we're viewing a 3D slice, ignore base array layer.
1639     // The entire 3D subresource is accessed as one atomic unit.
1640     const uint32_t base_array_layer = image->createInfo.imageType == VK_IMAGE_TYPE_3D ? 0 : subresource_range.baseArrayLayer;
1641 
1642     const uint32_t max_layers = image->createInfo.arrayLayers - base_array_layer;
1643     const uint32_t array_layers = std::min(subresource_range.layerCount, max_layers);
1644     const uint32_t max_levels = image->createInfo.mipLevels - subresource_range.baseMipLevel;
1645     const uint32_t mip_levels = std::min(subresource_range.levelCount, max_levels);
1646 
1647     for (uint32_t layer = 0; layer < array_layers; layer++) {
1648         for (uint32_t level = 0; level < mip_levels; level++) {
1649             QueueValidateImage(funcs, function_name, state, usage, layer + base_array_layer,
1650                                level + subresource_range.baseMipLevel);
1651         }
1652     }
1653 }
1654 
1655 void BestPractices::QueueValidateImage(QueueCallbacks &funcs, const char* function_name,
1656                                        IMAGE_STATE_BP* state, IMAGE_SUBRESOURCE_USAGE_BP usage,
1657                                        const VkImageSubresourceLayers& subresource_layers) {
1658     IMAGE_STATE* image = state->image;
1659     const uint32_t max_layers = image->createInfo.arrayLayers - subresource_layers.baseArrayLayer;
1660     const uint32_t array_layers = std::min(subresource_layers.layerCount, max_layers);
1661 
1662     for (uint32_t layer = 0; layer < array_layers; layer++) {
1663         QueueValidateImage(funcs, function_name, state, usage, layer + subresource_layers.baseArrayLayer, subresource_layers.mipLevel);
1664     }
1665 }
1666 
1667 void BestPractices::QueueValidateImage(QueueCallbacks &funcs, const char* function_name,
1668                                        IMAGE_STATE_BP* state, IMAGE_SUBRESOURCE_USAGE_BP usage,
1669                                        uint32_t array_layer, uint32_t mip_level) {
1670     funcs.push_back([this, function_name, state, usage, array_layer, mip_level](const ValidationStateTracker&, const QUEUE_STATE&,
1671                                                                                 const CMD_BUFFER_STATE&) -> bool {
1672         ValidateImageInQueue(function_name, state, usage, array_layer, mip_level);
1673         return false;
1674     });
1675 }
1676 
1677 void BestPractices::ValidateImageInQueueArm(const char* function_name, IMAGE_STATE* image,
1678                                             IMAGE_SUBRESOURCE_USAGE_BP last_usage,
1679                                             IMAGE_SUBRESOURCE_USAGE_BP usage,
1680                                             uint32_t array_layer, uint32_t mip_level) {
1681     // Swapchain images are implicitly read so clear after store is expected.
1682     if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_STORED &&
1683         !image->IsSwapchainImage()) {
1684         LogPerformanceWarning(
1685             device, kVUID_BestPractices_RenderPass_RedundantStore,
1686             "%s: %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time "
1687             "image was used, it was written to with STORE_OP_STORE. "
1688             "Storing to the image is probably redundant in this case, and wastes bandwidth on tile-based "
1689             "architectures.",
1690             function_name, VendorSpecificTag(kBPVendorArm), array_layer, mip_level);
1691     } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED) {
1692         LogPerformanceWarning(
1693             device, kVUID_BestPractices_RenderPass_RedundantClear,
1694             "%s: %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time "
1695             "image was used, it was written to with vkCmdClear*Image(). "
1696             "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on "
1697             "tile-based architectures.",
1699             function_name, VendorSpecificTag(kBPVendorArm), array_layer, mip_level);
1700     } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE &&
1701                (last_usage == IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE ||
1702                 last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED ||
1703                 last_usage == IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE ||
1704                 last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE)) {
1705         const char *last_cmd = nullptr;
1706         const char *vuid = nullptr;
1707         const char *suggestion = nullptr;
1708 
1709         switch (last_usage) {
1710             case IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE:
1711                 vuid = kVUID_BestPractices_RenderPass_BlitImage_LoadOpLoad;
1712                 last_cmd = "vkCmdBlitImage";
1713                 suggestion =
1714                     "The blit is probably redundant in this case, and wastes bandwidth on tile-based architectures. "
1715                     "Rather than blitting, just render the source image in a fragment shader in this render pass, "
1716                     "which avoids the memory roundtrip.";
1717                 break;
1718             case IMAGE_SUBRESOURCE_USAGE_BP::CLEARED:
1719                 vuid = kVUID_BestPractices_RenderPass_InefficientClear;
1720                 last_cmd = "vkCmdClear*Image";
1721                 suggestion =
1722                     "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on "
1723                     "tile-based architectures. "
1724                     "Use LOAD_OP_CLEAR instead to clear the image for free.";
1725                 break;
1726             case IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE:
1727                 vuid = kVUID_BestPractices_RenderPass_CopyImage_LoadOpLoad;
1728                 last_cmd = "vkCmdCopy*Image";
1729                 suggestion =
1730                     "The copy is probably redundant in this case, and wastes bandwidth on tile-based architectures. "
1731                     "Rather than copying, just render the source image in a fragment shader in this render pass, "
1732                     "which avoids the memory roundtrip.";
1733                 break;
1734             case IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE:
1735                 vuid = kVUID_BestPractices_RenderPass_ResolveImage_LoadOpLoad;
1736                 last_cmd = "vkCmdResolveImage";
1737                 suggestion =
1738                     "The resolve is probably redundant in this case, and wastes a lot of bandwidth on tile-based architectures. "
1739                     "Rather than resolving, and then loading, try to keep rendering in the same render pass, "
1740                     "which avoids the memory roundtrip.";
1741                 break;
1742             default:
1743                 break;
1744         }
1745 
1746         LogPerformanceWarning(
1747             device, vuid,
1748             "%s: %s Subresource (arrayLayer: %u, mipLevel: %u) of image was loaded to tile as part of LOAD_OP_LOAD, but last "
1749             "time image was used, it was written to with %s. %s",
1750             function_name, VendorSpecificTag(kBPVendorArm), array_layer, mip_level, last_cmd, suggestion);
1751     }
1752 }
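/* Illustrative application-side sketch (not part of this layer): clearing a render target through
   the render pass itself rather than a separate vkCmdClearColorImage call followed by
   LOAD_OP_LOAD, which the checks above flag as wasteful on tile-based GPUs. The `cmd` and
   `begin_info` variables are assumed to be set up by the calling application, and the attachment
   is assumed to use VK_ATTACHMENT_LOAD_OP_CLEAR.

    VkClearValue clear_value = {};
    clear_value.color = {{0.0f, 0.0f, 0.0f, 1.0f}};
    begin_info.clearValueCount = 1;
    begin_info.pClearValues = &clear_value;
    vkCmdBeginRenderPass(cmd, &begin_info, VK_SUBPASS_CONTENTS_INLINE);
*/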
1753 
1754 void BestPractices::ValidateImageInQueue(const char* function_name, IMAGE_STATE_BP* state,
1755                                          IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer,
1756                                          uint32_t mip_level) {
1757     IMAGE_STATE* image = state->image;
1758     IMAGE_SUBRESOURCE_USAGE_BP last_usage = state->usages[array_layer][mip_level];
1759     state->usages[array_layer][mip_level] = usage;
1760     if (VendorCheckEnabled(kBPVendorArm)) {
1761         ValidateImageInQueueArm(function_name, image, last_usage, usage, array_layer, mip_level);
1762     }
1763 }
1764 
1765 void BestPractices::AddDeferredQueueOperations(CMD_BUFFER_STATE_BP* cb) {
1766     cb->queue_submit_functions.insert(cb->queue_submit_functions.end(),
1767                                       cb->queue_submit_functions_after_render_pass.begin(),
1768                                       cb->queue_submit_functions_after_render_pass.end());
1769     cb->queue_submit_functions_after_render_pass.clear();
1770 }
1771 
1772 void BestPractices::PreCallRecordCmdEndRenderPass(VkCommandBuffer commandBuffer) {
1773     ValidationStateTracker::PreCallRecordCmdEndRenderPass(commandBuffer);
1774     auto cb_node = GetCBState(commandBuffer);
1775     AddDeferredQueueOperations(cb_node.get());
1776 }
1777 
1778 void BestPractices::PreCallRecordCmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassInfo) {
1779     ValidationStateTracker::PreCallRecordCmdEndRenderPass2(commandBuffer, pSubpassInfo);
1780     auto cb_node = GetCBState(commandBuffer);
1781     AddDeferredQueueOperations(cb_node.get());
1782 }
1783 
1784 void BestPractices::PreCallRecordCmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR *pSubpassInfo) {
1785     ValidationStateTracker::PreCallRecordCmdEndRenderPass2KHR(commandBuffer, pSubpassInfo);
1786     auto cb_node = GetCBState(commandBuffer);
1787     AddDeferredQueueOperations(cb_node.get());
1788 }
1789 
1790 void BestPractices::PreCallRecordCmdBeginRenderPass(VkCommandBuffer commandBuffer,
1791                                                     const VkRenderPassBeginInfo* pRenderPassBegin,
1792                                                     VkSubpassContents contents) {
1793     ValidationStateTracker::PreCallRecordCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
1794     RecordCmdBeginRenderPass(commandBuffer, pRenderPassBegin);
1795 }
1796 
1797 void BestPractices::PreCallRecordCmdBeginRenderPass2(VkCommandBuffer commandBuffer,
1798                                                      const VkRenderPassBeginInfo* pRenderPassBegin,
1799                                                      const VkSubpassBeginInfo* pSubpassBeginInfo) {
1800     ValidationStateTracker::PreCallRecordCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1801     RecordCmdBeginRenderPass(commandBuffer, pRenderPassBegin);
1802 }
1803 
1804 void BestPractices::PreCallRecordCmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
1805                                                         const VkRenderPassBeginInfo* pRenderPassBegin,
1806                                                         const VkSubpassBeginInfo* pSubpassBeginInfo) {
1807     ValidationStateTracker::PreCallRecordCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1808     RecordCmdBeginRenderPass(commandBuffer, pRenderPassBegin);
1809 }
1810 
1811 void BestPractices::RecordCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin) {
1812 
1813     if (!pRenderPassBegin) {
1814         return;
1815     }
1816 
1817     auto cb = GetCBState(commandBuffer);
1818 
1819     auto rp_state = Get<RENDER_PASS_STATE>(pRenderPassBegin->renderPass);
1820     if (rp_state) {
1821         // Check load ops
1822         for (uint32_t att = 0; att < rp_state->createInfo.attachmentCount; att++) {
1823             const auto& attachment = rp_state->createInfo.pAttachments[att];
1824 
1825             if (!RenderPassUsesAttachmentAsImageOnly(rp_state->createInfo, att) &&
1826                 !RenderPassUsesAttachmentOnTile(rp_state->createInfo, att)) {
1827                 continue;
1828             }
1829 
1830             IMAGE_SUBRESOURCE_USAGE_BP usage = IMAGE_SUBRESOURCE_USAGE_BP::UNDEFINED;
1831 
1832             if ((!FormatIsStencilOnly(attachment.format) && attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) ||
1833                 (FormatHasStencil(attachment.format) && attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD)) {
1834                 usage = IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE;
1835             } else if ((!FormatIsStencilOnly(attachment.format) && attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) ||
1836                        (FormatHasStencil(attachment.format) && attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)) {
1837                 usage = IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED;
1838             } else if (RenderPassUsesAttachmentAsImageOnly(rp_state->createInfo, att)) {
1839                 usage = IMAGE_SUBRESOURCE_USAGE_BP::DESCRIPTOR_ACCESS;
1840             }
1841 
1842             auto framebuffer = Get<FRAMEBUFFER_STATE>(pRenderPassBegin->framebuffer);
1843             std::shared_ptr<IMAGE_VIEW_STATE> image_view = nullptr;
1844 
1845             if (framebuffer->createInfo.flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT) {
1846                 const VkRenderPassAttachmentBeginInfo* rpabi = LvlFindInChain<VkRenderPassAttachmentBeginInfo>(pRenderPassBegin->pNext);
1847                 if (rpabi) {
1848                     image_view = Get<IMAGE_VIEW_STATE>(rpabi->pAttachments[att]);
1849                 }
1850             } else {
1851                 image_view = Get<IMAGE_VIEW_STATE>(framebuffer->createInfo.pAttachments[att]);
1852             }
1853 
1854             QueueValidateImageView(cb->queue_submit_functions, "vkCmdBeginRenderPass()", image_view.get(), usage);
1855         }
1856 
1857         // Check store ops
1858         for (uint32_t att = 0; att < rp_state->createInfo.attachmentCount; att++) {
1859             const auto& attachment = rp_state->createInfo.pAttachments[att];
1860 
1861             if (!RenderPassUsesAttachmentOnTile(rp_state->createInfo, att)) {
1862                 continue;
1863             }
1864 
1865             IMAGE_SUBRESOURCE_USAGE_BP usage = IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_DISCARDED;
1866 
1867             if ((!FormatIsStencilOnly(attachment.format) && attachment.storeOp == VK_ATTACHMENT_STORE_OP_STORE) ||
1868                 (FormatHasStencil(attachment.format) && attachment.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)) {
1869                 usage = IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_STORED;
1870             }
1871 
1872             auto framebuffer = Get<FRAMEBUFFER_STATE>(pRenderPassBegin->framebuffer);
1873 
1874             std::shared_ptr<IMAGE_VIEW_STATE> image_view;
1875             if (framebuffer->createInfo.flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT) {
1876                 const VkRenderPassAttachmentBeginInfo* rpabi = LvlFindInChain<VkRenderPassAttachmentBeginInfo>(pRenderPassBegin->pNext);
1877                 if (rpabi) {
1878                     image_view = Get<IMAGE_VIEW_STATE>(rpabi->pAttachments[att]);
1879                 }
1880             } else {
1881                 image_view = Get<IMAGE_VIEW_STATE>(framebuffer->createInfo.pAttachments[att]);
1882             }
1883 
1884             QueueValidateImageView(cb->queue_submit_functions_after_render_pass, "vkCmdEndRenderPass()", image_view.get(), usage);
1885         }
1886     }
1887 }
1888 
1889 bool BestPractices::PreCallValidateCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin,
1890                                                       VkSubpassContents contents) const {
1891     bool skip = StateTracker::PreCallValidateCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
1892     skip |= ValidateCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_1, pRenderPassBegin);
1893     return skip;
1894 }
1895 
1896 bool BestPractices::PreCallValidateCmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
1897                                                           const VkRenderPassBeginInfo* pRenderPassBegin,
1898                                                           const VkSubpassBeginInfo* pSubpassBeginInfo) const {
1899     bool skip = StateTracker::PreCallValidateCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1900     skip |= ValidateCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_2, pRenderPassBegin);
1901     return skip;
1902 }
1903 
1904 bool BestPractices::PreCallValidateCmdBeginRenderPass2(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin,
1905                                                        const VkSubpassBeginInfo* pSubpassBeginInfo) const {
1906     bool skip = StateTracker::PreCallValidateCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1907     skip |= ValidateCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_2, pRenderPassBegin);
1908     return skip;
1909 }
1910 
1911 void BestPractices::RecordCmdBeginRenderPass(VkCommandBuffer commandBuffer, RenderPassCreateVersion rp_version,
1912                                              const VkRenderPassBeginInfo* pRenderPassBegin) {
1913     // Reset the renderpass state
1914     auto cb = GetCBState(commandBuffer);
1915     assert(cb);
1916     cb->hasDrawCmd = false;
1917     auto& render_pass_state = cb->render_pass_state;
1918     render_pass_state.touchesAttachments.clear();
1919     render_pass_state.earlyClearAttachments.clear();
1920     render_pass_state.numDrawCallsDepthOnly = 0;
1921     render_pass_state.numDrawCallsDepthEqualCompare = 0;
1922     render_pass_state.colorAttachment = false;
1923     render_pass_state.depthAttachment = false;
1924     render_pass_state.drawTouchAttachments = true;
1925     // Don't reset state related to pipeline state.
1926 
1927     const auto rp_state = Get<RENDER_PASS_STATE>(pRenderPassBegin->renderPass);
1928 
1929     // track depth / color attachment usage within the renderpass
1930     for (size_t i = 0; i < rp_state->createInfo.subpassCount; i++) {
1931         // record if depth/color attachments are in use for this renderpass
1932         if (rp_state->createInfo.pSubpasses[i].pDepthStencilAttachment != nullptr) render_pass_state.depthAttachment = true;
1933 
1934         if (rp_state->createInfo.pSubpasses[i].colorAttachmentCount > 0) render_pass_state.colorAttachment = true;
1935     }
1936 }
1937 
1938 void BestPractices::PostCallRecordCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin,
1939                                                      VkSubpassContents contents) {
1940     StateTracker::PostCallRecordCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
1941     RecordCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_1, pRenderPassBegin);
1942 }
1943 
1944 void BestPractices::PostCallRecordCmdBeginRenderPass2(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin,
1945                                                       const VkSubpassBeginInfo* pSubpassBeginInfo) {
1946     StateTracker::PostCallRecordCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1947     RecordCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_2, pRenderPassBegin);
1948 }
1949 
1950 void BestPractices::PostCallRecordCmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
1951                                                          const VkRenderPassBeginInfo* pRenderPassBegin,
1952                                                          const VkSubpassBeginInfo* pSubpassBeginInfo) {
1953     StateTracker::PostCallRecordCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
1954     RecordCmdBeginRenderPass(commandBuffer, RENDER_PASS_VERSION_2, pRenderPassBegin);
1955 }
1956 
1957 // Generic function to handle validation for all CmdDraw* type functions
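// Currently this flags two situations:
//  - vertex buffers are bound to the command buffer although the bound pipeline declares no vertex bindings
//  - depthBiasEnable is VK_TRUE although the subpass has no depth-stencil attachment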
1958 bool BestPractices::ValidateCmdDrawType(VkCommandBuffer cmd_buffer, const char* caller) const {
1959     bool skip = false;
1960     const auto cb_state = GetCBState(cmd_buffer);
1961     if (cb_state) {
1962         const auto lv_bind_point = ConvertToLvlBindPoint(VK_PIPELINE_BIND_POINT_GRAPHICS);
1963         const auto* pipeline_state = cb_state->lastBound[lv_bind_point].pipeline_state;
1964         const auto& current_vtx_bfr_binding_info = cb_state->current_vertex_buffer_binding_info.vertex_buffer_bindings;
1965 
1966         // Verify vertex binding
1967         if (pipeline_state && pipeline_state->vertex_binding_descriptions_.empty()) {
1968             if ((!current_vtx_bfr_binding_info.empty()) && (!cb_state->vertex_buffer_used)) {
1969                 skip |= LogPerformanceWarning(cb_state->commandBuffer(), kVUID_BestPractices_DrawState_VtxIndexOutOfBounds,
1970                                               "Vertex buffers are bound to %s but no vertex buffers are attached to %s.",
1971                                               report_data->FormatHandle(cb_state->commandBuffer()).c_str(),
1972                                               report_data->FormatHandle(pipeline_state->pipeline()).c_str());
1973             }
1974         }
1975 
1976         const auto* pipe = cb_state->GetCurrentPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS);
1977         if (pipe) {
1978             const auto* rp_state = pipe->rp_state.get();
1979             if (rp_state) {
1980                 for (uint32_t i = 0; i < rp_state->createInfo.subpassCount; ++i) {
1981                     const auto& subpass = rp_state->createInfo.pSubpasses[i];
1982                     const auto& create_info = pipe->create_info.graphics;
1983                     const uint32_t depth_stencil_attachment =
1984                         GetSubpassDepthStencilAttachmentIndex(create_info.pDepthStencilState, subpass.pDepthStencilAttachment);
1985                     if ((depth_stencil_attachment == VK_ATTACHMENT_UNUSED) && create_info.pRasterizationState &&
1986                         create_info.pRasterizationState->depthBiasEnable == VK_TRUE) {
1987                         skip |= LogWarning(cb_state->commandBuffer(), kVUID_BestPractices_DepthBiasNoAttachment,
1988                                            "%s: depthBiasEnable == VK_TRUE without a depth-stencil attachment.", caller);
1989                     }
1990                 }
1991             }
1992         }
1993     }
1994     return skip;
1995 }
1996 
1997 void BestPractices::RecordCmdDrawType(VkCommandBuffer cmd_buffer, uint32_t draw_count, const char* caller) {
1998     auto cb_node = GetCBState(cmd_buffer);
1999     assert(cb_node);
2000     auto& render_pass_state = cb_node->render_pass_state;
2001     if (VendorCheckEnabled(kBPVendorArm)) {
2002         RecordCmdDrawTypeArm(render_pass_state, draw_count, caller);
2003     }
2004 
2005     if (render_pass_state.drawTouchAttachments) {
2006         for (auto& touch : render_pass_state.nextDrawTouchesAttachments) {
2007             RecordAttachmentAccess(render_pass_state, touch.framebufferAttachment, touch.aspects);
2008         }
2009         // No need to touch the same attachments over and over.
2010         render_pass_state.drawTouchAttachments = false;
2011     }
2012 }
2013 
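// Arm-specific draw bookkeeping: draw calls large enough (draw_count >= kDepthPrePassMinDrawCountArm) are
// counted while the render pass state is flagged as depth-only and/or depth-equal-compare. The counters are
// consumed later in ValidateCmdEndRenderPass() to warn about likely depth pre-passes.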
2014 void BestPractices::RecordCmdDrawTypeArm(RenderPassState& render_pass_state, uint32_t draw_count, const char* caller) {
2015     if (draw_count >= kDepthPrePassMinDrawCountArm) {
2016         if (render_pass_state.depthOnly) render_pass_state.numDrawCallsDepthOnly++;
2017         if (render_pass_state.depthEqualComparison) render_pass_state.numDrawCallsDepthEqualCompare++;
2018     }
2019 }
2020 
2021 bool BestPractices::PreCallValidateCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
2022                                            uint32_t firstVertex, uint32_t firstInstance) const {
2023     bool skip = false;
2024 
2025     if (instanceCount == 0) {
2026         skip |= LogWarning(device, kVUID_BestPractices_CmdDraw_InstanceCountZero,
2027                            "Warning: You are calling vkCmdDraw() with an instanceCount of Zero.");
2028     }
2029     skip |= ValidateCmdDrawType(commandBuffer, "vkCmdDraw()");
2030 
2031     return skip;
2032 }
2033 
2034 void BestPractices::PostCallRecordCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
2035                                           uint32_t firstVertex, uint32_t firstInstance) {
2036     StateTracker::PostCallRecordCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
2037     RecordCmdDrawType(commandBuffer, vertexCount * instanceCount, "vkCmdDraw()");
2038 }
2039 
2040 bool BestPractices::PreCallValidateCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
2041                                                   uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) const {
2042     bool skip = false;
2043 
2044     if (instanceCount == 0) {
2045         skip |= LogWarning(device, kVUID_BestPractices_CmdDraw_InstanceCountZero,
2046                            "Warning: You are calling vkCmdDrawIndexed() with an instanceCount of Zero.");
2047     }
2048     skip |= ValidateCmdDrawType(commandBuffer, "vkCmdDrawIndexed()");
2049 
2050     // Check if we reached the limit for small indexed draw calls.
2051     // Note that we cannot update the draw call count here, so we do it in PreCallRecordCmdDrawIndexed.
2052     const auto cmd_state = GetCBState(commandBuffer);
2053     if ((indexCount * instanceCount) <= kSmallIndexedDrawcallIndices &&
2054         (cmd_state->small_indexed_draw_call_count == kMaxSmallIndexedDrawcalls - 1) &&
2055         VendorCheckEnabled(kBPVendorArm)) {
2056         skip |= LogPerformanceWarning(device, kVUID_BestPractices_CmdDrawIndexed_ManySmallIndexedDrawcalls,
2057                                       "%s: The command buffer contains many small indexed drawcalls "
2058                                       "(at least %u drawcalls with less than %u indices each). This may cause pipeline bubbles. "
2059                                       "You can try batching drawcalls or instancing when applicable.",
2060                                       VendorSpecificTag(kBPVendorArm), kMaxSmallIndexedDrawcalls, kSmallIndexedDrawcallIndices);
2061     }
2062 
2063     if (VendorCheckEnabled(kBPVendorArm)) {
2064         ValidateIndexBufferArm(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
2065     }
2066 
2067     return skip;
2068 }
2069 
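// Heuristic index buffer analysis for Arm GPUs. The index data is scanned twice on the CPU:
//  1. compute the min/max index and run the indices through a small LRU model of the post-transform cache
//     to estimate how many vertices would actually be shaded, and
//  2. mark every referenced index in a set of bitset buckets to measure how densely the [min, max] range is used.
// Sparse usage of the range and a low estimated cache hit rate each trigger a performance warning.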
2070 bool BestPractices::ValidateIndexBufferArm(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
2071                                            uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) const {
2072     bool skip = false;
2073 
2074     // check for sparse/underutilised index buffer, and post-transform cache thrashing
2075     const auto cmd_state = GetCBState(commandBuffer);
2076     if (cmd_state == nullptr) return skip;
2077 
2078     const auto* ib_state = cmd_state->index_buffer_binding.buffer_state.get();
2079     if (ib_state == nullptr || cmd_state->index_buffer_binding.buffer_state->Destroyed()) return skip;
2080 
2081     const VkIndexType ib_type = cmd_state->index_buffer_binding.index_type;
2082     const auto* ib_mem_state = ib_state->MemState();
2083     const VkDeviceSize ib_mem_offset = ib_mem_state ? ib_mem_state->mapped_range.offset : 0;
2084     const void* ib_mem = ib_mem_state ? ib_mem_state->p_driver_data : nullptr;
2085     bool primitive_restart_enable = false;
2086 
2087     const auto lv_bind_point = ConvertToLvlBindPoint(VK_PIPELINE_BIND_POINT_GRAPHICS);
2088     const auto& pipeline_binding_iter = cmd_state->lastBound[lv_bind_point];
2089     const auto* pipeline_state = pipeline_binding_iter.pipeline_state;
2090 
2091     if (pipeline_state != nullptr && pipeline_state->create_info.graphics.pInputAssemblyState != nullptr) {
2092         primitive_restart_enable = pipeline_state->create_info.graphics.pInputAssemblyState->primitiveRestartEnable == VK_TRUE;
2093     }
2094 
2095     // no point checking the index buffer if its memory is nonexistent/unmapped, or if no graphics pipeline is bound to this CB
2096     if (ib_mem && pipeline_binding_iter.IsUsing()) {
2097         uint32_t scan_stride;
2098         if (ib_type == VK_INDEX_TYPE_UINT8_EXT) {
2099             scan_stride = sizeof(uint8_t);
2100         } else if (ib_type == VK_INDEX_TYPE_UINT16) {
2101             scan_stride = sizeof(uint16_t);
2102         } else {
2103             scan_stride = sizeof(uint32_t);
2104         }
2105 
2106         const uint8_t* scan_begin = static_cast<const uint8_t*>(ib_mem) + ib_mem_offset + firstIndex * scan_stride;
2107         const uint8_t* scan_end = scan_begin + indexCount * scan_stride;
2108 
2109         // Min and max are important to track for some Mali architectures. In older Mali devices without IDVS, all
2110         // vertices corresponding to indices between the minimum and maximum may be loaded, and possibly shaded,
2111         // irrespective of whether or not they're part of the draw call.
2112 
2113         // start with minimum as 0xFFFFFFFF and adjust to indices in the buffer
2114         uint32_t min_index = ~0u;
2115         // start with maximum as 0 and adjust to indices in the buffer
2116         uint32_t max_index = 0u;
2117 
2118         // first scan-through: simulate an LRU model of the post-transform cache to estimate the number of vertices shaded
2119         // for the given index buffer
2120         uint32_t vertex_shade_count = 0;
2121 
2122         PostTransformLRUCacheModel post_transform_cache;
2123 
2124         // The larger the modelled cache, the more closely it can approximate the behaviour of an arbitrary,
2125         // unknown hardware post-transform cache, so a generous size is a reasonable default when the target architecture is unknown.
2126         // However, modelling a post-transform cache with more than 32 elements gives diminishing returns in practice.
2127         // http://eelpi.gotdns.org/papers/fast_vert_cache_opt.html
2129         post_transform_cache.resize(32);
2130 
2131         for (const uint8_t* scan_ptr = scan_begin; scan_ptr < scan_end; scan_ptr += scan_stride) {
2132             uint32_t scan_index;
2133             uint32_t primitive_restart_value;
2134             if (ib_type == VK_INDEX_TYPE_UINT8_EXT) {
2135                 scan_index = *reinterpret_cast<const uint8_t*>(scan_ptr);
2136                 primitive_restart_value = 0xFF;
2137             } else if (ib_type == VK_INDEX_TYPE_UINT16) {
2138                 scan_index = *reinterpret_cast<const uint16_t*>(scan_ptr);
2139                 primitive_restart_value = 0xFFFF;
2140             } else {
2141                 scan_index = *reinterpret_cast<const uint32_t*>(scan_ptr);
2142                 primitive_restart_value = 0xFFFFFFFF;
2143             }
2144 
2145             max_index = std::max(max_index, scan_index);
2146             min_index = std::min(min_index, scan_index);
2147 
2148             if (!primitive_restart_enable || scan_index != primitive_restart_value) {
2149                 bool in_cache = post_transform_cache.query_cache(scan_index);
2150                 // if the shaded vertex corresponding to the index is not in the PT-cache, we need to shade again
2151                 if (!in_cache) vertex_shade_count++;
2152             }
2153         }
2154 
2155         // if min/max were never updated, the draw has no indices (or only primitive restarts); nothing to analyse
2156         // if min and max are equal, every index is the same and the checks below are meaningless
2157         if (max_index <= min_index) return skip;
2158 
2159         if (max_index - min_index >= indexCount) {
2160             skip |=
2161                 LogPerformanceWarning(device, kVUID_BestPractices_CmdDrawIndexed_SparseIndexBuffer,
2162                                       "%s The indices which were specified for the draw call only utilise approximately %.02f%% of "
2163                                       "index buffer value range. Arm Mali architectures before G71 do not have IDVS (Index-Driven "
2164                                       "Vertex Shading), meaning all vertices corresponding to indices between the minimum and "
2165                                       "maximum would be loaded, and possibly shaded, whether or not they are used.",
2166                                       VendorSpecificTag(kBPVendorArm),
2167                                       (static_cast<float>(indexCount) / static_cast<float>(max_index - min_index)) * 100.0f);
2168             return skip;
2169         }
2170 
2171         // use a dynamic vector of bitsets as a memory-compact representation of which indices are included in the draw call
2172         // each bit of the n-th bucket records whether index (min_index + n*refs_per_bucket + bit) was referenced
2173         const size_t refs_per_bucket = 64;
2174         std::vector<std::bitset<refs_per_bucket>> vertex_reference_buckets;
2175 
2176         const uint32_t n_indices = max_index - min_index + 1;
2177         const uint32_t n_buckets = (n_indices / static_cast<uint32_t>(refs_per_bucket)) +
2178                                    ((n_indices % static_cast<uint32_t>(refs_per_bucket)) != 0 ? 1 : 0);
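        // Example: with min_index == 100 and refs_per_bucket == 64, an index of 170 has index_offset 70,
        // so it sets bit 70 % 64 == 6 in bucket 70 / 64 == 1.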
2179 
2180         // there needs to be at least one bitset to store a set of indices smaller than n_buckets
2181         vertex_reference_buckets.resize(std::max(1u, n_buckets));
2182 
2183         // To avoid using too much memory, we run over the indices again.
2184         // Knowing the size from the last scan allows us to record index usage with bitsets
2185         for (const uint8_t* scan_ptr = scan_begin; scan_ptr < scan_end; scan_ptr += scan_stride) {
2186             uint32_t scan_index;
2187             if (ib_type == VK_INDEX_TYPE_UINT8_EXT) {
2188                 scan_index = *reinterpret_cast<const uint8_t*>(scan_ptr);
2189             } else if (ib_type == VK_INDEX_TYPE_UINT16) {
2190                 scan_index = *reinterpret_cast<const uint16_t*>(scan_ptr);
2191             } else {
2192                 scan_index = *reinterpret_cast<const uint32_t*>(scan_ptr);
2193             }
2194             // keep track of the set of all indices used to reference vertices in the draw call
2195             size_t index_offset = scan_index - min_index;
2196             size_t bitset_bucket_index = index_offset / refs_per_bucket;
2197             uint64_t used_indices = 1ull << (index_offset % refs_per_bucket);
2198             vertex_reference_buckets[bitset_bucket_index] |= used_indices;
2199         }
2200 
2201         uint32_t vertex_reference_count = 0;
2202         for (const auto& bitset : vertex_reference_buckets) {
2203             vertex_reference_count += static_cast<uint32_t>(bitset.count());
2204         }
2205 
2206         // low index buffer utilization means that not all vertices in the [min, max] range are actually referenced by the draw
2207         float utilization = static_cast<float>(vertex_reference_count) / static_cast<float>(max_index - min_index + 1);
2208         // a low hit rate (high miss rate) means the index ordering makes poor use of recently shaded vertices
2209         float cache_hit_rate = static_cast<float>(vertex_reference_count) / static_cast<float>(vertex_shade_count);
2210 
2211         if (utilization < 0.5f) {
2212             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CmdDrawIndexed_SparseIndexBuffer,
2213                                           "%s The indices which were specified for the draw call only utilise approximately "
2214                                           "%.02f%% of the bound vertex buffer.",
2215                                           VendorSpecificTag(kBPVendorArm), utilization * 100.0f);
2216         }
2217 
2218         if (cache_hit_rate <= 0.5f) {
2219             skip |=
2220                 LogPerformanceWarning(device, kVUID_BestPractices_CmdDrawIndexed_PostTransformCacheThrashing,
2221                                       "%s The indices which were specified for the draw call are estimated to cause thrashing of "
2222                                       "the post-transform vertex cache, with a hit-rate of %.02f%%. "
2223                                       "I.e. the ordering of the index buffer may not make optimal use of indices associated with "
2224                                       "recently shaded vertices.",
2225                                       VendorSpecificTag(kBPVendorArm), cache_hit_rate * 100.0f);
2226         }
2227     }
2228 
2229     return skip;
2230 }
2231 
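// Secondary command buffers defer their vkCmdClearAttachments analysis (earlyClearAttachments) because the
// primary's render pass state is not known at record time. Replay those deferred clears here against the
// primary command buffer, and additionally warn on AMD, where secondary command buffers are discouraged.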
2232 bool BestPractices::PreCallValidateCmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
2233                                                       const VkCommandBuffer* pCommandBuffers) const {
2234     bool skip = false;
2235     const auto primary = GetCBState(commandBuffer);
2236     for (uint32_t i = 0; i < commandBufferCount; i++) {
2237         const auto secondary_cb = GetCBState(pCommandBuffers[i]);
2238         if (secondary_cb == nullptr) {
2239             continue;
2240         }
2241         const auto& secondary = secondary_cb->render_pass_state;
2242         for (auto& clear : secondary.earlyClearAttachments) {
2243             if (ClearAttachmentsIsFullClear(primary.get(), uint32_t(clear.rects.size()), clear.rects.data())) {
2244                 skip |= ValidateClearAttachment(commandBuffer, primary.get(), clear.framebufferAttachment, clear.colorAttachment,
2245                                                 clear.aspects, true);
2246             }
2247         }
2248     }
2249 
2250     if (VendorCheckEnabled(kBPVendorAMD)) {
2251         if (commandBufferCount > 0) {
2252             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CmdBuffer_AvoidSecondaryCmdBuffers,
2253                                           "%s Performance warning: Use of secondary command buffers is not recommended. ",
2254                                           VendorSpecificTag(kBPVendorAMD));
2255         }
2256     }
2257     return skip;
2258 }
2259 
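// Merge the render pass state recorded in each secondary command buffer back into the primary: deferred full
// clears become clear-attachment records, everything else becomes a plain attachment access, and the depth
// pre-pass draw counters are accumulated.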
2260 void BestPractices::PreCallRecordCmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
2261                                                     const VkCommandBuffer* pCommandBuffers) {
2262     auto primary = GetCBState(commandBuffer);
2263     auto& primary_state = primary->render_pass_state;
2264 
2265     for (uint32_t i = 0; i < commandBufferCount; i++) {
2266         auto secondary_cb = GetCBState(pCommandBuffers[i]);
2267         if (secondary_cb == nullptr) {
2268             continue;
2269         }
2270         auto& secondary = secondary_cb->render_pass_state;
2271 
2272         for (auto& early_clear : secondary.earlyClearAttachments) {
2273             if (ClearAttachmentsIsFullClear(primary.get(), uint32_t(early_clear.rects.size()), early_clear.rects.data())) {
2274                 RecordAttachmentClearAttachments(primary.get(), primary_state, early_clear.framebufferAttachment,
2275                                                  early_clear.colorAttachment, early_clear.aspects,
2276                                                  uint32_t(early_clear.rects.size()), early_clear.rects.data());
2277             } else {
2278                 RecordAttachmentAccess(primary_state, early_clear.framebufferAttachment,
2279                                        early_clear.aspects);
2280             }
2281         }
2282 
2283         for (auto& touch : secondary.touchesAttachments) {
2284             RecordAttachmentAccess(primary_state, touch.framebufferAttachment,
2285                                    touch.aspects);
2286         }
2287 
2288         primary_state.numDrawCallsDepthEqualCompare += secondary.numDrawCallsDepthEqualCompare;
2289         primary_state.numDrawCallsDepthOnly += secondary.numDrawCallsDepthOnly;
2290 
2291         if (secondary_cb->hasDrawCmd) {
2292             primary->hasDrawCmd = true;
2293         }
2295     }
2296 
2297     ValidationStateTracker::PreCallRecordCmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
2298 }
2299 
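// Record that a draw or partial clear touched the given framebuffer attachment aspects in the current render pass.
// ValidateCmdEndRenderPass() later uses this to detect attachments that consume load/store bandwidth but are never accessed.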
2300 void BestPractices::RecordAttachmentAccess(RenderPassState& state, uint32_t fb_attachment, VkImageAspectFlags aspects) {
2301     // Called when we have a partial clear attachment, or a normal draw call which accesses an attachment.
2302     auto itr = std::find_if(state.touchesAttachments.begin(), state.touchesAttachments.end(),
2303                             [&](const AttachmentInfo& info) {
2304                                 return info.framebufferAttachment == fb_attachment;
2305                             });
2306 
2307     if (itr != state.touchesAttachments.end()) {
2308         itr->aspects |= aspects;
2309     } else {
2310         state.touchesAttachments.push_back({ fb_attachment, aspects });
2311     }
2312 }
2313 
2314 void BestPractices::RecordAttachmentClearAttachments(CMD_BUFFER_STATE_BP* cmd_state, RenderPassState& state, uint32_t fb_attachment,
2315                                                      uint32_t color_attachment, VkImageAspectFlags aspects, uint32_t rectCount,
2316                                                      const VkClearRect* pRects) {
2317     // If we observe a full clear before any other access to a framebuffer attachment,
2318     // we have a candidate for a redundant clear.
2319     auto itr = std::find_if(state.touchesAttachments.begin(), state.touchesAttachments.end(),
2320                             [&](const AttachmentInfo& info) {
2321                                 return info.framebufferAttachment == fb_attachment;
2322                             });
2323 
2324     uint32_t new_aspects = aspects;
2325     if (itr != state.touchesAttachments.end()) {
2326         new_aspects = aspects & ~itr->aspects;
2327         itr->aspects |= aspects;
2328     } else {
2329         state.touchesAttachments.push_back({ fb_attachment, aspects });
2330     }
2331 
2332     if (new_aspects == 0) {
2333         return;
2334     }
2335 
2336     if (cmd_state->createInfo.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
2337         // The clear may be the first command in this secondary command buffer, but not necessarily the first
2338         // in the render pass; defer any checks until CmdExecuteCommands.
2339         state.earlyClearAttachments.push_back({ fb_attachment, color_attachment, new_aspects,
2340                                                 std::vector<VkClearRect>{pRects, pRects + rectCount} });
2341     }
2342 }
2343 
2344 void BestPractices::PreCallRecordCmdClearAttachments(VkCommandBuffer commandBuffer,
2345                                                      uint32_t attachmentCount, const VkClearAttachment* pClearAttachments,
2346                                                      uint32_t rectCount, const VkClearRect* pRects) {
2347     auto cmd_state = GetCBState(commandBuffer);
2348     RENDER_PASS_STATE* rp_state = cmd_state->activeRenderPass.get();
2349     FRAMEBUFFER_STATE* fb_state = cmd_state->activeFramebuffer.get();
2350     RenderPassState& tracking_state = cmd_state->render_pass_state;
2351     bool is_secondary = cmd_state->createInfo.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY;
2352 
2353     if (rectCount == 0 || !rp_state) {
2354         return;
2355     }
2356 
2357     if (!is_secondary && !fb_state) {
2358         return;
2359     }
2360 
2361     // If we have a rect which covers the entire frame buffer, we have a LOAD_OP_CLEAR-like command.
2362     bool full_clear = ClearAttachmentsIsFullClear(cmd_state.get(), rectCount, pRects);
2363 
2364     auto& subpass = rp_state->createInfo.pSubpasses[cmd_state->activeSubpass];
2365     for (uint32_t i = 0; i < attachmentCount; i++) {
2366         auto& attachment = pClearAttachments[i];
2367         uint32_t fb_attachment = VK_ATTACHMENT_UNUSED;
2368         VkImageAspectFlags aspects = attachment.aspectMask;
2369 
2370         if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2371             if (subpass.pDepthStencilAttachment) {
2372                 fb_attachment = subpass.pDepthStencilAttachment->attachment;
2373             }
2374         } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
2375             fb_attachment = subpass.pColorAttachments[attachment.colorAttachment].attachment;
2376         }
2377 
2378         if (fb_attachment != VK_ATTACHMENT_UNUSED) {
2379             if (full_clear) {
2380                 RecordAttachmentClearAttachments(cmd_state.get(), tracking_state, fb_attachment, attachment.colorAttachment,
2381                                                  aspects, rectCount, pRects);
2382             } else {
2383                 RecordAttachmentAccess(tracking_state, fb_attachment, aspects);
2384             }
2385         }
2386     }
2387 
2388     ValidationStateTracker::PreCallRecordCmdClearAttachments(commandBuffer, attachmentCount, pClearAttachments,
2389                                                              rectCount, pRects);
2390 }
2391 
2392 void BestPractices::PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
2393                                                 uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) {
2394     ValidationStateTracker::PreCallRecordCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
2395                                                         firstInstance);
2396 
2397     auto cmd_state = GetCBState(commandBuffer);
2398     if ((indexCount * instanceCount) <= kSmallIndexedDrawcallIndices) {
2399         cmd_state->small_indexed_draw_call_count++;
2400     }
2401 
2402     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDrawIndexed()");
2403 }
2404 
2405 void BestPractices::PostCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
2406                                                  uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) {
2407     StateTracker::PostCallRecordCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
2408     RecordCmdDrawType(commandBuffer, indexCount * instanceCount, "vkCmdDrawIndexed()");
2409 }
2410 
2411 bool BestPractices::PreCallValidateCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2412                                                                VkBuffer countBuffer, VkDeviceSize countBufferOffset,
2413                                                                uint32_t maxDrawCount, uint32_t stride) const {
2414     bool skip = ValidateCmdDrawType(commandBuffer, "vkCmdDrawIndexedIndirectCount()");
2415 
2416     return skip;
2417 }
2418 
2419 bool BestPractices::PreCallValidateCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer,
2420                                                                   VkDeviceSize offset, VkBuffer countBuffer,
2421                                                                   VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
2422                                                                   uint32_t stride) const {
2423     bool skip = ValidateCmdDrawType(commandBuffer, "vkCmdDrawIndexedIndirectCountKHR()");
2424 
2425     return skip;
2426 }
2427 
2428 bool BestPractices::PreCallValidateCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2429                                                    uint32_t drawCount, uint32_t stride) const {
2430     bool skip = false;
2431 
2432     if (drawCount == 0) {
2433         skip |= LogWarning(device, kVUID_BestPractices_CmdDraw_DrawCountZero,
2434                            "Warning: You are calling vkCmdDrawIndirect() with a drawCount of Zero.");
2435     }
2436     skip |= ValidateCmdDrawType(commandBuffer, "vkCmdDrawIndirect()");
2437 
2438     return skip;
2439 }
2440 
2441 void BestPractices::PostCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2442                                                   uint32_t count, uint32_t stride) {
2443     StateTracker::PostCallRecordCmdDrawIndirect(commandBuffer, buffer, offset, count, stride);
2444     RecordCmdDrawType(commandBuffer, count, "vkCmdDrawIndirect()");
2445 }
2446 
2447 bool BestPractices::PreCallValidateCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2448                                                           uint32_t drawCount, uint32_t stride) const {
2449     bool skip = false;
2450 
2451     if (drawCount == 0) {
2452         skip |= LogWarning(device, kVUID_BestPractices_CmdDraw_DrawCountZero,
2453                            "Warning: You are calling vkCmdDrawIndexedIndirect() with a drawCount of Zero.");
2454     }
2455     skip |= ValidateCmdDrawType(commandBuffer, "vkCmdDrawIndexedIndirect()");
2456 
2457     return skip;
2458 }
2459 
2460 void BestPractices::PostCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2461                                                          uint32_t count, uint32_t stride) {
2462     StateTracker::PostCallRecordCmdDrawIndexedIndirect(commandBuffer, buffer, offset, count, stride);
2463     RecordCmdDrawType(commandBuffer, count, "vkCmdDrawIndexedIndirect()");
2464 }
2465 
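// Queue deferred usage checks for every image view reachable from the descriptor sets validated on this
// command buffer. Bindings with bindless-style flags (partially bound, update-after-bind, update-unused-while-
// pending) are skipped, since their contents cannot be tracked reliably.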
2466 void BestPractices::ValidateBoundDescriptorSets(VkCommandBuffer commandBuffer, const char* function_name) {
2467     auto cb_state = GetCBState(commandBuffer);
2468 
2469     if (cb_state) {
2470         for (auto descriptor_set : cb_state->validated_descriptor_sets) {
2471             const auto& layout = *descriptor_set->GetLayout();
2472 
2473             for (uint32_t index = 0; index < descriptor_set->GetBindingCount(); ++index) {
2474                 // For bindless scenarios, we should not attempt to track descriptor set state.
2475                 // It is highly uncertain which resources are actually bound.
2476                 // Resources which are written to such a descriptor should be marked as indeterminate w.r.t. state.
2477                 VkDescriptorBindingFlags flags = layout.GetDescriptorBindingFlagsFromIndex(index);
2478                 if (flags & (VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT |
2479                              VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
2480                              VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT)) {
2481                     continue;
2482                 }
2483 
2484                 auto index_range = layout.GetGlobalIndexRangeFromIndex(index);
2485                 for (uint32_t i = index_range.start; i < index_range.end; ++i) {
2486                     VkImageView image_view{VK_NULL_HANDLE};
2487 
2488                     auto descriptor = descriptor_set->GetDescriptorFromGlobalIndex(i);
2489                     switch (descriptor->GetClass()) {
2490                         case cvdescriptorset::DescriptorClass::Image: {
2491                             if (const auto image_descriptor = static_cast<const cvdescriptorset::ImageDescriptor*>(descriptor)) {
2492                                 image_view = image_descriptor->GetImageView();
2493                             }
2494                             break;
2495                         }
2496                         case cvdescriptorset::DescriptorClass::ImageSampler: {
2497                             if (const auto image_sampler_descriptor =
2498                                     static_cast<const cvdescriptorset::ImageSamplerDescriptor*>(descriptor)) {
2499                                 image_view = image_sampler_descriptor->GetImageView();
2500                             }
2501                             break;
2502                         }
2503                         default:
2504                             break;
2505                     }
2506 
2507                     if (image_view) {
2508                         auto image_view_state = Get<IMAGE_VIEW_STATE>(image_view);
2509                         QueueValidateImageView(cb_state->queue_submit_functions, function_name, image_view_state.get(),
2510                                                IMAGE_SUBRESOURCE_USAGE_BP::DESCRIPTOR_ACCESS);
2511                     }
2512                 }
2513             }
2514         }
2515     }
2516 }
2517 
2518 void BestPractices::PreCallRecordCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
2519                                          uint32_t firstVertex, uint32_t firstInstance) {
2520     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDraw()");
2521 }
2522 
2523 void BestPractices::PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2524                                                  uint32_t drawCount, uint32_t stride) {
2525     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDrawIndirect()");
2526 }
2527 
2528 void BestPractices::PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
2529                                                         uint32_t drawCount, uint32_t stride) {
2530     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDrawIndexedIndirect()");
2531 }
2532 
2533 bool BestPractices::PreCallValidateCmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY,
2534                                                uint32_t groupCountZ) const {
2535     bool skip = false;
2536 
2537     if ((groupCountX == 0) || (groupCountY == 0) || (groupCountZ == 0)) {
2538         skip |= LogWarning(device, kVUID_BestPractices_CmdDispatch_GroupCountZero,
2539                            "Warning: You are calling vkCmdDispatch() while one or more groupCounts are zero (groupCountX = %" PRIu32
2540                            ", groupCountY = %" PRIu32 ", groupCountZ = %" PRIu32 ").",
2541                            groupCountX, groupCountY, groupCountZ);
2542     }
2543 
2544     return skip;
2545 }
2546 
2547 bool BestPractices::PreCallValidateCmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo* pSubpassEndInfo) const {
2548     bool skip = false;
2549     skip |= StateTracker::PreCallValidateCmdEndRenderPass2(commandBuffer, pSubpassEndInfo);
2550     skip |= ValidateCmdEndRenderPass(commandBuffer);
2551     return skip;
2552 }
2553 
2554 bool BestPractices::PreCallValidateCmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, const VkSubpassEndInfo* pSubpassEndInfo) const {
2555     bool skip = false;
2556     skip |= StateTracker::PreCallValidateCmdEndRenderPass2KHR(commandBuffer, pSubpassEndInfo);
2557     skip |= ValidateCmdEndRenderPass(commandBuffer);
2558     return skip;
2559 }
2560 
2561 bool BestPractices::PreCallValidateCmdEndRenderPass(VkCommandBuffer commandBuffer) const {
2562     bool skip = false;
2563     skip |= StateTracker::PreCallValidateCmdEndRenderPass(commandBuffer);
2564     skip |= ValidateCmdEndRenderPass(commandBuffer);
2565     return skip;
2566 }
2567 
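// End-of-render-pass heuristics: warn when the draw-call counters suggest a depth pre-pass (which the message
// notes may be redundant with Forward Pixel Killing on Mali), and, for Arm, warn about attachments whose
// LOAD_OP_LOAD / STORE_OP_STORE bandwidth is spent even though nothing in the pass ever touched them.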
2568 bool BestPractices::ValidateCmdEndRenderPass(VkCommandBuffer commandBuffer) const {
2569     bool skip = false;
2570     const auto cmd = GetCBState(commandBuffer);
2571 
2572     if (cmd == nullptr) return skip;
2573     auto &render_pass_state = cmd->render_pass_state;
2574 
2575     bool uses_depth = (render_pass_state.depthAttachment || render_pass_state.colorAttachment) &&
2576                       render_pass_state.numDrawCallsDepthEqualCompare >= kDepthPrePassNumDrawCallsArm &&
2577                       render_pass_state.numDrawCallsDepthOnly >= kDepthPrePassNumDrawCallsArm;
2578     if (uses_depth) {
2579         skip |= LogPerformanceWarning(
2580             device, kVUID_BestPractices_EndRenderPass_DepthPrePassUsage,
2581             "%s Depth pre-passes may be in use. In general, this is not recommended, as in Arm Mali GPUs since "
2582             "Mali-T620, Forward Pixel Killing (FPK) can already perform automatic hidden surface removal; in which "
2583             "case, using depth pre-passes for hidden surface removal may worsen performance.",
2584             VendorSpecificTag(kBPVendorArm));
2585     }
2586 
2587     RENDER_PASS_STATE* rp = cmd->activeRenderPass.get();
2588 
2589     if (VendorCheckEnabled(kBPVendorArm) && rp) {
2590 
2591         // If we use an attachment on-tile, we should access it in some way. Otherwise,
2592         // it is redundant to have it be part of the render pass.
2593         // Only consider it redundant if it actually consumes bandwidth, i.e.
2594         // LOAD_OP_LOAD or STORE_OP_STORE is used. CLEAR -> DONT_CARE is benign,
2595         // as is using pure input attachments.
2596         // CLEAR -> STORE might be considered a "useful" thing to do, but
2597         // the optimal thing to do is to defer the clear until you're actually
2598         // going to render to the image.
2599 
2600         uint32_t num_attachments = rp->createInfo.attachmentCount;
2601         for (uint32_t i = 0; i < num_attachments; i++) {
2602             if (!RenderPassUsesAttachmentOnTile(rp->createInfo, i) ||
2603                 RenderPassUsesAttachmentAsResolve(rp->createInfo, i)) {
2604                 continue;
2605             }
2606 
2607             auto& attachment = rp->createInfo.pAttachments[i];
2608 
2609             VkImageAspectFlags bandwidth_aspects = 0;
2610 
2611             if (!FormatIsStencilOnly(attachment.format) &&
2612                 (attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
2613                  attachment.storeOp == VK_ATTACHMENT_STORE_OP_STORE)) {
2614                 if (FormatHasDepth(attachment.format)) {
2615                     bandwidth_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
2616                 } else {
2617                     bandwidth_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
2618                 }
2619             }
2620 
2621             if (FormatHasStencil(attachment.format) &&
2622                 (attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
2623                  attachment.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)) {
2624                 bandwidth_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
2625             }
2626 
2627             if (!bandwidth_aspects) {
2628                 continue;
2629             }
2630 
2631             auto itr = std::find_if(render_pass_state.touchesAttachments.begin(), render_pass_state.touchesAttachments.end(),
2632                                     [&](const AttachmentInfo& info) { return info.framebufferAttachment == i; });
2633             uint32_t untouched_aspects = bandwidth_aspects;
2634             if (itr != render_pass_state.touchesAttachments.end()) {
2635                 untouched_aspects &= ~itr->aspects;
2636             }
2637 
2638             if (untouched_aspects) {
2639                 skip |= LogPerformanceWarning(
2640                     device, kVUID_BestPractices_EndRenderPass_RedundantAttachmentOnTile,
2641                     "%s Render pass was ended, but attachment #%u (format: %u, untouched aspects 0x%x) "
2642                     "was never accessed by a pipeline or clear command. "
2643                     "On tile-based architectures, LOAD_OP_LOAD and STORE_OP_STORE consume bandwidth and should not be part of the render pass "
2644                     "if the attachments are not intended to be accessed.",
2645                     VendorSpecificTag(kBPVendorArm), i, attachment.format, untouched_aspects);
2646             }
2647         }
2648     }
2649 
2650     return skip;
2651 }
2652 
2653 void BestPractices::PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) {
2654     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDispatch()");
2655 }
2656 
2657 void BestPractices::PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) {
2658     ValidateBoundDescriptorSets(commandBuffer, "vkCmdDispatchIndirect()");
2659 }
2660 
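// Common check for the display-plane entry points below: warn if the plane is queried before the application
// has retrieved the plane properties via vkGetPhysicalDeviceDisplayPlaneProperties(2)KHR.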
2661 bool BestPractices::ValidateGetPhysicalDeviceDisplayPlanePropertiesKHRQuery(VkPhysicalDevice physicalDevice,
2662                                                                             const char* api_name) const {
2663     bool skip = false;
2664     const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
2665 
2666     if (bp_pd_state) {
2667         if (bp_pd_state->vkGetPhysicalDeviceDisplayPlanePropertiesKHRState == UNCALLED) {
2668             skip |= LogWarning(physicalDevice, kVUID_BestPractices_DisplayPlane_PropertiesNotCalled,
2669                                "Potential problem with calling %s() without first retrieving properties from "
2670                                "vkGetPhysicalDeviceDisplayPlanePropertiesKHR or vkGetPhysicalDeviceDisplayPlaneProperties2KHR.",
2671                                api_name);
2672         }
2673     }
2674 
2675     return skip;
2676 }
2677 
2678 bool BestPractices::PreCallValidateGetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physicalDevice, uint32_t planeIndex,
2679                                                                        uint32_t* pDisplayCount, VkDisplayKHR* pDisplays) const {
2680     bool skip = false;
2681 
2682     skip |= ValidateGetPhysicalDeviceDisplayPlanePropertiesKHRQuery(physicalDevice, "vkGetDisplayPlaneSupportedDisplaysKHR");
2683 
2684     return skip;
2685 }
2686 
2687 bool BestPractices::PreCallValidateGetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode,
2688                                                                   uint32_t planeIndex,
2689                                                                   VkDisplayPlaneCapabilitiesKHR* pCapabilities) const {
2690     bool skip = false;
2691 
2692     skip |= ValidateGetPhysicalDeviceDisplayPlanePropertiesKHRQuery(physicalDevice, "vkGetDisplayPlaneCapabilitiesKHR");
2693 
2694     return skip;
2695 }
2696 
2697 bool BestPractices::PreCallValidateGetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physicalDevice,
2698                                                                    const VkDisplayPlaneInfo2KHR* pDisplayPlaneInfo,
2699                                                                    VkDisplayPlaneCapabilities2KHR* pCapabilities) const {
2700     bool skip = false;
2701 
2702     skip |= ValidateGetPhysicalDeviceDisplayPlanePropertiesKHRQuery(physicalDevice, "vkGetDisplayPlaneCapabilities2KHR");
2703 
2704     return skip;
2705 }
2706 
PreCallValidateGetSwapchainImagesKHR(VkDevice device,VkSwapchainKHR swapchain,uint32_t * pSwapchainImageCount,VkImage * pSwapchainImages) const2707 bool BestPractices::PreCallValidateGetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount,
2708                                                          VkImage* pSwapchainImages) const {
2709     bool skip = false;
2710 
2711     const auto swapchain_state = std::static_pointer_cast<const SWAPCHAIN_STATE_BP>(Get<SWAPCHAIN_NODE>(swapchain));
2712 
2713     if (swapchain_state && pSwapchainImages) {
2714         // Compare the preliminary value of *pSwapchainImageCount with the value this time:
2715         if (swapchain_state->vkGetSwapchainImagesKHRState == UNCALLED) {
2716             skip |=
2717                 LogWarning(device, kVUID_Core_Swapchain_PriorCount,
2718                            "vkGetSwapchainImagesKHR() called with non-NULL pSwapchainImages, but no prior call has been made "
2719                            "with NULL pSwapchainImages to query pSwapchainImageCount.");
2720         }
2721 
2722         if (*pSwapchainImageCount > swapchain_state->get_swapchain_image_count) {
2723             skip |= LogWarning(
2724                 device, kVUID_BestPractices_Swapchain_InvalidCount,
2725                 "vkGetSwapchainImagesKHR() called with non-NULL pSwapchainImages, and with pSwapchainImageCount set to a "
2726                 "value (%" PRIu32 ") that is greater than the value (%" PRIu32 ") that was returned when pSwapchainImages was NULL.",
2727                 *pSwapchainImageCount, swapchain_state->get_swapchain_image_count);
2728         }
2729     }
2730 
2731     return skip;
2732 }
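
// For reference, a sketch of the two-call idiom that avoids both warnings above (names illustrative):
//
//     uint32_t image_count = 0;
//     vkGetSwapchainImagesKHR(device, swapchain, &image_count, nullptr);        // first call queries the count
//     std::vector<VkImage> images(image_count);
//     vkGetSwapchainImagesKHR(device, swapchain, &image_count, images.data());  // second call uses that same count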
2733 
2734 // Common function to handle validation for GetPhysicalDeviceQueueFamilyProperties & 2KHR version
ValidateCommonGetPhysicalDeviceQueueFamilyProperties(const PHYSICAL_DEVICE_STATE * bp_pd_state,uint32_t requested_queue_family_property_count,const CALL_STATE call_state,const char * caller_name) const2735 bool BestPractices::ValidateCommonGetPhysicalDeviceQueueFamilyProperties(const PHYSICAL_DEVICE_STATE* bp_pd_state,
2736                                                                          uint32_t requested_queue_family_property_count,
2737                                                                          const CALL_STATE call_state,
2738                                                                          const char* caller_name) const {
2739     bool skip = false;
2740     // Verify that for each physical device, this command is called first with NULL pQueueFamilyProperties in order to get count
2741     if (UNCALLED == call_state) {
2742         skip |= LogWarning(
2743             bp_pd_state->Handle(), kVUID_Core_DevLimit_MissingQueryCount,
2744             "%s is called with non-NULL pQueueFamilyProperties before obtaining pQueueFamilyPropertyCount. It is "
2745             "recommended "
2746             "to first call %s with NULL pQueueFamilyProperties in order to obtain the maximal pQueueFamilyPropertyCount.",
2747             caller_name, caller_name);
2748         // Then verify that pCount that is passed in on second call matches what was returned
2749     } else if (bp_pd_state->queue_family_known_count != requested_queue_family_property_count) {
2750         skip |= LogWarning(bp_pd_state->Handle(), kVUID_Core_DevLimit_CountMismatch,
2751                            "%s is called with non-NULL pQueueFamilyProperties and pQueueFamilyPropertyCount value %" PRIu32
2752                            ", but the largest previously returned pQueueFamilyPropertyCount for this physicalDevice is %" PRIu32
2753                            ". It is recommended to instead receive all the properties by calling %s with "
2754                            "pQueueFamilyPropertyCount that was "
2755                            "previously obtained by calling %s with NULL pQueueFamilyProperties.",
2756                            caller_name, requested_queue_family_property_count, bp_pd_state->queue_family_known_count, caller_name,
2757                            caller_name);
2758     }
2759 
2760     return skip;
2761 }
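
// The usage pattern both warnings steer applications toward, sketched with illustrative names:
//
//     uint32_t family_count = 0;
//     vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, nullptr);
//     std::vector<VkQueueFamilyProperties> families(family_count);
//     vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, families.data());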
2762 
PreCallValidateBindAccelerationStructureMemoryNV(VkDevice device,uint32_t bindInfoCount,const VkBindAccelerationStructureMemoryInfoNV * pBindInfos) const2763 bool BestPractices::PreCallValidateBindAccelerationStructureMemoryNV(
2764     VkDevice device, uint32_t bindInfoCount, const VkBindAccelerationStructureMemoryInfoNV* pBindInfos) const {
2765     bool skip = false;
2766 
2767     for (uint32_t i = 0; i < bindInfoCount; i++) {
2768         const auto as_state = Get<ACCELERATION_STRUCTURE_STATE>(pBindInfos[i].accelerationStructure);
2769         if (as_state && !as_state->memory_requirements_checked) {
2770             // There is no explicit requirement in the spec to call vkGetAccelerationStructureMemoryRequirementsNV() prior to
2771             // calling vkBindAccelerationStructureMemoryNV, but it is implied: the memory being bound must conform to the
2772             // requirements reported for VkAccelerationStructureMemoryRequirementsInfoNV by that query.
2773             skip |= LogWarning(
2774                 device, kVUID_BestPractices_BindAccelNV_NoMemReqQuery,
2775                 "vkBindAccelerationStructureMemoryNV(): "
2776                 "Binding memory to %s but vkGetAccelerationStructureMemoryRequirementsNV() has not been called on that structure.",
2777                 report_data->FormatHandle(pBindInfos[i].accelerationStructure).c_str());
2778         }
2779     }
2780 
2781     return skip;
2782 }
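
// A hedged sketch of the query-then-bind sequence this warning expects for VK_NV_ray_tracing;
// acceleration_structure and memory are illustrative application-side handles:
//
//     VkAccelerationStructureMemoryRequirementsInfoNV req_info = {
//         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, nullptr,
//         VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV, acceleration_structure};
//     VkMemoryRequirements2 mem_reqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
//     vkGetAccelerationStructureMemoryRequirementsNV(device, &req_info, &mem_reqs);
//
//     VkBindAccelerationStructureMemoryInfoNV bind_info = {
//         VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, nullptr,
//         acceleration_structure, memory /* allocated against mem_reqs */, 0 /* memoryOffset */,
//         0 /* deviceIndexCount */, nullptr};
//     vkBindAccelerationStructureMemoryNV(device, 1, &bind_info);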
2783 
PreCallValidateGetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties * pQueueFamilyProperties) const2784 bool BestPractices::PreCallValidateGetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice,
2785                                                                           uint32_t* pQueueFamilyPropertyCount,
2786                                                                           VkQueueFamilyProperties* pQueueFamilyProperties) const {
2787     const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
2788     if (pQueueFamilyProperties && bp_pd_state) {
2789         return ValidateCommonGetPhysicalDeviceQueueFamilyProperties(bp_pd_state.get(), *pQueueFamilyPropertyCount,
2790                                                                     bp_pd_state->vkGetPhysicalDeviceQueueFamilyPropertiesState,
2791                                                                     "vkGetPhysicalDeviceQueueFamilyProperties()");
2792     }
2793     return false;
2794 }
2795 
PreCallValidateGetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties) const2796 bool BestPractices::PreCallValidateGetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
2797                                                                            uint32_t* pQueueFamilyPropertyCount,
2798                                                                            VkQueueFamilyProperties2* pQueueFamilyProperties) const {
2799     const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
2800     if (pQueueFamilyProperties && bp_pd_state) {
2801         return ValidateCommonGetPhysicalDeviceQueueFamilyProperties(bp_pd_state.get(), *pQueueFamilyPropertyCount,
2802                                                                     bp_pd_state->vkGetPhysicalDeviceQueueFamilyProperties2State,
2803                                                                     "vkGetPhysicalDeviceQueueFamilyProperties2()");
2804     }
2805     return false;
2806 }
2807 
PreCallValidateGetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties) const2808 bool BestPractices::PreCallValidateGetPhysicalDeviceQueueFamilyProperties2KHR(
2809     VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2* pQueueFamilyProperties) const {
2810     const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
2811     if (pQueueFamilyProperties && bp_pd_state) {
2812         return ValidateCommonGetPhysicalDeviceQueueFamilyProperties(bp_pd_state.get(), *pQueueFamilyPropertyCount,
2813                                                                     bp_pd_state->vkGetPhysicalDeviceQueueFamilyProperties2KHRState,
2814                                                                     "vkGetPhysicalDeviceQueueFamilyProperties2KHR()");
2815     }
2816     return false;
2817 }
2818 
PreCallValidateGetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice,VkSurfaceKHR surface,uint32_t * pSurfaceFormatCount,VkSurfaceFormatKHR * pSurfaceFormats) const2819 bool BestPractices::PreCallValidateGetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
2820                                                                       uint32_t* pSurfaceFormatCount,
2821                                                                       VkSurfaceFormatKHR* pSurfaceFormats) const {
2822     if (!pSurfaceFormats) return false;
2823     const auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
2824     const auto& call_state = bp_pd_state->vkGetPhysicalDeviceSurfaceFormatsKHRState;
2825     bool skip = false;
2826     if (call_state == UNCALLED) {
2827         // Since we haven't recorded a preliminary value of *pSurfaceFormatCount, that likely means that the application didn't
2828         // previously call this function with a NULL value of pSurfaceFormats:
2829         skip |= LogWarning(physicalDevice, kVUID_Core_DevLimit_MustQueryCount,
2830                            "vkGetPhysicalDeviceSurfaceFormatsKHR() called with non-NULL pSurfaceFormats, but no prior call "
2831                            "has been made with NULL pSurfaceFormats to query pSurfaceFormatCount.");
2832     } else {
2833         if (*pSurfaceFormatCount > bp_pd_state->surface_formats_count) {
2834             skip |= LogWarning(physicalDevice, kVUID_Core_DevLimit_CountMismatch,
2835                                "vkGetPhysicalDeviceSurfaceFormatsKHR() called with non-NULL pSurfaceFormats, and with "
2836                                "pSurfaceFormatCount set to a value (%u) that is greater than the value (%u) that was returned "
2837                                "when pSurfaceFormats was NULL.",
2838                                *pSurfaceFormatCount, bp_pd_state->surface_formats_count);
2839         }
2840     }
2841     return skip;
2842 }
2843 
PreCallValidateQueueBindSparse(VkQueue queue,uint32_t bindInfoCount,const VkBindSparseInfo * pBindInfo,VkFence fence) const2844 bool BestPractices::PreCallValidateQueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo,
2845                                                    VkFence fence) const {
2846     bool skip = false;
2847 
2848     for (uint32_t bind_idx = 0; bind_idx < bindInfoCount; bind_idx++) {
2849         const VkBindSparseInfo& bind_info = pBindInfo[bind_idx];
2850         // Store sparse binding image_state; after binding is complete, make sure any image requiring metadata has it bound
2851         layer_data::unordered_set<const IMAGE_STATE*> sparse_images;
2852         // Track images getting metadata bound by this call in a set; it will be recorded into the image_state
2853         // in RecordQueueBindSparse.
2854         layer_data::unordered_set<const IMAGE_STATE*> sparse_images_with_metadata;
2855         // If we're binding sparse image memory make sure reqs were queried and note if metadata is required and bound
2856         for (uint32_t i = 0; i < bind_info.imageBindCount; ++i) {
2857             const auto& image_bind = bind_info.pImageBinds[i];
2858             auto image_state = Get<IMAGE_STATE>(image_bind.image);
2859             if (!image_state) {
2860                 continue;  // Param/Object validation should report image_bind.image handles being invalid, so just skip here.
2861             }
2862             sparse_images.insert(image_state.get());
2863             if (image_state->createInfo.flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
2864                 if (!image_state->get_sparse_reqs_called || image_state->sparse_requirements.empty()) {
2865                     // For now just warning if sparse image binding occurs without calling to get reqs first
2866                     skip |= LogWarning(image_state->image(), kVUID_Core_MemTrack_InvalidState,
2867                                        "vkQueueBindSparse(): Binding sparse memory to %s without first calling "
2868                                        "vkGetImageSparseMemoryRequirements[2KHR]() to retrieve requirements.",
2869                                        report_data->FormatHandle(image_state->image()).c_str());
2870                 }
2871             }
2872             if (!image_state->memory_requirements_checked[0]) {
2873                 // For now just warning if sparse image binding occurs without calling to get reqs first
2874                 skip |= LogWarning(image_state->image(), kVUID_Core_MemTrack_InvalidState,
2875                                    "vkQueueBindSparse(): Binding sparse memory to %s without first calling "
2876                                    "vkGetImageMemoryRequirements() to retrieve requirements.",
2877                                    report_data->FormatHandle(image_state->image()).c_str());
2878             }
2879         }
2880         for (uint32_t i = 0; i < bind_info.imageOpaqueBindCount; ++i) {
2881             const auto& image_opaque_bind = bind_info.pImageOpaqueBinds[i];
2882             auto image_state = Get<IMAGE_STATE>(image_opaque_bind.image);
2883             if (!image_state) {
2884                 continue;  // Param/Object validation should report image_opaque_bind.image handles being invalid, so just skip here.
2885             }
2886             sparse_images.insert(image_state.get());
2887             if (image_state->createInfo.flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
2888                 if (!image_state->get_sparse_reqs_called || image_state->sparse_requirements.empty()) {
2889                     // For now just warning if sparse image binding occurs without calling to get reqs first
2890                     skip |= LogWarning(image_state->image(), kVUID_Core_MemTrack_InvalidState,
2891                                        "vkQueueBindSparse(): Binding opaque sparse memory to %s without first calling "
2892                                        "vkGetImageSparseMemoryRequirements[2KHR]() to retrieve requirements.",
2893                                        report_data->FormatHandle(image_state->image()).c_str());
2894                 }
2895             }
2896             if (!image_state->memory_requirements_checked[0]) {
2897                 // For now just warning if sparse image binding occurs without calling to get reqs first
2898                 skip |= LogWarning(image_state->image(), kVUID_Core_MemTrack_InvalidState,
2899                                    "vkQueueBindSparse(): Binding opaque sparse memory to %s without first calling "
2900                                    "vkGetImageMemoryRequirements() to retrieve requirements.",
2901                                    report_data->FormatHandle(image_state->image()).c_str());
2902             }
2903             for (uint32_t j = 0; j < image_opaque_bind.bindCount; ++j) {
2904                 if (image_opaque_bind.pBinds[j].flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT) {
2905                     sparse_images_with_metadata.insert(image_state.get());
2906                 }
2907             }
2908         }
2909         for (const auto& sparse_image_state : sparse_images) {
2910             if (sparse_image_state->sparse_metadata_required && !sparse_image_state->sparse_metadata_bound &&
2911                 sparse_images_with_metadata.find(sparse_image_state) == sparse_images_with_metadata.end()) {
2912                 // Warn if sparse image binding metadata required for image with sparse binding, but metadata not bound
2913                 skip |= LogWarning(sparse_image_state->image(), kVUID_Core_MemTrack_InvalidState,
2914                                    "vkQueueBindSparse(): Binding sparse memory to %s which requires a metadata aspect but no "
2915                                    "binding with VK_SPARSE_MEMORY_BIND_METADATA_BIT set was made.",
2916                                    report_data->FormatHandle(sparse_image_state->image()).c_str());
2917             }
2918         }
2919     }
2920 
2921     return skip;
2922 }
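
// Sketch of the requirement queries the warnings above expect before sparse binding (illustrative names):
//
//     VkMemoryRequirements mem_reqs;
//     vkGetImageMemoryRequirements(device, sparse_image, &mem_reqs);
//
//     uint32_t sparse_req_count = 0;
//     vkGetImageSparseMemoryRequirements(device, sparse_image, &sparse_req_count, nullptr);
//     std::vector<VkSparseImageMemoryRequirements> sparse_reqs(sparse_req_count);
//     vkGetImageSparseMemoryRequirements(device, sparse_image, &sparse_req_count, sparse_reqs.data());
//
//     // If any entry reports VK_IMAGE_ASPECT_METADATA_BIT in formatProperties.aspectMask, bind that mip tail
//     // with a VkSparseMemoryBind whose flags include VK_SPARSE_MEMORY_BIND_METADATA_BIT before using the image.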
2923 
ManualPostCallRecordQueueBindSparse(VkQueue queue,uint32_t bindInfoCount,const VkBindSparseInfo * pBindInfo,VkFence fence,VkResult result)2924 void BestPractices::ManualPostCallRecordQueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo,
2925                                                         VkFence fence, VkResult result) {
2926     if (result != VK_SUCCESS) {
2927         return;
2928     }
2929 
2930     for (uint32_t bind_idx = 0; bind_idx < bindInfoCount; bind_idx++) {
2931         const VkBindSparseInfo& bind_info = pBindInfo[bind_idx];
2932         for (uint32_t i = 0; i < bind_info.imageOpaqueBindCount; ++i) {
2933             const auto& image_opaque_bind = bind_info.pImageOpaqueBinds[i];
2934             auto image_state = Get<IMAGE_STATE>(image_opaque_bind.image);
2935             if (!image_state) {
2936                 continue;  // Param/Object validation should report image_opaque_bind.image handles being invalid, so just skip here.
2937             }
2938             for (uint32_t j = 0; j < image_opaque_bind.bindCount; ++j) {
2939                 if (image_opaque_bind.pBinds[j].flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT) {
2940                     image_state->sparse_metadata_bound = true;
2941                 }
2942             }
2943         }
2944     }
2945 }
2946 
ClearAttachmentsIsFullClear(const CMD_BUFFER_STATE_BP * cmd,uint32_t rectCount,const VkClearRect * pRects) const2947 bool BestPractices::ClearAttachmentsIsFullClear(const CMD_BUFFER_STATE_BP* cmd, uint32_t rectCount,
2948                                                 const VkClearRect* pRects) const {
2949     if (cmd->createInfo.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
2950         // We don't know the accurate render area in a secondary,
2951         // so assume we clear the entire frame buffer.
2952         // This is resolved in CmdExecuteCommands where we can check if the clear is a full clear.
2953         return true;
2954     }
2955 
2956     // If we have a rect which covers the entire frame buffer, we have a LOAD_OP_CLEAR-like command.
2957     for (uint32_t i = 0; i < rectCount; i++) {
2958         auto& rect = pRects[i];
2959         auto& render_area = cmd->activeRenderPassBeginInfo.renderArea;
2960         if (rect.rect.extent.width == render_area.extent.width && rect.rect.extent.height == render_area.extent.height) {
2961             return true;
2962         }
2963     }
2964 
2965     return false;
2966 }
2967 
ValidateClearAttachment(VkCommandBuffer commandBuffer,const CMD_BUFFER_STATE_BP * cmd,uint32_t fb_attachment,uint32_t color_attachment,VkImageAspectFlags aspects,bool secondary) const2968 bool BestPractices::ValidateClearAttachment(VkCommandBuffer commandBuffer, const CMD_BUFFER_STATE_BP* cmd, uint32_t fb_attachment,
2969                                             uint32_t color_attachment, VkImageAspectFlags aspects, bool secondary) const {
2970     const RENDER_PASS_STATE* rp = cmd->activeRenderPass.get();
2971     bool skip = false;
2972 
2973     if (!rp || fb_attachment == VK_ATTACHMENT_UNUSED) {
2974         return skip;
2975     }
2976 
2977     const auto& rp_state = cmd->render_pass_state;
2978 
2979     auto attachment_itr = std::find_if(rp_state.touchesAttachments.begin(), rp_state.touchesAttachments.end(),
2980                                        [&](const AttachmentInfo& info) {
2981                                            return info.framebufferAttachment == fb_attachment;
2982                                        });
2983 
2984     // Only report aspects which haven't been touched yet.
2985     VkImageAspectFlags new_aspects = aspects;
2986     if (attachment_itr != rp_state.touchesAttachments.end()) {
2987         new_aspects &= ~attachment_itr->aspects;
2988     }
2989 
2990     // Warn if this is issued prior to Draw Cmd and clearing the entire attachment
2991     if (!cmd->hasDrawCmd) {
2992         skip |= LogPerformanceWarning(
2993             commandBuffer, kVUID_BestPractices_DrawState_ClearCmdBeforeDraw,
2994             "vkCmdClearAttachments() issued on %s prior to any Draw Cmds in current render pass. It is recommended you "
2995             "use RenderPass LOAD_OP_CLEAR on attachments instead.",
2996             report_data->FormatHandle(commandBuffer).c_str());
2997     }
2998 
2999     if ((new_aspects & VK_IMAGE_ASPECT_COLOR_BIT) &&
3000         rp->createInfo.pAttachments[fb_attachment].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
3001         skip |= LogPerformanceWarning(
3002             device, kVUID_BestPractices_ClearAttachments_ClearAfterLoad,
3003             "%svkCmdClearAttachments() issued on %s for color attachment #%u in this subpass, "
3004             "but LOAD_OP_LOAD was used. If you need to clear the framebuffer, always use LOAD_OP_CLEAR as "
3005             "it is more efficient.",
3006             secondary ? "vkCmdExecuteCommands(): " : "",
3007             report_data->FormatHandle(commandBuffer).c_str(), color_attachment);
3008     }
3009 
3010     if ((new_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
3011         rp->createInfo.pAttachments[fb_attachment].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
3012         skip |= LogPerformanceWarning(
3013             device, kVUID_BestPractices_ClearAttachments_ClearAfterLoad,
3014             "%svkCmdClearAttachments() issued on %s for the depth attachment in this subpass, "
3015             "but LOAD_OP_LOAD was used. If you need to clear the framebuffer, always use LOAD_OP_CLEAR as "
3016             "it is more efficient.",
3017             secondary ? "vkCmdExecuteCommands(): " : "",
3018             report_data->FormatHandle(commandBuffer).c_str());
3019     }
3020 
3021     if ((new_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
3022         rp->createInfo.pAttachments[fb_attachment].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
3023         skip |= LogPerformanceWarning(
3024             device, kVUID_BestPractices_ClearAttachments_ClearAfterLoad,
3025             "%svkCmdClearAttachments() issued on %s for the stencil attachment in this subpass, "
3026             "but LOAD_OP_LOAD was used. If you need to clear the framebuffer, always use LOAD_OP_CLEAR as "
3027             "it is more efficient.",
3028             secondary ? "vkCmdExecuteCommands(): " : "",
3029             report_data->FormatHandle(commandBuffer).c_str());
3030     }
3031 
3032     return skip;
3033 }
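
// The alternative these warnings recommend is clearing through the render pass itself; a sketch of an
// attachment description doing so (format and layouts are illustrative):
//
//     VkAttachmentDescription color_attachment = {};
//     color_attachment.format = VK_FORMAT_B8G8R8A8_UNORM;
//     color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
//     color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;   // clear on load instead of vkCmdClearAttachments()
//     color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
//     color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
//     color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
//     // The clear value is then supplied through VkRenderPassBeginInfo::pClearValues.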
3034 
PreCallValidateCmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects) const3035 bool BestPractices::PreCallValidateCmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
3036                                                        const VkClearAttachment* pAttachments, uint32_t rectCount,
3037                                                        const VkClearRect* pRects) const {
3038     bool skip = false;
3039     const auto cb_node = GetCBState(commandBuffer);
3040     if (!cb_node) return skip;
3041 
3042     if (cb_node->createInfo.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
3043         // Defer checks to ExecuteCommands.
3044         return skip;
3045     }
3046 
3047     // Only care about full clears; partial clears might have legitimate uses.
3048     if (!ClearAttachmentsIsFullClear(cb_node.get(), rectCount, pRects)) {
3049         return skip;
3050     }
3051 
3052     // Check for uses of ClearAttachments along with LOAD_OP_LOAD,
3053     // as it can be more efficient to just use LOAD_OP_CLEAR
3054     const RENDER_PASS_STATE* rp = cb_node->activeRenderPass.get();
3055     if (rp) {
3056         const auto& subpass = rp->createInfo.pSubpasses[cb_node->activeSubpass];
3057 
3058         for (uint32_t i = 0; i < attachmentCount; i++) {
3059             const auto& attachment = pAttachments[i];
3060 
3061             if (attachment.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
3062                 uint32_t color_attachment = attachment.colorAttachment;
3063                 uint32_t fb_attachment = subpass.pColorAttachments[color_attachment].attachment;
3064                 skip |= ValidateClearAttachment(commandBuffer, cb_node.get(), fb_attachment, color_attachment,
3065                                                 attachment.aspectMask, false);
3066             }
3067 
3068             if (subpass.pDepthStencilAttachment &&
3069                 (attachment.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
3070                 uint32_t fb_attachment = subpass.pDepthStencilAttachment->attachment;
3071                 skip |= ValidateClearAttachment(commandBuffer, cb_node.get(), fb_attachment, VK_ATTACHMENT_UNUSED,
3072                                                 attachment.aspectMask, false);
3073             }
3074         }
3075     }
3076 
3077     if (VendorCheckEnabled(kBPVendorAMD)) {
3078         for (uint32_t attachment_idx = 0; attachment_idx < attachmentCount; attachment_idx++) {
3079             if (pAttachments[attachment_idx].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
3080                 bool black_check = false;
3081                 black_check |= pAttachments[attachment_idx].clearValue.color.float32[0] != 0.0f;
3082                 black_check |= pAttachments[attachment_idx].clearValue.color.float32[1] != 0.0f;
3083                 black_check |= pAttachments[attachment_idx].clearValue.color.float32[2] != 0.0f;
3084                 black_check |= pAttachments[attachment_idx].clearValue.color.float32[3] != 0.0f &&
3085                                pAttachments[attachment_idx].clearValue.color.float32[3] != 1.0f;
3086 
3087                 bool white_check = false;
3088                 white_check |= pAttachments[attachment_idx].clearValue.color.float32[0] != 1.0f;
3089                 white_check |= pAttachments[attachment_idx].clearValue.color.float32[1] != 1.0f;
3090                 white_check |= pAttachments[attachment_idx].clearValue.color.float32[2] != 1.0f;
3091                 white_check |= pAttachments[attachment_idx].clearValue.color.float32[3] != 0.0f &&
3092                                pAttachments[attachment_idx].clearValue.color.float32[3] != 1.0f;
3093 
3094                 if (black_check && white_check) {
3095                     skip |= LogPerformanceWarning(device, kVUID_BestPractices_ClearAttachment_FastClearValues,
3096                         "%s Performance warning: vkCmdClearAttachments() clear value for color attachment %" PRIu32
3097                         " is not a fast clear value. Consider changing to one of the following: "
3098                         "RGBA(0, 0, 0, 0), "
3099                         "RGBA(0, 0, 0, 1), "
3100                         "RGBA(1, 1, 1, 0) or "
3101                         "RGBA(1, 1, 1, 1).",
3102                         VendorSpecificTag(kBPVendorAMD), attachment_idx);
3103                 }
3104             } else {
3105                 if ((pAttachments[attachment_idx].clearValue.depthStencil.depth != 0.0f &&
3106                      pAttachments[attachment_idx].clearValue.depthStencil.depth != 1.0f) ||
3107                     pAttachments[attachment_idx].clearValue.depthStencil.stencil != 0) {
3108                     skip |= LogPerformanceWarning(device, kVUID_BestPractices_ClearAttachment_FastClearValues,
3109                                                   "%s Performance warning: vkCmdClearAttachments() clear value for depth/stencil "
3110                                                   "attachment %" PRIu32 " is not a fast clear value. "
3111                                                   "Consider changing to one of the following: "
3112                                                   "D=0.0f, S=0 or "
3113                                                   "D=1.0f, S=0.",
3114                                                   VendorSpecificTag(kBPVendorAMD), attachment_idx);
3115                 }
3116             }
3117         }
3118     }
3119 
3120     return skip;
3121 }
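
// A clear value that stays on the AMD fast path checked above might look like this (illustrative only):
//
//     VkClearAttachment clear = {};
//     clear.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
//     clear.colorAttachment = 0;
//     clear.clearValue.color = {{0.0f, 0.0f, 0.0f, 1.0f}};  // RGBA(0, 0, 0, 1) is one of the listed fast clears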
3122 
PreCallValidateCmdResolveImage(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkImageResolve * pRegions) const3123 bool BestPractices::PreCallValidateCmdResolveImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3124                                                    VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount,
3125                                                    const VkImageResolve* pRegions) const {
3126     bool skip = false;
3127 
3128     skip |= VendorCheckEnabled(kBPVendorArm) &&
3129             LogPerformanceWarning(device, kVUID_BestPractices_CmdResolveImage_ResolvingImage,
3130                                   "%s Attempting to use vkCmdResolveImage to resolve a multisampled image. "
3131                                   "This is a very slow and extremely bandwidth intensive path. "
3132                                   "You should always resolve multisampled images on-tile with pResolveAttachments in VkRenderPass.",
3133                                   VendorSpecificTag(kBPVendorArm));
3134 
3135     return skip;
3136 }
3137 
PreCallValidateCmdResolveImage2KHR(VkCommandBuffer commandBuffer,const VkResolveImageInfo2KHR * pResolveImageInfo) const3138 bool BestPractices::PreCallValidateCmdResolveImage2KHR(VkCommandBuffer commandBuffer,
3139                                                        const VkResolveImageInfo2KHR* pResolveImageInfo) const {
3140     bool skip = false;
3141 
3142     skip |= VendorCheckEnabled(kBPVendorArm) &&
3143             LogPerformanceWarning(device, kVUID_BestPractices_CmdResolveImage2KHR_ResolvingImage,
3144                                   "%s Attempting to use vkCmdResolveImage2KHR to resolve a multisampled image. "
3145                                   "This is a very slow and extremely bandwidth intensive path. "
3146                                   "You should always resolve multisampled images on-tile with pResolveAttachments in VkRenderPass.",
3147                                   VendorSpecificTag(kBPVendorArm));
3148 
3149     return skip;
3150 }
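
// The on-tile alternative both warnings refer to: declare a resolve attachment in the subpass instead of
// calling vkCmdResolveImage*() afterwards (a sketch with illustrative attachment indices):
//
//     VkAttachmentReference color_ref = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};    // multisampled
//     VkAttachmentReference resolve_ref = {1, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};  // single-sampled
//
//     VkSubpassDescription subpass = {};
//     subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
//     subpass.colorAttachmentCount = 1;
//     subpass.pColorAttachments = &color_ref;
//     subpass.pResolveAttachments = &resolve_ref;  // resolved on-tile at the end of the subpass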
3151 
PreCallRecordCmdResolveImage(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkImageResolve * pRegions)3152 void BestPractices::PreCallRecordCmdResolveImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3153                                                  VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount,
3154                                                  const VkImageResolve* pRegions) {
3155     auto cb = GetCBState(commandBuffer);
3156     auto &funcs = cb->queue_submit_functions;
3157     auto* src = GetImageUsageState(srcImage);
3158     auto* dst = GetImageUsageState(dstImage);
3159 
3160     for (uint32_t i = 0; i < regionCount; i++) {
3161         QueueValidateImage(funcs, "vkCmdResolveImage()", src, IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_READ, pRegions[i].srcSubresource);
3162         QueueValidateImage(funcs, "vkCmdResolveImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE, pRegions[i].dstSubresource);
3163     }
3164 }
3165 
PreCallRecordCmdResolveImage2KHR(VkCommandBuffer commandBuffer,const VkResolveImageInfo2KHR * pResolveImageInfo)3166 void BestPractices::PreCallRecordCmdResolveImage2KHR(VkCommandBuffer commandBuffer,
3167                                                      const VkResolveImageInfo2KHR* pResolveImageInfo) {
3168     auto cb = GetCBState(commandBuffer);
3169     auto &funcs = cb->queue_submit_functions;
3170     auto* src = GetImageUsageState(pResolveImageInfo->srcImage);
3171     auto* dst = GetImageUsageState(pResolveImageInfo->dstImage);
3172     uint32_t regionCount = pResolveImageInfo->regionCount;
3173 
3174     for (uint32_t i = 0; i < regionCount; i++) {
3175         QueueValidateImage(funcs, "vkCmdResolveImage2KHR()", src, IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_READ, pResolveImageInfo->pRegions[i].srcSubresource);
3176         QueueValidateImage(funcs, "vkCmdResolveImage2KHR()", dst, IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE, pResolveImageInfo->pRegions[i].dstSubresource);
3177     }
3178 }
3179 
PreCallRecordCmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)3180 void BestPractices::PreCallRecordCmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout,
3181                                                     const VkClearColorValue* pColor, uint32_t rangeCount,
3182                                                     const VkImageSubresourceRange* pRanges) {
3183     auto cb = GetCBState(commandBuffer);
3184     auto &funcs = cb->queue_submit_functions;
3185     auto* dst = GetImageUsageState(image);
3186 
3187     for (uint32_t i = 0; i < rangeCount; i++) {
3188         QueueValidateImage(funcs, "vkCmdClearColorImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::CLEARED, pRanges[i]);
3189     }
3190 }
3191 
PreCallRecordCmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)3192 void BestPractices::PreCallRecordCmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout,
3193                                                            const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount,
3194                                                            const VkImageSubresourceRange* pRanges) {
3195     auto cb = GetCBState(commandBuffer);
3196     auto &funcs = cb->queue_submit_functions;
3197     auto* dst = GetImageUsageState(image);
3198 
3199     for (uint32_t i = 0; i < rangeCount; i++) {
3200         QueueValidateImage(funcs, "vkCmdClearDepthStencilImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::CLEARED, pRanges[i]);
3201     }
3202 }
3203 
PreCallRecordCmdCopyImage(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkImageCopy * pRegions)3204 void BestPractices::PreCallRecordCmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3205                                               VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount,
3206                                               const VkImageCopy* pRegions) {
3207     auto cb = GetCBState(commandBuffer);
3208     auto &funcs = cb->queue_submit_functions;
3209     auto* src = GetImageUsageState(srcImage);
3210     auto* dst = GetImageUsageState(dstImage);
3211 
3212     for (uint32_t i = 0; i < regionCount; i++) {
3213         QueueValidateImage(funcs, "vkCmdCopyImage()", src, IMAGE_SUBRESOURCE_USAGE_BP::COPY_READ, pRegions[i].srcSubresource);
3214         QueueValidateImage(funcs, "vkCmdCopyImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE, pRegions[i].dstSubresource);
3215     }
3216 }
3217 
PreCallRecordCmdCopyBufferToImage(VkCommandBuffer commandBuffer,VkBuffer srcBuffer,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkBufferImageCopy * pRegions)3218 void BestPractices::PreCallRecordCmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage,
3219                                                       VkImageLayout dstImageLayout, uint32_t regionCount,
3220                                                       const VkBufferImageCopy* pRegions) {
3221     auto cb = GetCBState(commandBuffer);
3222     auto &funcs = cb->queue_submit_functions;
3223     auto* dst = GetImageUsageState(dstImage);
3224 
3225     for (uint32_t i = 0; i < regionCount; i++) {
3226         QueueValidateImage(funcs, "vkCmdCopyBufferToImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE, pRegions[i].imageSubresource);
3227     }
3228 }
3229 
PreCallRecordCmdCopyImageToBuffer(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkBuffer dstBuffer,uint32_t regionCount,const VkBufferImageCopy * pRegions)3230 void BestPractices::PreCallRecordCmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3231                                                       VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) {
3232     auto cb = GetCBState(commandBuffer);
3233     auto &funcs = cb->queue_submit_functions;
3234     auto* src = GetImageUsageState(srcImage);
3235 
3236     for (uint32_t i = 0; i < regionCount; i++) {
3237         QueueValidateImage(funcs, "vkCmdCopyImageToBuffer()", src, IMAGE_SUBRESOURCE_USAGE_BP::COPY_READ, pRegions[i].imageSubresource);
3238     }
3239 }
3240 
PreCallRecordCmdBlitImage(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkImageBlit * pRegions,VkFilter filter)3241 void BestPractices::PreCallRecordCmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3242                                               VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount,
3243                                               const VkImageBlit* pRegions, VkFilter filter) {
3244     auto cb = GetCBState(commandBuffer);
3245     auto &funcs = cb->queue_submit_functions;
3246     auto* src = GetImageUsageState(srcImage);
3247     auto* dst = GetImageUsageState(dstImage);
3248 
3249     for (uint32_t i = 0; i < regionCount; i++) {
3250         QueueValidateImage(funcs, "vkCmdBlitImage()", src, IMAGE_SUBRESOURCE_USAGE_BP::BLIT_READ, pRegions[i].srcSubresource);
3251         QueueValidateImage(funcs, "vkCmdBlitImage()", dst, IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE, pRegions[i].dstSubresource);
3252     }
3253 }
3254 
PreCallValidateCreateSampler(VkDevice device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler) const3255 bool BestPractices::PreCallValidateCreateSampler(VkDevice device, const VkSamplerCreateInfo* pCreateInfo,
3256                                                  const VkAllocationCallbacks* pAllocator, VkSampler* pSampler) const {
3257     bool skip = false;
3258 
3259     if (VendorCheckEnabled(kBPVendorArm)) {
3260         if ((pCreateInfo->addressModeU != pCreateInfo->addressModeV) || (pCreateInfo->addressModeV != pCreateInfo->addressModeW)) {
3261             skip |= LogPerformanceWarning(
3262                 device, kVUID_BestPractices_CreateSampler_DifferentWrappingModes,
3263                 "%s Creating a sampler object with wrapping modes which do not match (U = %u, V = %u, W = %u). "
3264                 "This may cause reduced performance even if only U (1D image) or U/V wrapping modes (2D "
3265                 "image) are actually used. If you need different wrapping modes, disregard this warning.",
3266                 VendorSpecificTag(kBPVendorArm), pCreateInfo->addressModeU, pCreateInfo->addressModeV, pCreateInfo->addressModeW);
3267         }
3268 
3269         if ((pCreateInfo->minLod != 0.0f) || (pCreateInfo->maxLod < VK_LOD_CLAMP_NONE)) {
3270             skip |= LogPerformanceWarning(
3271                 device, kVUID_BestPractices_CreateSampler_LodClamping,
3272                 "%s Creating a sampler object with LOD clamping (minLod = %f, maxLod = %f). This may cause reduced performance. "
3273                 "Instead of clamping LOD in the sampler, consider using a VkImageView which restricts the mip levels, and set "
3274                 "minLod to 0.0 and maxLod to VK_LOD_CLAMP_NONE.",
3275                 VendorSpecificTag(kBPVendorArm), pCreateInfo->minLod, pCreateInfo->maxLod);
3276         }
3277 
3278         if (pCreateInfo->mipLodBias != 0.0f) {
3279             skip |=
3280                 LogPerformanceWarning(device, kVUID_BestPractices_CreateSampler_LodBias,
3281                                       "%s Creating a sampler object with LOD bias != 0.0 (%f). This will lead to less efficient "
3282                                       "descriptors being created and may cause reduced performance.",
3283                                       VendorSpecificTag(kBPVendorArm), pCreateInfo->mipLodBias);
3284         }
3285 
3286         if ((pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
3287              pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
3288              pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) &&
3289             (pCreateInfo->borderColor != VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK)) {
3290             skip |= LogPerformanceWarning(
3291                 device, kVUID_BestPractices_CreateSampler_BorderClampColor,
3292                 "%s Creating a sampler object with border clamping and borderColor != VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK. "
3293                 "This will lead to less efficient descriptors being created and may cause reduced performance. "
3294                 "If possible, use VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK as the border color.",
3295                 VendorSpecificTag(kBPVendorArm));
3296         }
3297 
3298         if (pCreateInfo->unnormalizedCoordinates) {
3299             skip |= LogPerformanceWarning(
3300                 device, kVUID_BestPractices_CreateSampler_UnnormalizedCoordinates,
3301                 "%s Creating a sampler object with unnormalized coordinates. This will lead to less efficient "
3302                 "descriptors being created and may cause reduced performance.",
3303                 VendorSpecificTag(kBPVendorArm));
3304         }
3305 
3306         if (pCreateInfo->anisotropyEnable) {
3307             skip |= LogPerformanceWarning(
3308                 device, kVUID_BestPractices_CreateSampler_Anisotropy,
3309                 "%s Creating a sampler object with anisotropy. This will lead to less efficient descriptors being created "
3310                 "and may cause reduced performance.",
3311                 VendorSpecificTag(kBPVendorArm));
3312         }
3313     }
3314 
3315     return skip;
3316 }
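
// A sampler configuration that avoids every Arm warning above, as a sketch rather than a requirement:
//
//     VkSamplerCreateInfo sampler_ci = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
//     sampler_ci.magFilter = VK_FILTER_LINEAR;
//     sampler_ci.minFilter = VK_FILTER_LINEAR;
//     sampler_ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
//     sampler_ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;  // matching U/V/W wrapping modes
//     sampler_ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
//     sampler_ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
//     sampler_ci.mipLodBias = 0.0f;
//     sampler_ci.anisotropyEnable = VK_FALSE;
//     sampler_ci.minLod = 0.0f;
//     sampler_ci.maxLod = VK_LOD_CLAMP_NONE;
//     sampler_ci.unnormalizedCoordinates = VK_FALSE;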
3317 
PreCallRecordCreateGraphicsPipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t createInfoCount,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * cgpl_state)3318 void BestPractices::PreCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
3319                                                          const VkGraphicsPipelineCreateInfo* pCreateInfos,
3320                                                          const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
3321                                                          void* cgpl_state) {
3322     ValidationStateTracker::PreCallRecordCreateGraphicsPipelines(device, pipelineCache, createInfoCount, pCreateInfos, pAllocator,
3323                                                                  pPipelines);
3324     // AMD best practice
3325     num_pso += createInfoCount;
3326 }
3327 
PreCallValidateUpdateDescriptorSets(VkDevice device,uint32_t descriptorWriteCount,const VkWriteDescriptorSet * pDescriptorWrites,uint32_t descriptorCopyCount,const VkCopyDescriptorSet * pDescriptorCopies) const3328 bool BestPractices::PreCallValidateUpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount,
3329                                                         const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount,
3330                                                         const VkCopyDescriptorSet* pDescriptorCopies) const {
3331     bool skip = false;
3332     if (VendorCheckEnabled(kBPVendorAMD)) {
3333         if (descriptorCopyCount > 0) {
3334             skip |= LogPerformanceWarning(device, kVUID_BestPractices_UpdateDescriptors_AvoidCopyingDescriptors,
3335                                           "%s Performance warning: copying descriptor sets is not recommended",
3336                                           VendorSpecificTag(kBPVendorAMD));
3337         }
3338     }
3339 
3340     return skip;
3341 }
3342 
PreCallValidateCreateDescriptorUpdateTemplate(VkDevice device,const VkDescriptorUpdateTemplateCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDescriptorUpdateTemplate * pDescriptorUpdateTemplate) const3343 bool BestPractices::PreCallValidateCreateDescriptorUpdateTemplate(VkDevice device,
3344                                                                   const VkDescriptorUpdateTemplateCreateInfo* pCreateInfo,
3345                                                                   const VkAllocationCallbacks* pAllocator,
3346                                                                   VkDescriptorUpdateTemplate* pDescriptorUpdateTemplate) const {
3347     bool skip = false;
3348     if (VendorCheckEnabled(kBPVendorAMD)) {
3349         skip |= LogPerformanceWarning(device, kVUID_BestPractices_UpdateDescriptors_PreferNonTemplate,
3350                                       "%s Performance warning: using descriptor update templates is not recommended. Prefer "
3351                                       "using vkUpdateDescriptorSets() instead.",
3352                                       VendorSpecificTag(kBPVendorAMD));
3353     }
3354 
3355     return skip;
3356 }
3357 
PreCallValidateCmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges) const3358 bool BestPractices::PreCallValidateCmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout,
3359                                                       const VkClearColorValue* pColor, uint32_t rangeCount,
3360                                                       const VkImageSubresourceRange* pRanges) const {
3361     bool skip = false;
3362     if (VendorCheckEnabled(kBPVendorAMD)) {
3363         skip |= LogPerformanceWarning(device, kVUID_BestPractices_ClearAttachment_ClearImage,
3364             "%s Performance warning: using vkCmdClearColorImage is not recommended. Prefer using LOAD_OP_CLEAR or "
3365             "vkCmdClearAttachments instead",
3366             VendorSpecificTag(kBPVendorAMD));
3367     }
3368 
3369     return skip;
3370 }
3371 
PreCallValidateCmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges) const3372 bool BestPractices::PreCallValidateCmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image,
3373                                                              VkImageLayout imageLayout,
3374                                                              const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount,
3375                                                              const VkImageSubresourceRange* pRanges) const {
3376     bool skip = false;
3377     if (VendorCheckEnabled(kBPVendorAMD)) {
3378         skip |= LogPerformanceWarning(
3379             device, kVUID_BestPractices_ClearAttachment_ClearImage,
3380             "%s Performance warning: using vkCmdClearDepthStencilImage is not recommended. Prefer using LOAD_OP_CLEAR or "
3381             "vkCmdClearAttachments instead",
3382             VendorSpecificTag(kBPVendorAMD));
3383     }
3384 
3385     return skip;
3386 }
3387 
PreCallValidateCreatePipelineLayout(VkDevice device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout) const3388 bool BestPractices::PreCallValidateCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo,
3389                                                         const VkAllocationCallbacks* pAllocator,
3390                                                         VkPipelineLayout* pPipelineLayout) const {
3391     bool skip = false;
3392     if (VendorCheckEnabled(kBPVendorAMD)) {
3393         // Descriptor sets cost 1 DWORD each.
3394         // Dynamic buffers cost 2 DWORDs each when robust buffer access is OFF.
3395         // Dynamic buffers cost 4 DWORDs each when robust buffer access is ON.
3396         // Push constants cost 1 DWORD per 4 bytes in the Push constant range.
3397         uint32_t pipeline_size = pCreateInfo->setLayoutCount;  // in DWORDS
3398         for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; i++) {
3399             auto descriptor_set_layout_state = Get<cvdescriptorset::DescriptorSetLayout>(pCreateInfo->pSetLayouts[i]);
3400             pipeline_size += descriptor_set_layout_state->GetDynamicDescriptorCount() * (robust_buffer_access ? 4 : 2);
3401         }
3402 
3403         for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
3404             pipeline_size += pCreateInfo->pPushConstantRanges[i].size / 4;
3405         }
3406 
3407         if (pipeline_size > kPipelineLayoutSizeWarningLimitAMD) {
3408             skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelinesLayout_KeepLayoutSmall,
3409                         "%s Performance warning: pipeline layout size is too large. Prefer smaller pipeline layouts. "
3410                         "Descriptor sets cost 1 DWORD each. "
3411                         "Dynamic buffers cost 2 DWORDs each when robust buffer access is OFF. "
3412                         "Dynamic buffers cost 4 DWORDs each when robust buffer access is ON. "
3413                         "Push constants cost 1 DWORD per 4 bytes in the push constant range.",
3414                                       VendorSpecificTag(kBPVendorAMD));
3415         }
3416     }
3417 
3418     return skip;
3419 }
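
// Worked example of the estimate above: a layout with 4 descriptor sets, 2 dynamic buffers (robust buffer
// access off) and 64 bytes of push constants is costed at 4 * 1 + 2 * 2 + 64 / 4 = 24 DWORDs; the warning
// fires only when this total exceeds kPipelineLayoutSizeWarningLimitAMD.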
3420 
PreCallValidateCmdCopyImage(VkCommandBuffer commandBuffer,VkImage srcImage,VkImageLayout srcImageLayout,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkImageCopy * pRegions) const3421 bool BestPractices::PreCallValidateCmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout,
3422                                                 VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount,
3423                                                 const VkImageCopy* pRegions) const {
3424     bool skip = false;
3425     std::stringstream src_image_hex;
3426     std::stringstream dst_image_hex;
3427     src_image_hex << "0x" << std::hex << HandleToUint64(srcImage);
3428     dst_image_hex << "0x" << std::hex << HandleToUint64(dstImage);
3429 
3430     if (VendorCheckEnabled(kBPVendorAMD)) {
3431         const auto src_state = Get<IMAGE_STATE>(srcImage);
3432         const auto dst_state = Get<IMAGE_STATE>(dstImage);
3433 
3434         if (src_state && dst_state) {
3435             VkImageTiling src_Tiling = src_state->createInfo.tiling;
3436             VkImageTiling dst_Tiling = dst_state->createInfo.tiling;
3437             if (src_Tiling != dst_Tiling && (src_Tiling == VK_IMAGE_TILING_LINEAR || dst_Tiling == VK_IMAGE_TILING_LINEAR)) {
3438                 skip |=
3439                     LogPerformanceWarning(device, kVUID_BestPractices_vkImage_AvoidImageToImageCopy,
3440                                           "%s Performance warning: image %s and image %s have differing tilings. Use "
3441                                           "image-to-buffer (vkCmdCopyImageToBuffer) "
3442                                           "and buffer-to-image (vkCmdCopyBufferToImage) copies instead of image-to-image "
3443                                           "copies when converting between linear and optimal images.",
3444                                           VendorSpecificTag(kBPVendorAMD), src_image_hex.str().c_str(), dst_image_hex.str().c_str());
3445             }
3446         }
3447     }
3448 
3449     return skip;
3450 }
3451 
PreCallValidateCmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline pipeline) const3452 bool BestPractices::PreCallValidateCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
3453                                                    VkPipeline pipeline) const {
3454     bool skip = false;
3455 
3456     if (VendorCheckEnabled(kBPVendorAMD)) {
3457         if (pipelines_used_in_frame.find(pipeline) != pipelines_used_in_frame.end()) {
3458             skip |= LogPerformanceWarning(device, kVUID_BestPractices_Pipeline_SortAndBind,
3459                         "%s Performance warning: Pipeline %s was bound twice in the frame. Keep pipeline state changes to a "
3460                         "minimum, for example, by sorting draw calls by pipeline.",
3461                         VendorSpecificTag(kBPVendorAMD), report_data->FormatHandle(pipeline).c_str());
3462         }
3463     }
3464 
3465     return skip;
3466 }
3467 
ManualPostCallRecordQueueSubmit(VkQueue queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence,VkResult result)3468 void BestPractices::ManualPostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits,
3469                                                     VkFence fence, VkResult result) {
3470     // AMD best practice
3471     num_queue_submissions += submitCount;
3472 }
3473 
PreCallValidateQueuePresentKHR(VkQueue queue,const VkPresentInfoKHR * pPresentInfo) const3474 bool BestPractices::PreCallValidateQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) const {
3475     bool skip = false;
3476 
3477     if (VendorCheckEnabled(kBPVendorAMD)) {
3478         if (num_queue_submissions > kNumberOfSubmissionWarningLimitAMD) {
3479             skip |= LogPerformanceWarning(
3480                 device, kVUID_BestPractices_Submission_ReduceNumberOfSubmissions,
3481                 "%s Performance warning: command buffers submitted %" PRIu32 " times this frame. Submitting command buffers has a CPU "
3482                 "and GPU overhead. Submit fewer times to incur less overhead.",
3483                 VendorSpecificTag(kBPVendorAMD), num_queue_submissions);
3484         }
3485     }
3486 
3487     return skip;
3488 }

void BestPractices::PostCallRecordCmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
                                                     VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags,
                                                     uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
                                                     uint32_t bufferMemoryBarrierCount,
                                                     const VkBufferMemoryBarrier* pBufferMemoryBarriers,
                                                     uint32_t imageMemoryBarrierCount,
                                                     const VkImageMemoryBarrier* pImageMemoryBarriers) {
    num_barriers_objects += memoryBarrierCount;
    num_barriers_objects += imageMemoryBarrierCount;
    num_barriers_objects += bufferMemoryBarrierCount;
}

void BestPractices::ManualPostCallRecordCreateFence(VkDevice device, const VkFenceCreateInfo* pCreateInfo,
                                                    const VkAllocationCallbacks* pAllocator, VkFence* pFence, VkResult result) {
    // AMD best practice
    if (result == VK_SUCCESS) {
        num_fence_objects++;
    }
}

void BestPractices::ManualPostCallRecordCreateSemaphore(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo,
                                                        const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore,
                                                        VkResult result) {
    // AMD best practice
    if (result == VK_SUCCESS) {
        num_semaphore_objects++;
    }
}

bool BestPractices::PreCallValidateCreateSemaphore(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo,
                                                   const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore) const {
    bool skip = false;
    if (VendorCheckEnabled(kBPVendorAMD)) {
        if (num_semaphore_objects > kMaxRecommendedSemaphoreObjectsSizeAMD) {
            skip |= LogPerformanceWarning(device, kVUID_BestPractices_SyncObjects_HighNumberOfSemaphores,
                                          "%s Performance warning: High number of VkSemaphore objects created. "
                                          "Minimize the amount of queue synchronization that is used. "
                                          "Semaphores and fences have overhead. Each semaphore has a CPU and GPU cost associated with it.",
                                          VendorSpecificTag(kBPVendorAMD));
        }
    }

    return skip;
}

bool BestPractices::PreCallValidateCreateFence(VkDevice device, const VkFenceCreateInfo* pCreateInfo,
                                               const VkAllocationCallbacks* pAllocator, VkFence* pFence) const {
    bool skip = false;
    if (VendorCheckEnabled(kBPVendorAMD)) {
        if (num_fence_objects > kMaxRecommendedFenceObjectsSizeAMD) {
            skip |= LogPerformanceWarning(device, kVUID_BestPractices_SyncObjects_HighNumberOfFences,
                                          "%s Performance warning: High number of VkFence objects created. "
                                          "Minimize the amount of CPU-GPU synchronization that is used. "
                                          "Semaphores and fences have overhead. Each fence has a CPU and GPU cost associated with it.",
                                          VendorSpecificTag(kBPVendorAMD));
        }
    }

    return skip;
}
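
// Illustrative sketch, not part of the layer: applications can keep the fence count low by creating a
// small pool once (for example, one fence per frame in flight) and reusing it with vkResetFences rather
// than creating a new fence every frame. kFramesInFlight and frame_index are hypothetical; the
// structures and entry points are real Vulkan API.
//
//     std::array<VkFence, kFramesInFlight> frame_fences{};
//     VkFenceCreateInfo fence_info{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT};
//     for (auto& fence : frame_fences) vkCreateFence(device, &fence_info, nullptr, &fence);
//
//     // Once per frame, instead of vkCreateFence:
//     vkWaitForFences(device, 1, &frame_fences[frame_index], VK_TRUE, UINT64_MAX);
//     vkResetFences(device, 1, &frame_fences[frame_index]);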

void BestPractices::PostTransformLRUCacheModel::resize(size_t size) { _entries.resize(size); }

bool BestPractices::PostTransformLRUCacheModel::query_cache(uint32_t value) {
    // look for a cache hit
    auto hit = std::find_if(_entries.begin(), _entries.end(), [value](const CacheEntry& entry) { return entry.value == value; });
    if (hit != _entries.end()) {
        // mark the cache hit as being most recently used
        hit->age = iteration++;
        return true;
    }

    // if there's no cache hit, we need to model the entry being inserted into the cache
    CacheEntry new_entry = {value, iteration};
    if (iteration < static_cast<uint32_t>(_entries.size())) {
        // if there is still space left in the cache, use the next available slot
        *(_entries.begin() + iteration) = new_entry;
    } else {
        // otherwise replace the least recently used cache entry (no hit was found, so scan the whole cache)
        auto lru = std::min_element(_entries.begin(), _entries.end(),
                                    [](const CacheEntry& a, const CacheEntry& b) { return a.age < b.age; });
        *lru = new_entry;
    }
    iteration++;
    return false;
}
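
// Illustrative usage, not taken from this file: the model approximates a post-transform vertex cache.
// It is sized to an assumed cache depth and fed vertex indices in draw order; each query_cache() miss
// models a vertex that would have to be re-shaded. The cache depth of 32 and the 'indices' range are
// hypothetical, and standalone construction assumes the nested class is accessible to the caller.
//
//     BestPractices::PostTransformLRUCacheModel cache;
//     cache.resize(32);  // assumed cache depth
//     uint32_t misses = 0;
//     for (uint32_t index : indices) {
//         if (!cache.query_cache(index)) misses++;
//     }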

bool BestPractices::PreCallValidateAcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout,
                                                       VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) const {
    const auto swapchain_data = Get<SWAPCHAIN_NODE>(swapchain);
    bool skip = false;
    if (swapchain_data && swapchain_data->images.empty()) {
        skip |= LogWarning(swapchain, kVUID_Core_DrawState_SwapchainImagesNotFound,
                           "vkAcquireNextImageKHR: No images found to acquire from. Application probably did not call "
                           "vkGetSwapchainImagesKHR after swapchain creation.");
    }
    return skip;
}
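
// Illustrative sketch, not part of the layer: the expected application pattern is to retrieve the
// swapchain images right after vkCreateSwapchainKHR using the usual two-call enumeration. All names
// here are real Vulkan API; 'device' and 'swapchain' stand in for the application's handles.
//
//     uint32_t image_count = 0;
//     vkGetSwapchainImagesKHR(device, swapchain, &image_count, nullptr);
//     std::vector<VkImage> swapchain_images(image_count);
//     vkGetSwapchainImagesKHR(device, swapchain, &image_count, swapchain_images.data());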

void BestPractices::CommonPostCallRecordGetPhysicalDeviceQueueFamilyProperties(CALL_STATE& call_state, bool no_pointer) {
    if (no_pointer) {
        if (UNCALLED == call_state) {
            call_state = QUERY_COUNT;
        }
    } else {  // Save queue family properties
        call_state = QUERY_DETAILS;
    }
}
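
// Illustrative sketch, not part of the layer: the call-state progression above mirrors the standard
// two-call enumeration pattern. The first call (null properties pointer) moves the state from
// UNCALLED to QUERY_COUNT, the second call to QUERY_DETAILS. 'physical_device' is a stand-in handle;
// the entry point is real Vulkan API.
//
//     uint32_t family_count = 0;
//     vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, nullptr);
//     std::vector<VkQueueFamilyProperties> families(family_count);
//     vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, families.data());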

void BestPractices::PostCallRecordGetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice,
                                                                         uint32_t* pQueueFamilyPropertyCount,
                                                                         VkQueueFamilyProperties* pQueueFamilyProperties) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceQueueFamilyProperties(physicalDevice, pQueueFamilyPropertyCount,
                                                                                 pQueueFamilyProperties);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        CommonPostCallRecordGetPhysicalDeviceQueueFamilyProperties(bp_pd_state->vkGetPhysicalDeviceQueueFamilyPropertiesState,
                                                                   nullptr == pQueueFamilyProperties);
    }
}

void BestPractices::PostCallRecordGetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
                                                                          uint32_t* pQueueFamilyPropertyCount,
                                                                          VkQueueFamilyProperties2* pQueueFamilyProperties) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceQueueFamilyProperties2(physicalDevice, pQueueFamilyPropertyCount,
                                                                                  pQueueFamilyProperties);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        CommonPostCallRecordGetPhysicalDeviceQueueFamilyProperties(bp_pd_state->vkGetPhysicalDeviceQueueFamilyProperties2State,
                                                                   nullptr == pQueueFamilyProperties);
    }
}

void BestPractices::PostCallRecordGetPhysicalDeviceQueueFamilyProperties2KHR(VkPhysicalDevice physicalDevice,
                                                                             uint32_t* pQueueFamilyPropertyCount,
                                                                             VkQueueFamilyProperties2* pQueueFamilyProperties) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceQueueFamilyProperties2KHR(physicalDevice, pQueueFamilyPropertyCount,
                                                                                     pQueueFamilyProperties);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        CommonPostCallRecordGetPhysicalDeviceQueueFamilyProperties(bp_pd_state->vkGetPhysicalDeviceQueueFamilyProperties2KHRState,
                                                                   nullptr == pQueueFamilyProperties);
    }
}

void BestPractices::PostCallRecordGetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceFeatures(physicalDevice, pFeatures);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceFeaturesState = QUERY_DETAILS;
    }
}

void BestPractices::PostCallRecordGetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                                             VkPhysicalDeviceFeatures2* pFeatures) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceFeatures2(physicalDevice, pFeatures);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceFeaturesState = QUERY_DETAILS;
    }
}

void BestPractices::PostCallRecordGetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice,
                                                                VkPhysicalDeviceFeatures2* pFeatures) {
    ValidationStateTracker::PostCallRecordGetPhysicalDeviceFeatures2KHR(physicalDevice, pFeatures);
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceFeaturesState = QUERY_DETAILS;
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfaceCapabilitiesKHR(VkPhysicalDevice physicalDevice,
                                                                                VkSurfaceKHR surface,
                                                                                VkSurfaceCapabilitiesKHR* pSurfaceCapabilities,
                                                                                VkResult result) {
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceSurfaceCapabilitiesKHRState = QUERY_DETAILS;
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfaceCapabilities2KHR(
    VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
    VkSurfaceCapabilities2KHR* pSurfaceCapabilities, VkResult result) {
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceSurfaceCapabilitiesKHRState = QUERY_DETAILS;
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfaceCapabilities2EXT(VkPhysicalDevice physicalDevice,
                                                                                 VkSurfaceKHR surface,
                                                                                 VkSurfaceCapabilities2EXT* pSurfaceCapabilities,
                                                                                 VkResult result) {
    auto bp_pd_state = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_state) {
        bp_pd_state->vkGetPhysicalDeviceSurfaceCapabilitiesKHRState = QUERY_DETAILS;
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice,
                                                                                VkSurfaceKHR surface, uint32_t* pPresentModeCount,
                                                                                VkPresentModeKHR* pPresentModes, VkResult result) {
    auto bp_pd_data = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_data) {
        auto& call_state = bp_pd_data->vkGetPhysicalDeviceSurfacePresentModesKHRState;

        if (*pPresentModeCount) {
            if (call_state < QUERY_COUNT) {
                call_state = QUERY_COUNT;
            }
        }
        if (pPresentModes) {
            if (call_state < QUERY_DETAILS) {
                call_state = QUERY_DETAILS;
            }
        }
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                                                           uint32_t* pSurfaceFormatCount,
                                                                           VkSurfaceFormatKHR* pSurfaceFormats, VkResult result) {
    auto bp_pd_data = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_data) {
        auto& call_state = bp_pd_data->vkGetPhysicalDeviceSurfaceFormatsKHRState;

        if (*pSurfaceFormatCount) {
            if (call_state < QUERY_COUNT) {
                call_state = QUERY_COUNT;
            }
            bp_pd_data->surface_formats_count = *pSurfaceFormatCount;
        }
        if (pSurfaceFormats) {
            if (call_state < QUERY_DETAILS) {
                call_state = QUERY_DETAILS;
            }
        }
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceSurfaceFormats2KHR(VkPhysicalDevice physicalDevice,
                                                                            const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
                                                                            uint32_t* pSurfaceFormatCount,
                                                                            VkSurfaceFormat2KHR* pSurfaceFormats, VkResult result) {
    auto bp_pd_data = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_data) {
        if (*pSurfaceFormatCount) {
            if (bp_pd_data->vkGetPhysicalDeviceSurfaceFormatsKHRState < QUERY_COUNT) {
                bp_pd_data->vkGetPhysicalDeviceSurfaceFormatsKHRState = QUERY_COUNT;
            }
            bp_pd_data->surface_formats_count = *pSurfaceFormatCount;
        }
        if (pSurfaceFormats) {
            if (bp_pd_data->vkGetPhysicalDeviceSurfaceFormatsKHRState < QUERY_DETAILS) {
                bp_pd_data->vkGetPhysicalDeviceSurfaceFormatsKHRState = QUERY_DETAILS;
            }
        }
    }
}

void BestPractices::ManualPostCallRecordGetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physicalDevice,
                                                                                   uint32_t* pPropertyCount,
                                                                                   VkDisplayPlanePropertiesKHR* pProperties,
                                                                                   VkResult result) {
    auto bp_pd_data = GetPhysicalDeviceState(physicalDevice);
    if (bp_pd_data) {
        if (*pPropertyCount) {
            if (bp_pd_data->vkGetPhysicalDeviceDisplayPlanePropertiesKHRState < QUERY_COUNT) {
                bp_pd_data->vkGetPhysicalDeviceDisplayPlanePropertiesKHRState = QUERY_COUNT;
            }
        }
        if (pProperties) {
            if (bp_pd_data->vkGetPhysicalDeviceDisplayPlanePropertiesKHRState < QUERY_DETAILS) {
                bp_pd_data->vkGetPhysicalDeviceDisplayPlanePropertiesKHRState = QUERY_DETAILS;
            }
        }
    }
}

void BestPractices::ManualPostCallRecordGetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain,
                                                              uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages,
                                                              VkResult result) {
    auto swapchain_state = std::static_pointer_cast<SWAPCHAIN_STATE_BP>(Get<SWAPCHAIN_NODE>(swapchain));
    if (swapchain_state && (pSwapchainImages || *pSwapchainImageCount)) {
        if (swapchain_state->vkGetSwapchainImagesKHRState < QUERY_DETAILS) {
            swapchain_state->vkGetSwapchainImagesKHRState = QUERY_DETAILS;
        }
    }
}

void BestPractices::ManualPostCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo,
                                                     const VkAllocationCallbacks* pAllocator, VkDevice* pDevice, VkResult result) {
    if (VK_SUCCESS == result) {
        if ((pCreateInfo->pEnabledFeatures != nullptr) && (pCreateInfo->pEnabledFeatures->robustBufferAccess == VK_TRUE)) {
            robust_buffer_access = true;
        }
    }
}
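
// Illustrative sketch, not part of the layer: robust_buffer_access is recorded above when the
// application enables the feature at device creation. The queue setup is elided for brevity; the
// structures and entry point are real Vulkan API, while 'physical_device' and 'device' are stand-ins.
//
//     VkPhysicalDeviceFeatures enabled_features{};
//     enabled_features.robustBufferAccess = VK_TRUE;
//     VkDeviceCreateInfo device_info{};
//     device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
//     device_info.pEnabledFeatures = &enabled_features;
//     // ...queue create infos, extensions...
//     vkCreateDevice(physical_device, &device_info, nullptr, &device);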

void BestPractices::PreCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) {
    ValidationStateTracker::PreCallRecordQueueSubmit(queue, submitCount, pSubmits, fence);

    auto queue_state = Get<QUEUE_STATE>(queue);
    for (uint32_t submit = 0; submit < submitCount; submit++) {
        const auto& submit_info = pSubmits[submit];
        for (uint32_t cb_index = 0; cb_index < submit_info.commandBufferCount; cb_index++) {
            auto cb = GetCBState(submit_info.pCommandBuffers[cb_index]);
            // Run the checks that were deferred until submit time for each submitted command buffer
            for (auto& func : cb->queue_submit_functions) {
                func(*this, *queue_state, *cb);
            }
        }
    }
}