1 /* Copyright (c) 2018-2021 The Khronos Group Inc.
2  * Copyright (c) 2018-2021 Valve Corporation
3  * Copyright (c) 2018-2021 LunarG, Inc.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  * Author: Karl Schultz <karl@lunarg.com>
18  * Author: Tony Barbour <tony@lunarg.com>
19  */
20 
21 #include <climits>
22 #include <cmath>
23 #include "gpu_validation.h"
24 #include "spirv-tools/optimizer.hpp"
25 #include "spirv-tools/instrument.hpp"
26 #include "layer_chassis_dispatch.h"
27 #include "gpu_vuids.h"
28 #include "gpu_pre_draw_constants.h"
29 #include "sync_utils.h"
30 #include "buffer_state.h"
31 #include "cmd_buffer_state.h"
32 #include "render_pass_state.h"
33 
// Union of all NV ray tracing shader stages; used below when building descriptor set layout
// bindings that must be visible to every stage GPU-AV can instrument.
static const VkShaderStageFlags kShaderStageAllRayTracing =
    VK_SHADER_STAGE_ANY_HIT_BIT_NV | VK_SHADER_STAGE_CALLABLE_BIT_NV | VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV |
    VK_SHADER_STAGE_INTERSECTION_BIT_NV | VK_SHADER_STAGE_MISS_BIT_NV | VK_SHADER_STAGE_RAYGEN_BIT_NV;
37 
// Keep in sync with the GLSL shader below.
struct GpuAccelerationStructureBuildValidationBuffer {
    uint32_t instances_to_validate;      // number of VkGeometryInstanceNV entries the shader inspects
    uint32_t replacement_handle_bits_0;  // low 32 bits of the known-good BLAS handle used to patch bad instances
    uint32_t replacement_handle_bits_1;  // high 32 bits of the replacement handle
    uint32_t invalid_handle_found;       // incremented by the shader once per invalid handle found
    uint32_t invalid_handle_bits_0;      // low 32 bits of the last invalid handle encountered
    uint32_t invalid_handle_bits_1;      // high 32 bits of the last invalid handle encountered
    uint32_t valid_handles_count;        // count of 64-bit handles in the trailing valid_handles[] runtime array
};
48 
49 // This is the GLSL source for the compute shader that is used during ray tracing acceleration structure
50 // building validation which inspects instance buffers for top level acceleration structure builds and
// reports and replaces invalid bottom level acceleration structure handles with a known-good bottom
// level acceleration structure handle so that applications can continue without undefined behavior
// long enough to report errors.
54 //
55 // #version 450
56 // layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
57 // struct VkGeometryInstanceNV {
58 //     uint unused[14];
59 //     uint handle_bits_0;
60 //     uint handle_bits_1;
61 // };
62 // layout(set=0, binding=0, std430) buffer InstanceBuffer {
63 //     VkGeometryInstanceNV instances[];
64 // };
65 // layout(set=0, binding=1, std430) buffer ValidationBuffer {
66 //     uint instances_to_validate;
67 //     uint replacement_handle_bits_0;
68 //     uint replacement_handle_bits_1;
69 //     uint invalid_handle_found;
70 //     uint invalid_handle_bits_0;
71 //     uint invalid_handle_bits_1;
72 //     uint valid_handles_count;
73 //     uint valid_handles[];
74 // };
75 // void main() {
76 //     for (uint instance_index = 0; instance_index < instances_to_validate; instance_index++) {
77 //         uint instance_handle_bits_0 = instances[instance_index].handle_bits_0;
78 //         uint instance_handle_bits_1 = instances[instance_index].handle_bits_1;
79 //         bool valid = false;
80 //         for (uint valid_handle_index = 0; valid_handle_index < valid_handles_count; valid_handle_index++) {
81 //             if (instance_handle_bits_0 == valid_handles[2*valid_handle_index+0] &&
82 //                 instance_handle_bits_1 == valid_handles[2*valid_handle_index+1]) {
83 //                 valid = true;
84 //                 break;
85 //             }
86 //         }
87 //         if (!valid) {
88 //             invalid_handle_found += 1;
89 //             invalid_handle_bits_0 = instance_handle_bits_0;
90 //             invalid_handle_bits_1 = instance_handle_bits_1;
91 //             instances[instance_index].handle_bits_0 = replacement_handle_bits_0;
92 //             instances[instance_index].handle_bits_1 = replacement_handle_bits_1;
93 //         }
94 //     }
95 // }
96 //
97 // To regenerate the spirv below:
98 //   1. Save the above GLSL source to a file called validation_shader.comp.
99 //   2. Run in terminal
100 //
101 //      glslangValidator.exe -x -V validation_shader.comp -o validation_shader.comp.spv
102 //
//   3. Copy-paste the contents of validation_shader.comp.spv here (clang-format will fix up the alignment).
// Generated SPIR-V for the GLSL validation shader above — regenerate per the instructions, do not hand edit.
static const uint32_t kComputeShaderSpirv[] = {
    0x07230203, 0x00010000, 0x00080007, 0x0000006d, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001, 0x4c534c47,
    0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0005000f, 0x00000005, 0x00000004, 0x6e69616d,
    0x00000000, 0x00060010, 0x00000004, 0x00000011, 0x00000001, 0x00000001, 0x00000001, 0x00030003, 0x00000002, 0x000001c2,
    0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00060005, 0x00000008, 0x74736e69, 0x65636e61, 0x646e695f, 0x00007865,
    0x00070005, 0x00000011, 0x696c6156, 0x69746164, 0x75426e6f, 0x72656666, 0x00000000, 0x00090006, 0x00000011, 0x00000000,
    0x74736e69, 0x65636e61, 0x6f745f73, 0x6c61765f, 0x74616469, 0x00000065, 0x000a0006, 0x00000011, 0x00000001, 0x6c706572,
    0x6d656361, 0x5f746e65, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x000a0006, 0x00000011, 0x00000002, 0x6c706572,
    0x6d656361, 0x5f746e65, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000031, 0x00090006, 0x00000011, 0x00000003, 0x61766e69,
    0x5f64696c, 0x646e6168, 0x665f656c, 0x646e756f, 0x00000000, 0x00090006, 0x00000011, 0x00000004, 0x61766e69, 0x5f64696c,
    0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x00090006, 0x00000011, 0x00000005, 0x61766e69, 0x5f64696c, 0x646e6168,
    0x625f656c, 0x5f737469, 0x00000031, 0x00080006, 0x00000011, 0x00000006, 0x696c6176, 0x61685f64, 0x656c646e, 0x6f635f73,
    0x00746e75, 0x00070006, 0x00000011, 0x00000007, 0x696c6176, 0x61685f64, 0x656c646e, 0x00000073, 0x00030005, 0x00000013,
    0x00000000, 0x00080005, 0x0000001b, 0x74736e69, 0x65636e61, 0x6e61685f, 0x5f656c64, 0x73746962, 0x0000305f, 0x00080005,
    0x0000001e, 0x65476b56, 0x74656d6f, 0x6e497972, 0x6e617473, 0x564e6563, 0x00000000, 0x00050006, 0x0000001e, 0x00000000,
    0x73756e75, 0x00006465, 0x00070006, 0x0000001e, 0x00000001, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x00070006,
    0x0000001e, 0x00000002, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000031, 0x00060005, 0x00000020, 0x74736e49, 0x65636e61,
    0x66667542, 0x00007265, 0x00060006, 0x00000020, 0x00000000, 0x74736e69, 0x65636e61, 0x00000073, 0x00030005, 0x00000022,
    0x00000000, 0x00080005, 0x00000027, 0x74736e69, 0x65636e61, 0x6e61685f, 0x5f656c64, 0x73746962, 0x0000315f, 0x00040005,
    0x0000002d, 0x696c6176, 0x00000064, 0x00070005, 0x0000002f, 0x696c6176, 0x61685f64, 0x656c646e, 0x646e695f, 0x00007865,
    0x00040047, 0x00000010, 0x00000006, 0x00000004, 0x00050048, 0x00000011, 0x00000000, 0x00000023, 0x00000000, 0x00050048,
    0x00000011, 0x00000001, 0x00000023, 0x00000004, 0x00050048, 0x00000011, 0x00000002, 0x00000023, 0x00000008, 0x00050048,
    0x00000011, 0x00000003, 0x00000023, 0x0000000c, 0x00050048, 0x00000011, 0x00000004, 0x00000023, 0x00000010, 0x00050048,
    0x00000011, 0x00000005, 0x00000023, 0x00000014, 0x00050048, 0x00000011, 0x00000006, 0x00000023, 0x00000018, 0x00050048,
    0x00000011, 0x00000007, 0x00000023, 0x0000001c, 0x00030047, 0x00000011, 0x00000003, 0x00040047, 0x00000013, 0x00000022,
    0x00000000, 0x00040047, 0x00000013, 0x00000021, 0x00000001, 0x00040047, 0x0000001d, 0x00000006, 0x00000004, 0x00050048,
    0x0000001e, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000001e, 0x00000001, 0x00000023, 0x00000038, 0x00050048,
    0x0000001e, 0x00000002, 0x00000023, 0x0000003c, 0x00040047, 0x0000001f, 0x00000006, 0x00000040, 0x00050048, 0x00000020,
    0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x00000020, 0x00000003, 0x00040047, 0x00000022, 0x00000022, 0x00000000,
    0x00040047, 0x00000022, 0x00000021, 0x00000000, 0x00020013, 0x00000002, 0x00030021, 0x00000003, 0x00000002, 0x00040015,
    0x00000006, 0x00000020, 0x00000000, 0x00040020, 0x00000007, 0x00000007, 0x00000006, 0x0004002b, 0x00000006, 0x00000009,
    0x00000000, 0x0003001d, 0x00000010, 0x00000006, 0x000a001e, 0x00000011, 0x00000006, 0x00000006, 0x00000006, 0x00000006,
    0x00000006, 0x00000006, 0x00000006, 0x00000010, 0x00040020, 0x00000012, 0x00000002, 0x00000011, 0x0004003b, 0x00000012,
    0x00000013, 0x00000002, 0x00040015, 0x00000014, 0x00000020, 0x00000001, 0x0004002b, 0x00000014, 0x00000015, 0x00000000,
    0x00040020, 0x00000016, 0x00000002, 0x00000006, 0x00020014, 0x00000019, 0x0004002b, 0x00000006, 0x0000001c, 0x0000000e,
    0x0004001c, 0x0000001d, 0x00000006, 0x0000001c, 0x0005001e, 0x0000001e, 0x0000001d, 0x00000006, 0x00000006, 0x0003001d,
    0x0000001f, 0x0000001e, 0x0003001e, 0x00000020, 0x0000001f, 0x00040020, 0x00000021, 0x00000002, 0x00000020, 0x0004003b,
    0x00000021, 0x00000022, 0x00000002, 0x0004002b, 0x00000014, 0x00000024, 0x00000001, 0x0004002b, 0x00000014, 0x00000029,
    0x00000002, 0x00040020, 0x0000002c, 0x00000007, 0x00000019, 0x0003002a, 0x00000019, 0x0000002e, 0x0004002b, 0x00000014,
    0x00000036, 0x00000006, 0x0004002b, 0x00000014, 0x0000003b, 0x00000007, 0x0004002b, 0x00000006, 0x0000003c, 0x00000002,
    0x0004002b, 0x00000006, 0x00000048, 0x00000001, 0x00030029, 0x00000019, 0x00000050, 0x0004002b, 0x00000014, 0x00000058,
    0x00000003, 0x0004002b, 0x00000014, 0x0000005d, 0x00000004, 0x0004002b, 0x00000014, 0x00000060, 0x00000005, 0x00050036,
    0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005, 0x0004003b, 0x00000007, 0x00000008, 0x00000007,
    0x0004003b, 0x00000007, 0x0000001b, 0x00000007, 0x0004003b, 0x00000007, 0x00000027, 0x00000007, 0x0004003b, 0x0000002c,
    0x0000002d, 0x00000007, 0x0004003b, 0x00000007, 0x0000002f, 0x00000007, 0x0003003e, 0x00000008, 0x00000009, 0x000200f9,
    0x0000000a, 0x000200f8, 0x0000000a, 0x000400f6, 0x0000000c, 0x0000000d, 0x00000000, 0x000200f9, 0x0000000e, 0x000200f8,
    0x0000000e, 0x0004003d, 0x00000006, 0x0000000f, 0x00000008, 0x00050041, 0x00000016, 0x00000017, 0x00000013, 0x00000015,
    0x0004003d, 0x00000006, 0x00000018, 0x00000017, 0x000500b0, 0x00000019, 0x0000001a, 0x0000000f, 0x00000018, 0x000400fa,
    0x0000001a, 0x0000000b, 0x0000000c, 0x000200f8, 0x0000000b, 0x0004003d, 0x00000006, 0x00000023, 0x00000008, 0x00070041,
    0x00000016, 0x00000025, 0x00000022, 0x00000015, 0x00000023, 0x00000024, 0x0004003d, 0x00000006, 0x00000026, 0x00000025,
    0x0003003e, 0x0000001b, 0x00000026, 0x0004003d, 0x00000006, 0x00000028, 0x00000008, 0x00070041, 0x00000016, 0x0000002a,
    0x00000022, 0x00000015, 0x00000028, 0x00000029, 0x0004003d, 0x00000006, 0x0000002b, 0x0000002a, 0x0003003e, 0x00000027,
    0x0000002b, 0x0003003e, 0x0000002d, 0x0000002e, 0x0003003e, 0x0000002f, 0x00000009, 0x000200f9, 0x00000030, 0x000200f8,
    0x00000030, 0x000400f6, 0x00000032, 0x00000033, 0x00000000, 0x000200f9, 0x00000034, 0x000200f8, 0x00000034, 0x0004003d,
    0x00000006, 0x00000035, 0x0000002f, 0x00050041, 0x00000016, 0x00000037, 0x00000013, 0x00000036, 0x0004003d, 0x00000006,
    0x00000038, 0x00000037, 0x000500b0, 0x00000019, 0x00000039, 0x00000035, 0x00000038, 0x000400fa, 0x00000039, 0x00000031,
    0x00000032, 0x000200f8, 0x00000031, 0x0004003d, 0x00000006, 0x0000003a, 0x0000001b, 0x0004003d, 0x00000006, 0x0000003d,
    0x0000002f, 0x00050084, 0x00000006, 0x0000003e, 0x0000003c, 0x0000003d, 0x00050080, 0x00000006, 0x0000003f, 0x0000003e,
    0x00000009, 0x00060041, 0x00000016, 0x00000040, 0x00000013, 0x0000003b, 0x0000003f, 0x0004003d, 0x00000006, 0x00000041,
    0x00000040, 0x000500aa, 0x00000019, 0x00000042, 0x0000003a, 0x00000041, 0x000300f7, 0x00000044, 0x00000000, 0x000400fa,
    0x00000042, 0x00000043, 0x00000044, 0x000200f8, 0x00000043, 0x0004003d, 0x00000006, 0x00000045, 0x00000027, 0x0004003d,
    0x00000006, 0x00000046, 0x0000002f, 0x00050084, 0x00000006, 0x00000047, 0x0000003c, 0x00000046, 0x00050080, 0x00000006,
    0x00000049, 0x00000047, 0x00000048, 0x00060041, 0x00000016, 0x0000004a, 0x00000013, 0x0000003b, 0x00000049, 0x0004003d,
    0x00000006, 0x0000004b, 0x0000004a, 0x000500aa, 0x00000019, 0x0000004c, 0x00000045, 0x0000004b, 0x000200f9, 0x00000044,
    0x000200f8, 0x00000044, 0x000700f5, 0x00000019, 0x0000004d, 0x00000042, 0x00000031, 0x0000004c, 0x00000043, 0x000300f7,
    0x0000004f, 0x00000000, 0x000400fa, 0x0000004d, 0x0000004e, 0x0000004f, 0x000200f8, 0x0000004e, 0x0003003e, 0x0000002d,
    0x00000050, 0x000200f9, 0x00000032, 0x000200f8, 0x0000004f, 0x000200f9, 0x00000033, 0x000200f8, 0x00000033, 0x0004003d,
    0x00000006, 0x00000052, 0x0000002f, 0x00050080, 0x00000006, 0x00000053, 0x00000052, 0x00000024, 0x0003003e, 0x0000002f,
    0x00000053, 0x000200f9, 0x00000030, 0x000200f8, 0x00000032, 0x0004003d, 0x00000019, 0x00000054, 0x0000002d, 0x000400a8,
    0x00000019, 0x00000055, 0x00000054, 0x000300f7, 0x00000057, 0x00000000, 0x000400fa, 0x00000055, 0x00000056, 0x00000057,
    0x000200f8, 0x00000056, 0x00050041, 0x00000016, 0x00000059, 0x00000013, 0x00000058, 0x0004003d, 0x00000006, 0x0000005a,
    0x00000059, 0x00050080, 0x00000006, 0x0000005b, 0x0000005a, 0x00000048, 0x00050041, 0x00000016, 0x0000005c, 0x00000013,
    0x00000058, 0x0003003e, 0x0000005c, 0x0000005b, 0x0004003d, 0x00000006, 0x0000005e, 0x0000001b, 0x00050041, 0x00000016,
    0x0000005f, 0x00000013, 0x0000005d, 0x0003003e, 0x0000005f, 0x0000005e, 0x0004003d, 0x00000006, 0x00000061, 0x00000027,
    0x00050041, 0x00000016, 0x00000062, 0x00000013, 0x00000060, 0x0003003e, 0x00000062, 0x00000061, 0x0004003d, 0x00000006,
    0x00000063, 0x00000008, 0x00050041, 0x00000016, 0x00000064, 0x00000013, 0x00000024, 0x0004003d, 0x00000006, 0x00000065,
    0x00000064, 0x00070041, 0x00000016, 0x00000066, 0x00000022, 0x00000015, 0x00000063, 0x00000024, 0x0003003e, 0x00000066,
    0x00000065, 0x0004003d, 0x00000006, 0x00000067, 0x00000008, 0x00050041, 0x00000016, 0x00000068, 0x00000013, 0x00000029,
    0x0004003d, 0x00000006, 0x00000069, 0x00000068, 0x00070041, 0x00000016, 0x0000006a, 0x00000022, 0x00000015, 0x00000067,
    0x00000029, 0x0003003e, 0x0000006a, 0x00000069, 0x000200f9, 0x00000057, 0x000200f8, 0x00000057, 0x000200f9, 0x0000000d,
    0x000200f8, 0x0000000d, 0x0004003d, 0x00000006, 0x0000006b, 0x00000008, 0x00050080, 0x00000006, 0x0000006c, 0x0000006b,
    0x00000024, 0x0003003e, 0x00000008, 0x0000006c, 0x000200f9, 0x0000000a, 0x000200f8, 0x0000000c, 0x000100fd, 0x00010038};
186 
187 // Convenience function for reporting problems with setting up GPU Validation.
188 template <typename T>
ReportSetupProblem(T object,const char * const specific_message) const189 void GpuAssisted::ReportSetupProblem(T object, const char *const specific_message) const {
190     LogError(object, "UNASSIGNED-GPU-Assisted Validation Error. ", "Detail: (%s)", specific_message);
191 }
192 
CheckForDescriptorIndexing(DeviceFeatures enabled_features) const193 bool GpuAssisted::CheckForDescriptorIndexing(DeviceFeatures enabled_features) const {
194     bool result =
195         (IsExtEnabled(device_extensions.vk_ext_descriptor_indexing) &&
196          (enabled_features.core12.descriptorIndexing || enabled_features.core12.shaderInputAttachmentArrayDynamicIndexing ||
197           enabled_features.core12.shaderUniformTexelBufferArrayDynamicIndexing ||
198           enabled_features.core12.shaderStorageTexelBufferArrayDynamicIndexing ||
199           enabled_features.core12.shaderUniformBufferArrayNonUniformIndexing ||
200           enabled_features.core12.shaderSampledImageArrayNonUniformIndexing ||
201           enabled_features.core12.shaderStorageBufferArrayNonUniformIndexing ||
202           enabled_features.core12.shaderStorageImageArrayNonUniformIndexing ||
203           enabled_features.core12.shaderInputAttachmentArrayNonUniformIndexing ||
204           enabled_features.core12.shaderUniformTexelBufferArrayNonUniformIndexing ||
205           enabled_features.core12.shaderStorageTexelBufferArrayNonUniformIndexing ||
206           enabled_features.core12.descriptorBindingUniformBufferUpdateAfterBind ||
207           enabled_features.core12.descriptorBindingSampledImageUpdateAfterBind ||
208           enabled_features.core12.descriptorBindingStorageImageUpdateAfterBind ||
209           enabled_features.core12.descriptorBindingStorageBufferUpdateAfterBind ||
210           enabled_features.core12.descriptorBindingUniformTexelBufferUpdateAfterBind ||
211           enabled_features.core12.descriptorBindingStorageTexelBufferUpdateAfterBind ||
212           enabled_features.core12.descriptorBindingUpdateUnusedWhilePending ||
213           enabled_features.core12.descriptorBindingPartiallyBound ||
214           enabled_features.core12.descriptorBindingVariableDescriptorCount || enabled_features.core12.runtimeDescriptorArray));
215     return result;
216 }
217 
PreCallRecordCreateBuffer(VkDevice device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer,void * cb_state_data)218 void GpuAssisted::PreCallRecordCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
219                                             const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, void *cb_state_data) {
220     // Ray tracing acceleration structure instance buffers also need the storage buffer usage as
221     // acceleration structure build validation will find and replace invalid acceleration structure
222     // handles inside of a compute shader.
223     create_buffer_api_state *cb_state = reinterpret_cast<create_buffer_api_state *>(cb_state_data);
224     if (cb_state && cb_state->modified_create_info.usage & VK_BUFFER_USAGE_RAY_TRACING_BIT_NV) {
225         cb_state->modified_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
226     }
227 
228     // Validating DrawIndirectCount countBuffer will require validation shader to bind the count buffer as a storage buffer
229     if (validate_draw_indirect && cb_state && cb_state->modified_create_info.usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) {
230         cb_state->modified_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
231     }
232     ValidationStateTracker::PreCallRecordCreateBuffer(device, pCreateInfo, pAllocator, pBuffer, cb_state_data);
233 }
234 
PostCallRecordCreateBuffer(VkDevice device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer,VkResult result)235 void GpuAssisted::PostCallRecordCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
236                                              const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, VkResult result) {
237     ValidationStateTracker::PostCallRecordCreateBuffer(device, pCreateInfo, pAllocator, pBuffer, result);
238     if (pCreateInfo) {
239         const auto *opaque_capture_address = LvlFindInChain<VkBufferOpaqueCaptureAddressCreateInfo>(pCreateInfo->pNext);
240         if (opaque_capture_address) {
241             // Validate against the size requested when the buffer was created
242             buffer_map[opaque_capture_address->opaqueCaptureAddress] = pCreateInfo->size;
243         }
244     }
245 }
246 
247 // Turn on necessary device features.
PreCallRecordCreateDevice(VkPhysicalDevice gpu,const VkDeviceCreateInfo * create_info,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice,void * modified_create_info)248 void GpuAssisted::PreCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo *create_info,
249                                             const VkAllocationCallbacks *pAllocator, VkDevice *pDevice,
250                                             void *modified_create_info) {
251     DispatchGetPhysicalDeviceFeatures(gpu, &supported_features);
252     VkPhysicalDeviceFeatures features = {};
253     features.vertexPipelineStoresAndAtomics = true;
254     features.fragmentStoresAndAtomics = true;
255     features.shaderInt64 = true;
256     UtilPreCallRecordCreateDevice(gpu, reinterpret_cast<safe_VkDeviceCreateInfo *>(modified_create_info), supported_features,
257                                   features);
258     ValidationStateTracker::PreCallRecordCreateDevice(gpu, create_info, pAllocator, pDevice, modified_create_info);
259 }
260 // Perform initializations that can be done at Create Device time.
void GpuAssisted::PostCallRecordCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
                                             const VkAllocationCallbacks *pAllocator, VkDevice *pDevice, VkResult result) {
    // The state tracker sets up the device state
    ValidationStateTracker::PostCallRecordCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice, result);

    // "this" is the instance-level layer object; look up the GpuAssisted object created for the
    // new device so the device-level configuration below lands on the right object.
    ValidationObject *device_object = GetLayerDataPtr(get_dispatch_key(*pDevice), layer_data_map);
    ValidationObject *validation_data = GetValidationObject(device_object->object_dispatch, this->container_type);
    GpuAssisted *device_gpu_assisted = static_cast<GpuAssisted *>(validation_data);

    // Buffer out-of-bounds checking is skipped when the app already enabled robust buffer access;
    // otherwise it is controlled by the "gpuav_buffer_oob" layer option (defaults to enabled when
    // the option is absent or empty).
    if (device_gpu_assisted->enabled_features.core.robustBufferAccess ||
        device_gpu_assisted->enabled_features.robustness2_features.robustBufferAccess2) {
        device_gpu_assisted->buffer_oob_enabled = false;
    } else {
        std::string bufferoob_string = getLayerOption("khronos_validation.gpuav_buffer_oob");
        transform(bufferoob_string.begin(), bufferoob_string.end(), bufferoob_string.begin(), ::tolower);
        device_gpu_assisted->buffer_oob_enabled = !bufferoob_string.empty() ? !bufferoob_string.compare("true") : true;
    }
    // Both options below likewise default to enabled when unset or empty.
    std::string descriptor_indexing_string = getLayerOption("khronos_validation.gpuav_descriptor_indexing");
    transform(descriptor_indexing_string.begin(), descriptor_indexing_string.end(), descriptor_indexing_string.begin(), ::tolower);
    bool validate_descriptor_indexing = !descriptor_indexing_string.empty() ? !descriptor_indexing_string.compare("true") : true;

    std::string draw_indirect_string = getLayerOption("khronos_validation.validate_draw_indirect");
    transform(draw_indirect_string.begin(), draw_indirect_string.end(), draw_indirect_string.begin(), ::tolower);
    device_gpu_assisted->validate_draw_indirect = !draw_indirect_string.empty() ? !draw_indirect_string.compare("true") : true;

    // GPU-AV cannot run on Vulkan 1.0 devices; disable itself rather than limp along.
    if (device_gpu_assisted->phys_dev_props.apiVersion < VK_API_VERSION_1_1) {
        ReportSetupProblem(device, "GPU-Assisted validation requires Vulkan 1.1 or later.  GPU-Assisted Validation disabled.");
        device_gpu_assisted->aborted = true;
        return;
    }

    // Instrumented shaders must be able to perform stores from graphics pipeline stages.
    if (!supported_features.fragmentStoresAndAtomics || !supported_features.vertexPipelineStoresAndAtomics) {
        ReportSetupProblem(device,
                           "GPU-Assisted validation requires fragmentStoresAndAtomics and vertexPipelineStoresAndAtomics.  "
                           "GPU-Assisted Validation disabled.");
        device_gpu_assisted->aborted = true;
        return;
    }

    // Buffer device address checking needs 64-bit integer math in shaders; without it, that one
    // check is skipped but GPU-AV otherwise proceeds.
    if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
         IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
        !supported_features.shaderInt64) {
        LogWarning(device, "UNASSIGNED-GPU-Assisted Validation Warning",
                   "shaderInt64 feature is not available.  No buffer device address checking will be attempted");
    }
    device_gpu_assisted->shaderInt64 = supported_features.shaderInt64;
    device_gpu_assisted->physicalDevice = physicalDevice;
    device_gpu_assisted->device = *pDevice;
    // One uint32 beyond the SPIRV-Tools max instrumentation output count; presumably the extra
    // slot is the written-size/flags word — see spirv-tools instrument.hpp to confirm.
    device_gpu_assisted->output_buffer_size = sizeof(uint32_t) * (spvtools::kInstMaxOutCnt + 1);
    if (validate_descriptor_indexing) {
        device_gpu_assisted->descriptor_indexing = CheckForDescriptorIndexing(device_gpu_assisted->enabled_features);
    }
    // Instrumentation descriptor set layout: three identical storage-buffer bindings (0, 1, 2),
    // visible to every shader stage GPU-AV can instrument.
    std::vector<VkDescriptorSetLayoutBinding> bindings;
    VkDescriptorSetLayoutBinding binding = {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
                                            VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT |
                                                VK_SHADER_STAGE_MESH_BIT_NV | VK_SHADER_STAGE_TASK_BIT_NV |
                                                kShaderStageAllRayTracing,
                                            NULL};
    bindings.push_back(binding);
    for (auto i = 1; i < 3; i++) {
        binding.binding = i;
        bindings.push_back(binding);
    }
    UtilPostCallRecordCreateDevice(pCreateInfo, bindings, device_gpu_assisted, device_gpu_assisted->phys_dev_props);
    CreateAccelerationStructureBuildValidationState(device_gpu_assisted);
}
327 
PostCallRecordGetBufferDeviceAddress(VkDevice device,const VkBufferDeviceAddressInfo * pInfo,VkDeviceAddress address)328 void GpuAssisted::PostCallRecordGetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
329                                                        VkDeviceAddress address) {
330     auto buffer_state = Get<BUFFER_STATE>(pInfo->buffer);
331     // Validate against the size requested when the buffer was created
332     if (buffer_state) {
333         buffer_state->deviceAddress = address;
334         buffer_map[address] = buffer_state->createInfo.size;
335     }
336     ValidationStateTracker::PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
337 }
338 
// VK_EXT_buffer_device_address alias — forwards to the core entry point above.
void GpuAssisted::PostCallRecordGetBufferDeviceAddressEXT(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
                                                          VkDeviceAddress address) {
    PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
}
343 
// VK_KHR_buffer_device_address alias — forwards to the core entry point above.
void GpuAssisted::PostCallRecordGetBufferDeviceAddressKHR(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
                                                          VkDeviceAddress address) {
    PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
}
348 
PreCallRecordDestroyBuffer(VkDevice device,VkBuffer buffer,const VkAllocationCallbacks * pAllocator)349 void GpuAssisted::PreCallRecordDestroyBuffer(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator) {
350     auto buffer_state = Get<BUFFER_STATE>(buffer);
351     if (buffer_state) buffer_map.erase(buffer_state->deviceAddress);
352     ValidationStateTracker::PreCallRecordDestroyBuffer(device, buffer, pAllocator);
353 }
354 
355 // Clean up device-related resources
PreCallRecordDestroyDevice(VkDevice device,const VkAllocationCallbacks * pAllocator)356 void GpuAssisted::PreCallRecordDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) {
357     DestroyAccelerationStructureBuildValidationState();
358     UtilPreCallRecordDestroyDevice(this);
359     ValidationStateTracker::PreCallRecordDestroyDevice(device, pAllocator);
360     if (pre_draw_validation_state.globals_created) {
361         DispatchDestroyShaderModule(device, pre_draw_validation_state.validation_shader_module, nullptr);
362         DispatchDestroyDescriptorSetLayout(device, pre_draw_validation_state.validation_ds_layout, nullptr);
363         DispatchDestroyPipelineLayout(device, pre_draw_validation_state.validation_pipeline_layout, nullptr);
364         for (auto it = pre_draw_validation_state.renderpass_to_pipeline.begin();
365              it != pre_draw_validation_state.renderpass_to_pipeline.end(); ++it) {
366             DispatchDestroyPipeline(device, it->second, nullptr);
367         }
368         pre_draw_validation_state.renderpass_to_pipeline.clear();
369         pre_draw_validation_state.globals_created = false;
370     }
371     // State Tracker can end up making vma calls through callbacks - don't destroy allocator until ST is done
372     if (vmaAllocator) {
373         vmaDestroyAllocator(vmaAllocator);
374     }
375     desc_set_manager.reset();
376 }
377 
CreateAccelerationStructureBuildValidationState(GpuAssisted * device_gpuav)378 void GpuAssisted::CreateAccelerationStructureBuildValidationState(GpuAssisted *device_gpuav) {
379     if (device_gpuav->aborted) {
380         return;
381     }
382 
383     auto &as_validation_state = device_gpuav->acceleration_structure_validation_state;
384     if (as_validation_state.initialized) {
385         return;
386     }
387 
388     if (!IsExtEnabled(device_extensions.vk_nv_ray_tracing)) {
389         return;
390     }
391 
392     // Outline:
393     //   - Create valid bottom level acceleration structure which acts as replacement
394     //      - Create and load vertex buffer
395     //      - Create and load index buffer
396     //      - Create, allocate memory for, and bind memory for acceleration structure
397     //      - Query acceleration structure handle
398     //      - Create command pool and command buffer
399     //      - Record build acceleration structure command
400     //      - Submit command buffer and wait for completion
401     //      - Cleanup
402     //  - Create compute pipeline for validating instance buffers
403     //      - Create descriptor set layout
404     //      - Create pipeline layout
405     //      - Create pipeline
406     //      - Cleanup
407 
408     VkResult result = VK_SUCCESS;
409 
410     VkBuffer vbo = VK_NULL_HANDLE;
411     VmaAllocation vbo_allocation = VK_NULL_HANDLE;
412     if (result == VK_SUCCESS) {
413         auto vbo_ci = LvlInitStruct<VkBufferCreateInfo>();
414         vbo_ci.size = sizeof(float) * 9;
415         vbo_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
416 
417         VmaAllocationCreateInfo vbo_ai = {};
418         vbo_ai.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
419         vbo_ai.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
420 
421         result = vmaCreateBuffer(device_gpuav->vmaAllocator, &vbo_ci, &vbo_ai, &vbo, &vbo_allocation, nullptr);
422         if (result != VK_SUCCESS) {
423             ReportSetupProblem(device, "Failed to create vertex buffer for acceleration structure build validation.");
424         }
425     }
426 
427     if (result == VK_SUCCESS) {
428         uint8_t *mapped_vbo_buffer = nullptr;
429         result = vmaMapMemory(device_gpuav->vmaAllocator, vbo_allocation, reinterpret_cast<void **>(&mapped_vbo_buffer));
430         if (result != VK_SUCCESS) {
431             ReportSetupProblem(device, "Failed to map vertex buffer for acceleration structure build validation.");
432         } else {
433             const std::vector<float> vertices = {1.0f, 0.0f, 0.0f, 0.5f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f};
434             std::memcpy(mapped_vbo_buffer, (uint8_t *)vertices.data(), sizeof(float) * vertices.size());
435             vmaUnmapMemory(device_gpuav->vmaAllocator, vbo_allocation);
436         }
437     }
438 
439     VkBuffer ibo = VK_NULL_HANDLE;
440     VmaAllocation ibo_allocation = VK_NULL_HANDLE;
441     if (result == VK_SUCCESS) {
442         auto ibo_ci = LvlInitStruct<VkBufferCreateInfo>();
443         ibo_ci.size = sizeof(uint32_t) * 3;
444         ibo_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
445 
446         VmaAllocationCreateInfo ibo_ai = {};
447         ibo_ai.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
448         ibo_ai.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
449 
450         result = vmaCreateBuffer(device_gpuav->vmaAllocator, &ibo_ci, &ibo_ai, &ibo, &ibo_allocation, nullptr);
451         if (result != VK_SUCCESS) {
452             ReportSetupProblem(device, "Failed to create index buffer for acceleration structure build validation.");
453         }
454     }
455 
456     if (result == VK_SUCCESS) {
457         uint8_t *mapped_ibo_buffer = nullptr;
458         result = vmaMapMemory(device_gpuav->vmaAllocator, ibo_allocation, reinterpret_cast<void **>(&mapped_ibo_buffer));
459         if (result != VK_SUCCESS) {
460             ReportSetupProblem(device, "Failed to map index buffer for acceleration structure build validation.");
461         } else {
462             const std::vector<uint32_t> indicies = {0, 1, 2};
463             std::memcpy(mapped_ibo_buffer, (uint8_t *)indicies.data(), sizeof(uint32_t) * indicies.size());
464             vmaUnmapMemory(device_gpuav->vmaAllocator, ibo_allocation);
465         }
466     }
467 
468     auto geometry = LvlInitStruct<VkGeometryNV>();
469     geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV;
470     geometry.geometry.triangles = LvlInitStruct<VkGeometryTrianglesNV>();
471     geometry.geometry.triangles.vertexData = vbo;
472     geometry.geometry.triangles.vertexOffset = 0;
473     geometry.geometry.triangles.vertexCount = 3;
474     geometry.geometry.triangles.vertexStride = 12;
475     geometry.geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
476     geometry.geometry.triangles.indexData = ibo;
477     geometry.geometry.triangles.indexOffset = 0;
478     geometry.geometry.triangles.indexCount = 3;
479     geometry.geometry.triangles.indexType = VK_INDEX_TYPE_UINT32;
480     geometry.geometry.triangles.transformData = VK_NULL_HANDLE;
481     geometry.geometry.triangles.transformOffset = 0;
482     geometry.geometry.aabbs = LvlInitStruct<VkGeometryAABBNV>();
483 
484     auto as_ci = LvlInitStruct<VkAccelerationStructureCreateInfoNV>();
485     as_ci.info = LvlInitStruct<VkAccelerationStructureInfoNV>();
486     as_ci.info.instanceCount = 0;
487     as_ci.info.geometryCount = 1;
488     as_ci.info.pGeometries = &geometry;
489     if (result == VK_SUCCESS) {
490         result = DispatchCreateAccelerationStructureNV(device_gpuav->device, &as_ci, nullptr, &as_validation_state.replacement_as);
491         if (result != VK_SUCCESS) {
492             ReportSetupProblem(device_gpuav->device,
493                                "Failed to create acceleration structure for acceleration structure build validation.");
494         }
495     }
496 
497     VkMemoryRequirements2 as_mem_requirements = {};
498     if (result == VK_SUCCESS) {
499         auto as_mem_requirements_info = LvlInitStruct<VkAccelerationStructureMemoryRequirementsInfoNV>();
500         as_mem_requirements_info.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV;
501         as_mem_requirements_info.accelerationStructure = as_validation_state.replacement_as;
502 
503         DispatchGetAccelerationStructureMemoryRequirementsNV(device_gpuav->device, &as_mem_requirements_info, &as_mem_requirements);
504     }
505 
506     VmaAllocationInfo as_memory_ai = {};
507     if (result == VK_SUCCESS) {
508         VmaAllocationCreateInfo as_memory_aci = {};
509         as_memory_aci.usage = VMA_MEMORY_USAGE_GPU_ONLY;
510 
511         result = vmaAllocateMemory(device_gpuav->vmaAllocator, &as_mem_requirements.memoryRequirements, &as_memory_aci,
512                                    &as_validation_state.replacement_as_allocation, &as_memory_ai);
513         if (result != VK_SUCCESS) {
514             ReportSetupProblem(device_gpuav->device,
515                                "Failed to alloc acceleration structure memory for acceleration structure build validation.");
516         }
517     }
518 
519     if (result == VK_SUCCESS) {
520         auto as_bind_info = LvlInitStruct<VkBindAccelerationStructureMemoryInfoNV>();
521         as_bind_info.accelerationStructure = as_validation_state.replacement_as;
522         as_bind_info.memory = as_memory_ai.deviceMemory;
523         as_bind_info.memoryOffset = as_memory_ai.offset;
524 
525         result = DispatchBindAccelerationStructureMemoryNV(device_gpuav->device, 1, &as_bind_info);
526         if (result != VK_SUCCESS) {
527             ReportSetupProblem(device_gpuav->device,
528                                "Failed to bind acceleration structure memory for acceleration structure build validation.");
529         }
530     }
531 
532     if (result == VK_SUCCESS) {
533         result = DispatchGetAccelerationStructureHandleNV(device_gpuav->device, as_validation_state.replacement_as,
534                                                           sizeof(uint64_t), &as_validation_state.replacement_as_handle);
535         if (result != VK_SUCCESS) {
536             ReportSetupProblem(device_gpuav->device,
537                                "Failed to get acceleration structure handle for acceleration structure build validation.");
538         }
539     }
540 
541     VkMemoryRequirements2 scratch_mem_requirements = {};
542     if (result == VK_SUCCESS) {
543         auto scratch_mem_requirements_info = LvlInitStruct<VkAccelerationStructureMemoryRequirementsInfoNV>();
544         scratch_mem_requirements_info.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
545         scratch_mem_requirements_info.accelerationStructure = as_validation_state.replacement_as;
546 
547         DispatchGetAccelerationStructureMemoryRequirementsNV(device_gpuav->device, &scratch_mem_requirements_info,
548                                                              &scratch_mem_requirements);
549     }
550 
551     VkBuffer scratch = VK_NULL_HANDLE;
552     VmaAllocation scratch_allocation = {};
553     if (result == VK_SUCCESS) {
554         auto scratch_ci = LvlInitStruct<VkBufferCreateInfo>();
555         scratch_ci.size = scratch_mem_requirements.memoryRequirements.size;
556         scratch_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
557         VmaAllocationCreateInfo scratch_aci = {};
558         scratch_aci.usage = VMA_MEMORY_USAGE_GPU_ONLY;
559 
560         result = vmaCreateBuffer(device_gpuav->vmaAllocator, &scratch_ci, &scratch_aci, &scratch, &scratch_allocation, nullptr);
561         if (result != VK_SUCCESS) {
562             ReportSetupProblem(device_gpuav->device,
563                                "Failed to create scratch buffer for acceleration structure build validation.");
564         }
565     }
566 
567     VkCommandPool command_pool = VK_NULL_HANDLE;
568     if (result == VK_SUCCESS) {
569         auto command_pool_ci = LvlInitStruct<VkCommandPoolCreateInfo>();
570         command_pool_ci.queueFamilyIndex = 0;
571 
572         result = DispatchCreateCommandPool(device_gpuav->device, &command_pool_ci, nullptr, &command_pool);
573         if (result != VK_SUCCESS) {
574             ReportSetupProblem(device_gpuav->device, "Failed to create command pool for acceleration structure build validation.");
575         }
576     }
577 
578     VkCommandBuffer command_buffer = VK_NULL_HANDLE;
579 
580     if (result == VK_SUCCESS) {
581         auto command_buffer_ai = LvlInitStruct<VkCommandBufferAllocateInfo>();
582         command_buffer_ai.commandPool = command_pool;
583         command_buffer_ai.commandBufferCount = 1;
584         command_buffer_ai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
585 
586         result = DispatchAllocateCommandBuffers(device_gpuav->device, &command_buffer_ai, &command_buffer);
587         if (result != VK_SUCCESS) {
588             ReportSetupProblem(device_gpuav->device,
589                                "Failed to create command buffer for acceleration structure build validation.");
590         }
591 
592         // Hook up command buffer dispatch
593         device_gpuav->vkSetDeviceLoaderData(device_gpuav->device, command_buffer);
594     }
595 
596     if (result == VK_SUCCESS) {
597         auto command_buffer_bi = LvlInitStruct<VkCommandBufferBeginInfo>();
598 
599         result = DispatchBeginCommandBuffer(command_buffer, &command_buffer_bi);
600         if (result != VK_SUCCESS) {
601             ReportSetupProblem(device_gpuav->device, "Failed to begin command buffer for acceleration structure build validation.");
602         }
603     }
604 
605     if (result == VK_SUCCESS) {
606         DispatchCmdBuildAccelerationStructureNV(command_buffer, &as_ci.info, VK_NULL_HANDLE, 0, VK_FALSE,
607                                                 as_validation_state.replacement_as, VK_NULL_HANDLE, scratch, 0);
608         DispatchEndCommandBuffer(command_buffer);
609     }
610 
611     VkQueue queue = VK_NULL_HANDLE;
612     if (result == VK_SUCCESS) {
613         DispatchGetDeviceQueue(device_gpuav->device, 0, 0, &queue);
614 
615         // Hook up queue dispatch
616         device_gpuav->vkSetDeviceLoaderData(device_gpuav->device, queue);
617 
618         auto submit_info = LvlInitStruct<VkSubmitInfo>();
619         submit_info.commandBufferCount = 1;
620         submit_info.pCommandBuffers = &command_buffer;
621         result = DispatchQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
622         if (result != VK_SUCCESS) {
623             ReportSetupProblem(device_gpuav->device,
624                                "Failed to submit command buffer for acceleration structure build validation.");
625         }
626     }
627 
628     if (result == VK_SUCCESS) {
629         result = DispatchQueueWaitIdle(queue);
630         if (result != VK_SUCCESS) {
631             ReportSetupProblem(device_gpuav->device, "Failed to wait for queue idle for acceleration structure build validation.");
632         }
633     }
634 
635     if (vbo != VK_NULL_HANDLE) {
636         vmaDestroyBuffer(device_gpuav->vmaAllocator, vbo, vbo_allocation);
637     }
638     if (ibo != VK_NULL_HANDLE) {
639         vmaDestroyBuffer(device_gpuav->vmaAllocator, ibo, ibo_allocation);
640     }
641     if (scratch != VK_NULL_HANDLE) {
642         vmaDestroyBuffer(device_gpuav->vmaAllocator, scratch, scratch_allocation);
643     }
644     if (command_pool != VK_NULL_HANDLE) {
645         DispatchDestroyCommandPool(device_gpuav->device, command_pool, nullptr);
646     }
647 
648     if (device_gpuav->debug_desc_layout == VK_NULL_HANDLE) {
649         ReportSetupProblem(device_gpuav->device,
650                            "Failed to find descriptor set layout for acceleration structure build validation.");
651         result = VK_INCOMPLETE;
652     }
653 
654     if (result == VK_SUCCESS) {
655         auto pipeline_layout_ci = LvlInitStruct<VkPipelineLayoutCreateInfo>();
656         pipeline_layout_ci.setLayoutCount = 1;
657         pipeline_layout_ci.pSetLayouts = &device_gpuav->debug_desc_layout;
658         result = DispatchCreatePipelineLayout(device_gpuav->device, &pipeline_layout_ci, 0, &as_validation_state.pipeline_layout);
659         if (result != VK_SUCCESS) {
660             ReportSetupProblem(device_gpuav->device,
661                                "Failed to create pipeline layout for acceleration structure build validation.");
662         }
663     }
664 
665     VkShaderModule shader_module = VK_NULL_HANDLE;
666     if (result == VK_SUCCESS) {
667         auto shader_module_ci = LvlInitStruct<VkShaderModuleCreateInfo>();
668         shader_module_ci.codeSize = sizeof(kComputeShaderSpirv);
669         shader_module_ci.pCode = (uint32_t *)kComputeShaderSpirv;
670 
671         result = DispatchCreateShaderModule(device_gpuav->device, &shader_module_ci, nullptr, &shader_module);
672         if (result != VK_SUCCESS) {
673             ReportSetupProblem(device_gpuav->device,
674                                "Failed to create compute shader module for acceleration structure build validation.");
675         }
676     }
677 
678     if (result == VK_SUCCESS) {
679         auto pipeline_stage_ci = LvlInitStruct<VkPipelineShaderStageCreateInfo>();
680         pipeline_stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
681         pipeline_stage_ci.module = shader_module;
682         pipeline_stage_ci.pName = "main";
683 
684         auto pipeline_ci = LvlInitStruct<VkComputePipelineCreateInfo>();
685         pipeline_ci.stage = pipeline_stage_ci;
686         pipeline_ci.layout = as_validation_state.pipeline_layout;
687 
688         result = DispatchCreateComputePipelines(device_gpuav->device, VK_NULL_HANDLE, 1, &pipeline_ci, nullptr,
689                                                 &as_validation_state.pipeline);
690         if (result != VK_SUCCESS) {
691             ReportSetupProblem(device_gpuav->device,
692                                "Failed to create compute pipeline for acceleration structure build validation.");
693         }
694     }
695 
696     if (shader_module != VK_NULL_HANDLE) {
697         DispatchDestroyShaderModule(device_gpuav->device, shader_module, nullptr);
698     }
699 
700     if (result == VK_SUCCESS) {
701         as_validation_state.initialized = true;
702         LogInfo(device_gpuav->device, "UNASSIGNED-GPU-Assisted Validation.",
703                 "Acceleration Structure Building GPU Validation Enabled.");
704     } else {
705         device_gpuav->aborted = true;
706     }
707 }
708 
DestroyAccelerationStructureBuildValidationState()709 void GpuAssisted::DestroyAccelerationStructureBuildValidationState() {
710     auto &as_validation_state = acceleration_structure_validation_state;
711     if (as_validation_state.pipeline != VK_NULL_HANDLE) {
712         DispatchDestroyPipeline(device, as_validation_state.pipeline, nullptr);
713     }
714     if (as_validation_state.pipeline_layout != VK_NULL_HANDLE) {
715         DispatchDestroyPipelineLayout(device, as_validation_state.pipeline_layout, nullptr);
716     }
717     if (as_validation_state.replacement_as != VK_NULL_HANDLE) {
718         DispatchDestroyAccelerationStructureNV(device, as_validation_state.replacement_as, nullptr);
719     }
720     if (as_validation_state.replacement_as_allocation != VK_NULL_HANDLE) {
721         vmaFreeMemory(vmaAllocator, as_validation_state.replacement_as_allocation);
722     }
723 }
724 
725 struct GPUAV_RESTORABLE_PIPELINE_STATE {
726     VkPipelineBindPoint pipeline_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM;
727     VkPipeline pipeline = VK_NULL_HANDLE;
728     VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
729     std::vector<VkDescriptorSet> descriptor_sets;
730     std::vector<std::vector<uint32_t>> dynamic_offsets;
731     uint32_t push_descriptor_set_index = 0;
732     std::vector<safe_VkWriteDescriptorSet> push_descriptor_set_writes;
733     std::vector<uint8_t> push_constants_data;
734     PushConstantRangesId push_constants_ranges;
735 
CreateGPUAV_RESTORABLE_PIPELINE_STATE736     void Create(CMD_BUFFER_STATE *cb_state, VkPipelineBindPoint bind_point) {
737         pipeline_bind_point = bind_point;
738         const auto lv_bind_point = ConvertToLvlBindPoint(bind_point);
739 
740         LAST_BOUND_STATE &last_bound = cb_state->lastBound[lv_bind_point];
741         if (last_bound.pipeline_state) {
742             pipeline = last_bound.pipeline_state->pipeline();
743             pipeline_layout = last_bound.pipeline_layout;
744             descriptor_sets.reserve(last_bound.per_set.size());
745             for (std::size_t i = 0; i < last_bound.per_set.size(); i++) {
746                 const auto *bound_descriptor_set = last_bound.per_set[i].bound_descriptor_set;
747                 if (bound_descriptor_set) {
748                     descriptor_sets.push_back(bound_descriptor_set->GetSet());
749                     if (bound_descriptor_set->IsPushDescriptor()) {
750                         push_descriptor_set_index = static_cast<uint32_t>(i);
751                     }
752                     dynamic_offsets.push_back(last_bound.per_set[i].dynamicOffsets);
753                 }
754             }
755 
756             if (last_bound.push_descriptor_set) {
757                 push_descriptor_set_writes = last_bound.push_descriptor_set->GetWrites();
758             }
759             if (last_bound.pipeline_state->pipeline_layout->push_constant_ranges == cb_state->push_constant_data_ranges) {
760                 push_constants_data = cb_state->push_constant_data;
761                 push_constants_ranges = last_bound.pipeline_state->pipeline_layout->push_constant_ranges;
762             }
763         }
764     }
765 
RestoreGPUAV_RESTORABLE_PIPELINE_STATE766     void Restore(VkCommandBuffer command_buffer) const {
767         if (pipeline != VK_NULL_HANDLE) {
768             DispatchCmdBindPipeline(command_buffer, pipeline_bind_point, pipeline);
769             if (!descriptor_sets.empty()) {
770                 for (std::size_t i = 0; i < descriptor_sets.size(); i++) {
771                     VkDescriptorSet descriptor_set = descriptor_sets[i];
772                     if (descriptor_set != VK_NULL_HANDLE) {
773                         DispatchCmdBindDescriptorSets(command_buffer, pipeline_bind_point, pipeline_layout,
774                                                       static_cast<uint32_t>(i), 1, &descriptor_set,
775                                                       static_cast<uint32_t>(dynamic_offsets[i].size()), dynamic_offsets[i].data());
776                     }
777                 }
778             }
779             if (!push_descriptor_set_writes.empty()) {
780                 DispatchCmdPushDescriptorSetKHR(command_buffer, pipeline_bind_point, pipeline_layout, push_descriptor_set_index,
781                                                 static_cast<uint32_t>(push_descriptor_set_writes.size()),
782                                                 reinterpret_cast<const VkWriteDescriptorSet *>(push_descriptor_set_writes.data()));
783             }
784             if (!push_constants_data.empty()) {
785                 for (const auto &push_constant_range : *push_constants_ranges) {
786                     if (push_constant_range.size == 0) continue;
787                     DispatchCmdPushConstants(command_buffer, pipeline_layout, push_constant_range.stageFlags,
788                                              push_constant_range.offset, push_constant_range.size, push_constants_data.data());
789                 }
790             }
791         }
792     }
793 };
794 
// Runs before vkCmdBuildAccelerationStructureNV is recorded. For top-level builds,
// injects a compute dispatch into the same command buffer that scans the instance
// buffer, replaces any bottom-level acceleration structure handle not in the set of
// currently valid handles with the handle of the pre-built "replacement" BLAS, and
// records the first invalid handle into a host-visible buffer. The buffer is read
// back and reported after submission by ProcessAccelerationStructureBuildValidationBuffer.
void GpuAssisted::PreCallRecordCmdBuildAccelerationStructureNV(VkCommandBuffer commandBuffer,
                                                               const VkAccelerationStructureInfoNV *pInfo, VkBuffer instanceData,
                                                               VkDeviceSize instanceOffset, VkBool32 update,
                                                               VkAccelerationStructureNV dst, VkAccelerationStructureNV src,
                                                               VkBuffer scratch, VkDeviceSize scratchOffset) {
    ValidationStateTracker::PreCallRecordCmdBuildAccelerationStructureNV(commandBuffer, pInfo, instanceData, instanceOffset, update,
                                                                         dst, src, scratch, scratchOffset);
    // Only top-level builds reference BLAS handles through an instance buffer.
    if (pInfo == nullptr || pInfo->type != VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV) {
        return;
    }

    auto &as_validation_state = acceleration_structure_validation_state;
    if (!as_validation_state.initialized) {
        return;
    }

    // Empty acceleration structure is valid according to the spec.
    if (pInfo->instanceCount == 0 || instanceData == VK_NULL_HANDLE) {
        return;
    }

    auto cb_state = GetCBState(commandBuffer);
    assert(cb_state != nullptr);

    // Gather the opaque handles of every BLAS that has actually been built;
    // these are the only handles the compute shader will accept as valid.
    std::vector<uint64_t> current_valid_handles;
    ForEach<ACCELERATION_STRUCTURE_STATE>([&current_valid_handles](const ACCELERATION_STRUCTURE_STATE &as_state) {
        if (as_state.built && as_state.create_infoNV.info.type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV) {
            current_valid_handles.push_back(as_state.opaque_handle);
        }
    });

    GpuAssistedAccelerationStructureBuildValidationBufferInfo as_validation_buffer_info = {};
    as_validation_buffer_info.acceleration_structure = dst;

    // Layout must match GpuAccelerationStructureBuildValidationBuffer followed by
    // a flat array of (lo, hi) uint pairs for the valid handles.
    const VkDeviceSize validation_buffer_size =
        // One uint for number of instances to validate
        4 +
        // Two uint for the replacement acceleration structure handle
        8 +
        // One uint for number of invalid handles found
        4 +
        // Two uint for the first invalid handle found
        8 +
        // One uint for the number of current valid handles
        4 +
        // Two uint for each current valid handle
        (8 * current_valid_handles.size());

    auto validation_buffer_create_info = LvlInitStruct<VkBufferCreateInfo>();
    validation_buffer_create_info.size = validation_buffer_size;
    validation_buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;

    // Host-visible so it can be written here and read back after submission.
    VmaAllocationCreateInfo validation_buffer_alloc_info = {};
    validation_buffer_alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

    VkResult result = vmaCreateBuffer(vmaAllocator, &validation_buffer_create_info, &validation_buffer_alloc_info,
                                      &as_validation_buffer_info.validation_buffer,
                                      &as_validation_buffer_info.validation_buffer_allocation, nullptr);
    if (result != VK_SUCCESS) {
        ReportSetupProblem(device, "Unable to allocate device memory.  Device could become unstable.");
        aborted = true;
        return;
    }

    // NOTE(review): on the early-abort paths below the just-created validation
    // buffer is not destroyed; since `aborted` disables further validation this
    // leaks at most a few allocations — confirm whether cleanup is handled elsewhere.
    GpuAccelerationStructureBuildValidationBuffer *mapped_validation_buffer = nullptr;
    result = vmaMapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation,
                          reinterpret_cast<void **>(&mapped_validation_buffer));
    if (result != VK_SUCCESS) {
        ReportSetupProblem(device, "Unable to allocate device memory for acceleration structure build val buffer.");
        aborted = true;
        return;
    }

    // Fill in the header: counts, the replacement handle split into two 32-bit
    // words (the shader has no 64-bit integers), and zeroed result fields.
    mapped_validation_buffer->instances_to_validate = pInfo->instanceCount;
    mapped_validation_buffer->replacement_handle_bits_0 =
        reinterpret_cast<const uint32_t *>(&as_validation_state.replacement_as_handle)[0];
    mapped_validation_buffer->replacement_handle_bits_1 =
        reinterpret_cast<const uint32_t *>(&as_validation_state.replacement_as_handle)[1];
    mapped_validation_buffer->invalid_handle_found = 0;
    mapped_validation_buffer->invalid_handle_bits_0 = 0;
    mapped_validation_buffer->invalid_handle_bits_1 = 0;
    mapped_validation_buffer->valid_handles_count = static_cast<uint32_t>(current_valid_handles.size());

    // Append each valid 64-bit handle as two consecutive 32-bit words.
    uint32_t *mapped_valid_handles = reinterpret_cast<uint32_t *>(&mapped_validation_buffer[1]);
    for (std::size_t i = 0; i < current_valid_handles.size(); i++) {
        const uint64_t current_valid_handle = current_valid_handles[i];

        *mapped_valid_handles = reinterpret_cast<const uint32_t *>(&current_valid_handle)[0];
        ++mapped_valid_handles;
        *mapped_valid_handles = reinterpret_cast<const uint32_t *>(&current_valid_handle)[1];
        ++mapped_valid_handles;
    }

    vmaUnmapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation);

    // Each instance record is 64 bytes (VkAccelerationStructureInstanceNV).
    static constexpr const VkDeviceSize k_instance_size = 64;
    const VkDeviceSize instance_buffer_size = k_instance_size * pInfo->instanceCount;

    result = desc_set_manager->GetDescriptorSet(&as_validation_buffer_info.descriptor_pool, debug_desc_layout,
                                                &as_validation_buffer_info.descriptor_set);
    if (result != VK_SUCCESS) {
        ReportSetupProblem(device, "Unable to get descriptor set for acceleration structure build.");
        aborted = true;
        return;
    }

    // Binding 0: the application's instance buffer; binding 1: our validation buffer.
    VkDescriptorBufferInfo descriptor_buffer_infos[2] = {};
    descriptor_buffer_infos[0].buffer = instanceData;
    descriptor_buffer_infos[0].offset = instanceOffset;
    descriptor_buffer_infos[0].range = instance_buffer_size;
    descriptor_buffer_infos[1].buffer = as_validation_buffer_info.validation_buffer;
    descriptor_buffer_infos[1].offset = 0;
    descriptor_buffer_infos[1].range = validation_buffer_size;

    VkWriteDescriptorSet descriptor_set_writes[2] = {
        LvlInitStruct<VkWriteDescriptorSet>(),
        LvlInitStruct<VkWriteDescriptorSet>(),
    };
    descriptor_set_writes[0].dstSet = as_validation_buffer_info.descriptor_set;
    descriptor_set_writes[0].dstBinding = 0;
    descriptor_set_writes[0].descriptorCount = 1;
    descriptor_set_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    descriptor_set_writes[0].pBufferInfo = &descriptor_buffer_infos[0];
    descriptor_set_writes[1].dstSet = as_validation_buffer_info.descriptor_set;
    descriptor_set_writes[1].dstBinding = 1;
    descriptor_set_writes[1].descriptorCount = 1;
    descriptor_set_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    descriptor_set_writes[1].pBufferInfo = &descriptor_buffer_infos[1];

    DispatchUpdateDescriptorSets(device, 2, descriptor_set_writes, 0, nullptr);

    // Issue a memory barrier to make sure anything writing to the instance buffer has finished.
    auto memory_barrier = LvlInitStruct<VkMemoryBarrier>();
    memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    DispatchCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
                               &memory_barrier, 0, nullptr, 0, nullptr);

    // Save a copy of the compute pipeline state that needs to be restored.
    GPUAV_RESTORABLE_PIPELINE_STATE restorable_state;
    restorable_state.Create(cb_state.get(), VK_PIPELINE_BIND_POINT_COMPUTE);

    // Switch to and launch the validation compute shader to find, replace, and report invalid acceleration structure handles.
    DispatchCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, as_validation_state.pipeline);
    DispatchCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, as_validation_state.pipeline_layout, 0, 1,
                                  &as_validation_buffer_info.descriptor_set, 0, nullptr);
    DispatchCmdDispatch(commandBuffer, 1, 1, 1);

    // Issue a buffer memory barrier to make sure that any invalid bottom level acceleration structure handles
    // have been replaced by the validation compute shader before any builds take place.
    auto instance_buffer_barrier = LvlInitStruct<VkBufferMemoryBarrier>();
    instance_buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    instance_buffer_barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV;
    instance_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    instance_buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    instance_buffer_barrier.buffer = instanceData;
    instance_buffer_barrier.offset = instanceOffset;
    instance_buffer_barrier.size = instance_buffer_size;
    DispatchCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0, 0, nullptr, 1, &instance_buffer_barrier, 0,
                               nullptr);

    // Restore the previous compute pipeline state.
    restorable_state.Restore(commandBuffer);

    // Keep the buffer info alive until the submission is processed.
    cb_state->as_validation_buffers.emplace_back(std::move(as_validation_buffer_info));
}
962 
ProcessAccelerationStructureBuildValidationBuffer(VkQueue queue,CMD_BUFFER_STATE_GPUAV * cb_node)963 void GpuAssisted::ProcessAccelerationStructureBuildValidationBuffer(VkQueue queue, CMD_BUFFER_STATE_GPUAV *cb_node) {
964     if (cb_node == nullptr || !cb_node->hasBuildAccelerationStructureCmd) {
965         return;
966     }
967 
968     for (const auto &as_validation_buffer_info : cb_node->as_validation_buffers) {
969         GpuAccelerationStructureBuildValidationBuffer *mapped_validation_buffer = nullptr;
970 
971         VkResult result = vmaMapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation,
972                                        reinterpret_cast<void **>(&mapped_validation_buffer));
973         if (result == VK_SUCCESS) {
974             if (mapped_validation_buffer->invalid_handle_found > 0) {
975                 uint64_t invalid_handle = 0;
976                 reinterpret_cast<uint32_t *>(&invalid_handle)[0] = mapped_validation_buffer->invalid_handle_bits_0;
977                 reinterpret_cast<uint32_t *>(&invalid_handle)[1] = mapped_validation_buffer->invalid_handle_bits_1;
978 
979                 LogError(as_validation_buffer_info.acceleration_structure, "UNASSIGNED-AccelerationStructure",
980                          "Attempted to build top level acceleration structure using invalid bottom level acceleration structure "
981                          "handle (%" PRIu64 ")",
982                          invalid_handle);
983             }
984             vmaUnmapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation);
985         }
986     }
987 }
988 
PostCallRecordBindAccelerationStructureMemoryNV(VkDevice device,uint32_t bindInfoCount,const VkBindAccelerationStructureMemoryInfoNV * pBindInfos,VkResult result)989 void GpuAssisted::PostCallRecordBindAccelerationStructureMemoryNV(VkDevice device, uint32_t bindInfoCount,
990                                                                   const VkBindAccelerationStructureMemoryInfoNV *pBindInfos,
991                                                                   VkResult result) {
992     if (VK_SUCCESS != result) return;
993     ValidationStateTracker::PostCallRecordBindAccelerationStructureMemoryNV(device, bindInfoCount, pBindInfos, result);
994     for (uint32_t i = 0; i < bindInfoCount; i++) {
995         const VkBindAccelerationStructureMemoryInfoNV &info = pBindInfos[i];
996         auto as_state = Get<ACCELERATION_STRUCTURE_STATE>(info.accelerationStructure);
997         if (as_state) {
998             DispatchGetAccelerationStructureHandleNV(device, info.accelerationStructure, 8, &as_state->opaque_handle);
999         }
1000     }
1001 }
1002 
1003 // Modify the pipeline layout to include our debug descriptor set and any needed padding with the dummy descriptor set.
PreCallRecordCreatePipelineLayout(VkDevice device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout,void * cpl_state_data)1004 void GpuAssisted::PreCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo,
1005                                                     const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout,
1006                                                     void *cpl_state_data) {
1007     if (aborted) {
1008         return;
1009     }
1010 
1011     create_pipeline_layout_api_state *cpl_state = reinterpret_cast<create_pipeline_layout_api_state *>(cpl_state_data);
1012 
1013     if (cpl_state->modified_create_info.setLayoutCount >= adjusted_max_desc_sets) {
1014         std::ostringstream strm;
1015         strm << "Pipeline Layout conflict with validation's descriptor set at slot " << desc_set_bind_index << ". "
1016              << "Application has too many descriptor sets in the pipeline layout to continue with gpu validation. "
1017              << "Validation is not modifying the pipeline layout. "
1018              << "Instrumented shaders are replaced with non-instrumented shaders.";
1019         ReportSetupProblem(device, strm.str().c_str());
1020     } else {
1021         UtilPreCallRecordCreatePipelineLayout(cpl_state, this, pCreateInfo);
1022     }
1023     ValidationStateTracker::PreCallRecordCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout, cpl_state_data);
1024 }
1025 
PostCallRecordCreatePipelineLayout(VkDevice device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout,VkResult result)1026 void GpuAssisted::PostCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo,
1027                                                      const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout,
1028                                                      VkResult result) {
1029     ValidationStateTracker::PostCallRecordCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout, result);
1030 
1031     if (result != VK_SUCCESS) {
1032         ReportSetupProblem(device, "Unable to create pipeline layout.  Device could become unstable.");
1033         aborted = true;
1034     }
1035 }
1036 
1037 // Free the device memory and descriptor set(s) associated with a command buffer.
DestroyBuffer(GpuAssistedBufferInfo & buffer_info)1038 void GpuAssisted::DestroyBuffer(GpuAssistedBufferInfo &buffer_info) {
1039     vmaDestroyBuffer(vmaAllocator, buffer_info.output_mem_block.buffer, buffer_info.output_mem_block.allocation);
1040     if (buffer_info.di_input_mem_block.buffer) {
1041         vmaDestroyBuffer(vmaAllocator, buffer_info.di_input_mem_block.buffer, buffer_info.di_input_mem_block.allocation);
1042     }
1043     if (buffer_info.bda_input_mem_block.buffer) {
1044         vmaDestroyBuffer(vmaAllocator, buffer_info.bda_input_mem_block.buffer, buffer_info.bda_input_mem_block.allocation);
1045     }
1046     if (buffer_info.desc_set != VK_NULL_HANDLE) {
1047         desc_set_manager->PutBackDescriptorSet(buffer_info.desc_pool, buffer_info.desc_set);
1048     }
1049     if (buffer_info.pre_draw_resources.desc_set != VK_NULL_HANDLE) {
1050         desc_set_manager->PutBackDescriptorSet(buffer_info.pre_draw_resources.desc_pool, buffer_info.pre_draw_resources.desc_set);
1051     }
1052 }
1053 
DestroyBuffer(GpuAssistedAccelerationStructureBuildValidationBufferInfo & as_validation_buffer_info)1054 void GpuAssisted::DestroyBuffer(GpuAssistedAccelerationStructureBuildValidationBufferInfo &as_validation_buffer_info) {
1055     vmaDestroyBuffer(vmaAllocator, as_validation_buffer_info.validation_buffer,
1056                      as_validation_buffer_info.validation_buffer_allocation);
1057 
1058     if (as_validation_buffer_info.descriptor_set != VK_NULL_HANDLE) {
1059         desc_set_manager->PutBackDescriptorSet(as_validation_buffer_info.descriptor_pool, as_validation_buffer_info.descriptor_set);
1060     }
1061 }
1062 
1063 // Just gives a warning about a possible deadlock.
PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers) const1064 bool GpuAssisted::PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
1065                                                VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
1066                                                uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
1067                                                uint32_t bufferMemoryBarrierCount,
1068                                                const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
1069                                                const VkImageMemoryBarrier *pImageMemoryBarriers) const {
1070     if (srcStageMask & VK_PIPELINE_STAGE_HOST_BIT) {
1071         ReportSetupProblem(commandBuffer,
1072                            "CmdWaitEvents recorded with VK_PIPELINE_STAGE_HOST_BIT set. "
1073                            "GPU_Assisted validation waits on queue completion. "
1074                            "This wait could block the host's signaling of this event, resulting in deadlock.");
1075     }
1076     ValidationStateTracker::PreCallValidateCmdWaitEvents(commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask,
1077                                                          memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
1078                                                          pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
1079     return false;
1080 }
1081 
PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,const VkDependencyInfoKHR * pDependencyInfos) const1082 bool GpuAssisted::PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
1083                                                    const VkDependencyInfoKHR *pDependencyInfos) const {
1084     VkPipelineStageFlags2KHR srcStageMask = 0;
1085 
1086     for (uint32_t i = 0; i < eventCount; i++) {
1087         auto stage_masks = sync_utils::GetGlobalStageMasks(pDependencyInfos[i]);
1088         srcStageMask = stage_masks.src;
1089     }
1090 
1091     if (srcStageMask & VK_PIPELINE_STAGE_HOST_BIT) {
1092         ReportSetupProblem(commandBuffer,
1093                            "CmdWaitEvents2KHR recorded with VK_PIPELINE_STAGE_HOST_BIT set. "
1094                            "GPU_Assisted validation waits on queue completion. "
1095                            "This wait could block the host's signaling of this event, resulting in deadlock.");
1096     }
1097     ValidationStateTracker::PreCallValidateCmdWaitEvents2KHR(commandBuffer, eventCount, pEvents, pDependencyInfos);
1098     return false;
1099 }
1100 
PostCallRecordGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties * pPhysicalDeviceProperties)1101 void GpuAssisted::PostCallRecordGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
1102                                                             VkPhysicalDeviceProperties *pPhysicalDeviceProperties) {
1103     // There is an implicit layer that can cause this call to return 0 for maxBoundDescriptorSets - Ignore such calls
1104     if (enabled[gpu_validation_reserve_binding_slot] && pPhysicalDeviceProperties->limits.maxBoundDescriptorSets > 0) {
1105         if (pPhysicalDeviceProperties->limits.maxBoundDescriptorSets > 1) {
1106             pPhysicalDeviceProperties->limits.maxBoundDescriptorSets -= 1;
1107         } else {
1108             LogWarning(physicalDevice, "UNASSIGNED-GPU-Assisted Validation Setup Error.",
1109                        "Unable to reserve descriptor binding slot on a device with only one slot.");
1110         }
1111     }
1112     ValidationStateTracker::PostCallRecordGetPhysicalDeviceProperties(physicalDevice, pPhysicalDeviceProperties);
1113 }
1114 
PostCallRecordGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties2 * pPhysicalDeviceProperties2)1115 void GpuAssisted::PostCallRecordGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
1116                                                              VkPhysicalDeviceProperties2 *pPhysicalDeviceProperties2) {
1117     // There is an implicit layer that can cause this call to return 0 for maxBoundDescriptorSets - Ignore such calls
1118     if (enabled[gpu_validation_reserve_binding_slot] && pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets > 0) {
1119         if (pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets > 1) {
1120             pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets -= 1;
1121         } else {
1122             LogWarning(physicalDevice, "UNASSIGNED-GPU-Assisted Validation Setup Error.",
1123                        "Unable to reserve descriptor binding slot on a device with only one slot.");
1124         }
1125     }
1126     ValidationStateTracker::PostCallRecordGetPhysicalDeviceProperties2(physicalDevice, pPhysicalDeviceProperties2);
1127 }
1128 
PreCallRecordCreateGraphicsPipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * cgpl_state_data)1129 void GpuAssisted::PreCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1130                                                        const VkGraphicsPipelineCreateInfo *pCreateInfos,
1131                                                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1132                                                        void *cgpl_state_data) {
1133     if (aborted) return;
1134     std::vector<safe_VkGraphicsPipelineCreateInfo> new_pipeline_create_infos;
1135     create_graphics_pipeline_api_state *cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state *>(cgpl_state_data);
1136     UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, cgpl_state->pipe_state,
1137                                        &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_GRAPHICS, this);
1138     cgpl_state->gpu_create_infos = new_pipeline_create_infos;
1139     cgpl_state->pCreateInfos = reinterpret_cast<VkGraphicsPipelineCreateInfo *>(cgpl_state->gpu_create_infos.data());
1140     ValidationStateTracker::PreCallRecordCreateGraphicsPipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1141                                                                  cgpl_state_data);
1142 }
1143 
PreCallRecordCreateComputePipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * ccpl_state_data)1144 void GpuAssisted::PreCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1145                                                       const VkComputePipelineCreateInfo *pCreateInfos,
1146                                                       const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1147                                                       void *ccpl_state_data) {
1148     if (aborted) return;
1149     std::vector<safe_VkComputePipelineCreateInfo> new_pipeline_create_infos;
1150     auto *ccpl_state = reinterpret_cast<create_compute_pipeline_api_state *>(ccpl_state_data);
1151     UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, ccpl_state->pipe_state,
1152                                        &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_COMPUTE, this);
1153     ccpl_state->gpu_create_infos = new_pipeline_create_infos;
1154     ccpl_state->pCreateInfos = reinterpret_cast<VkComputePipelineCreateInfo *>(ccpl_state->gpu_create_infos.data());
1155     ValidationStateTracker::PreCallRecordCreateComputePipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1156                                                                 ccpl_state_data);
1157 }
1158 
PreCallRecordCreateRayTracingPipelinesNV(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoNV * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * crtpl_state_data)1159 void GpuAssisted::PreCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1160                                                            const VkRayTracingPipelineCreateInfoNV *pCreateInfos,
1161                                                            const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1162                                                            void *crtpl_state_data) {
1163     if (aborted) return;
1164     std::vector<safe_VkRayTracingPipelineCreateInfoCommon> new_pipeline_create_infos;
1165     auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_api_state *>(crtpl_state_data);
1166     UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, crtpl_state->pipe_state,
1167                                        &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, this);
1168     crtpl_state->gpu_create_infos = new_pipeline_create_infos;
1169     crtpl_state->pCreateInfos = reinterpret_cast<VkRayTracingPipelineCreateInfoNV *>(crtpl_state->gpu_create_infos.data());
1170     ValidationStateTracker::PreCallRecordCreateRayTracingPipelinesNV(device, pipelineCache, count, pCreateInfos, pAllocator,
1171                                                                      pPipelines, crtpl_state_data);
1172 }
1173 
PreCallRecordCreateRayTracingPipelinesKHR(VkDevice device,VkDeferredOperationKHR deferredOperation,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoKHR * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * crtpl_state_data)1174 void GpuAssisted::PreCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
1175                                                             VkPipelineCache pipelineCache, uint32_t count,
1176                                                             const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1177                                                             const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1178                                                             void *crtpl_state_data) {
1179     if (aborted) return;
1180     std::vector<safe_VkRayTracingPipelineCreateInfoCommon> new_pipeline_create_infos;
1181     auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1182     UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, crtpl_state->pipe_state,
1183                                        &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, this);
1184     crtpl_state->gpu_create_infos = new_pipeline_create_infos;
1185     crtpl_state->pCreateInfos = reinterpret_cast<VkRayTracingPipelineCreateInfoKHR *>(crtpl_state->gpu_create_infos.data());
1186     ValidationStateTracker::PreCallRecordCreateRayTracingPipelinesKHR(device, deferredOperation, pipelineCache, count, pCreateInfos,
1187                                                                       pAllocator, pPipelines, crtpl_state_data);
1188 }
1189 
PostCallRecordCreateGraphicsPipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * cgpl_state_data)1190 void GpuAssisted::PostCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1191                                                         const VkGraphicsPipelineCreateInfo *pCreateInfos,
1192                                                         const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1193                                                         VkResult result, void *cgpl_state_data) {
1194     ValidationStateTracker::PostCallRecordCreateGraphicsPipelines(device, pipelineCache, count, pCreateInfos, pAllocator,
1195                                                                   pPipelines, result, cgpl_state_data);
1196     if (aborted) return;
1197     create_graphics_pipeline_api_state *cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state *>(cgpl_state_data);
1198     UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, cgpl_state->gpu_create_infos.data());
1199     UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_GRAPHICS, this);
1200 }
1201 
PostCallRecordCreateComputePipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * ccpl_state_data)1202 void GpuAssisted::PostCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1203                                                        const VkComputePipelineCreateInfo *pCreateInfos,
1204                                                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1205                                                        VkResult result, void *ccpl_state_data) {
1206     ValidationStateTracker::PostCallRecordCreateComputePipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1207                                                                  result, ccpl_state_data);
1208     if (aborted) return;
1209     create_compute_pipeline_api_state *ccpl_state = reinterpret_cast<create_compute_pipeline_api_state *>(ccpl_state_data);
1210     UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, ccpl_state->gpu_create_infos.data());
1211     UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_COMPUTE, this);
1212 }
1213 
PostCallRecordCreateRayTracingPipelinesNV(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoNV * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * crtpl_state_data)1214 void GpuAssisted::PostCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1215                                                             const VkRayTracingPipelineCreateInfoNV *pCreateInfos,
1216                                                             const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1217                                                             VkResult result, void *crtpl_state_data) {
1218     auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1219     ValidationStateTracker::PostCallRecordCreateRayTracingPipelinesNV(device, pipelineCache, count, pCreateInfos, pAllocator,
1220                                                                       pPipelines, result, crtpl_state_data);
1221     if (aborted) return;
1222     UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, crtpl_state->gpu_create_infos.data());
1223     UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, this);
1224 }
1225 
PostCallRecordCreateRayTracingPipelinesKHR(VkDevice device,VkDeferredOperationKHR deferredOperation,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoKHR * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * crtpl_state_data)1226 void GpuAssisted::PostCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
1227                                                              VkPipelineCache pipelineCache, uint32_t count,
1228                                                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1229                                                              const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1230                                                              VkResult result, void *crtpl_state_data) {
1231     auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1232     ValidationStateTracker::PostCallRecordCreateRayTracingPipelinesKHR(
1233         device, deferredOperation, pipelineCache, count, pCreateInfos, pAllocator, pPipelines, result, crtpl_state_data);
1234     if (aborted) return;
1235     UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, crtpl_state->gpu_create_infos.data());
1236     UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, this);
1237 }
1238 
1239 // Remove all the shader trackers associated with this destroyed pipeline.
PreCallRecordDestroyPipeline(VkDevice device,VkPipeline pipeline,const VkAllocationCallbacks * pAllocator)1240 void GpuAssisted::PreCallRecordDestroyPipeline(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator) {
1241     for (auto it = shader_map.begin(); it != shader_map.end();) {
1242         if (it->second.pipeline == pipeline) {
1243             it = shader_map.erase(it);
1244         } else {
1245             ++it;
1246         }
1247     }
1248     ValidationStateTracker::PreCallRecordDestroyPipeline(device, pipeline, pAllocator);
1249 }
1250 
PreCallRecordDestroyRenderPass(VkDevice device,VkRenderPass renderPass,const VkAllocationCallbacks * pAllocator)1251 void GpuAssisted::PreCallRecordDestroyRenderPass(VkDevice device, VkRenderPass renderPass,
1252                                                  const VkAllocationCallbacks *pAllocator) {
1253     auto pipeline = pre_draw_validation_state.renderpass_to_pipeline.find(renderPass);
1254     if (pipeline != pre_draw_validation_state.renderpass_to_pipeline.end()) {
1255         DispatchDestroyPipeline(device, pipeline->second, nullptr);
1256         pre_draw_validation_state.renderpass_to_pipeline.erase(pipeline);
1257     }
1258     ValidationStateTracker::PreCallRecordDestroyRenderPass(device, renderPass, pAllocator);
1259 }
1260 
1261 // Call the SPIR-V Optimizer to run the instrumentation pass on the shader.
InstrumentShader(const VkShaderModuleCreateInfo * pCreateInfo,std::vector<unsigned int> & new_pgm,uint32_t * unique_shader_id)1262 bool GpuAssisted::InstrumentShader(const VkShaderModuleCreateInfo *pCreateInfo, std::vector<unsigned int> &new_pgm,
1263                                    uint32_t *unique_shader_id) {
1264     if (aborted) return false;
1265     if (pCreateInfo->pCode[0] != spv::MagicNumber) return false;
1266 
1267     const spvtools::MessageConsumer gpu_console_message_consumer =
1268         [this](spv_message_level_t level, const char *, const spv_position_t &position, const char *message) -> void {
1269         switch (level) {
1270             case SPV_MSG_FATAL:
1271             case SPV_MSG_INTERNAL_ERROR:
1272             case SPV_MSG_ERROR:
1273                 this->LogError(this->device, "UNASSIGNED-GPU-Assisted", "Error during shader instrumentation: line %zu: %s",
1274                                position.index, message);
1275                 break;
1276             default:
1277                 break;
1278         }
1279     };
1280 
1281     // Load original shader SPIR-V
1282     uint32_t num_words = static_cast<uint32_t>(pCreateInfo->codeSize / 4);
1283     new_pgm.clear();
1284     new_pgm.reserve(num_words);
1285     new_pgm.insert(new_pgm.end(), &pCreateInfo->pCode[0], &pCreateInfo->pCode[num_words]);
1286 
1287     // Call the optimizer to instrument the shader.
1288     // Use the unique_shader_module_id as a shader ID so we can look up its handle later in the shader_map.
1289     // If descriptor indexing is enabled, enable length checks and updated descriptor checks
1290     using namespace spvtools;
1291     spv_target_env target_env = PickSpirvEnv(api_version, IsExtEnabled(device_extensions.vk_khr_spirv_1_4));
1292     spvtools::ValidatorOptions val_options;
1293     AdjustValidatorOptions(device_extensions, enabled_features, val_options);
1294     spvtools::OptimizerOptions opt_options;
1295     opt_options.set_run_validator(true);
1296     opt_options.set_validator_options(val_options);
1297     Optimizer optimizer(target_env);
1298     optimizer.SetMessageConsumer(gpu_console_message_consumer);
1299     optimizer.RegisterPass(CreateInstBindlessCheckPass(desc_set_bind_index, unique_shader_module_id, descriptor_indexing,
1300                                                        descriptor_indexing, buffer_oob_enabled, buffer_oob_enabled));
1301     // Call CreateAggressiveDCEPass with preserve_interface == true
1302     optimizer.RegisterPass(CreateAggressiveDCEPass(true));
1303     if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
1304          IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
1305         shaderInt64 && enabled_features.core12.bufferDeviceAddress) {
1306         optimizer.RegisterPass(CreateInstBuffAddrCheckPass(desc_set_bind_index, unique_shader_module_id));
1307     }
1308     bool pass = optimizer.Run(new_pgm.data(), new_pgm.size(), &new_pgm, opt_options);
1309     if (!pass) {
1310         ReportSetupProblem(device, "Failure to instrument shader.  Proceeding with non-instrumented shader.");
1311     }
1312     *unique_shader_id = unique_shader_module_id++;
1313     return pass;
1314 }
1315 // Create the instrumented shader data to provide to the driver.
PreCallRecordCreateShaderModule(VkDevice device,const VkShaderModuleCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkShaderModule * pShaderModule,void * csm_state_data)1316 void GpuAssisted::PreCallRecordCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo *pCreateInfo,
1317                                                   const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule,
1318                                                   void *csm_state_data) {
1319     create_shader_module_api_state *csm_state = reinterpret_cast<create_shader_module_api_state *>(csm_state_data);
1320     bool pass = InstrumentShader(pCreateInfo, csm_state->instrumented_pgm, &csm_state->unique_shader_id);
1321     if (pass) {
1322         csm_state->instrumented_create_info.pCode = csm_state->instrumented_pgm.data();
1323         csm_state->instrumented_create_info.codeSize = csm_state->instrumented_pgm.size() * sizeof(unsigned int);
1324     }
1325     ValidationStateTracker::PreCallRecordCreateShaderModule(device, pCreateInfo, pAllocator, pShaderModule, csm_state_data);
1326 }
1327 
// Error code and sub-code reported by the pre-draw validation shader, allocated just past
// the codes reserved by the SPIRV-Tools instrumentation passes so they cannot collide.
static const int kInstErrorPreDrawValidate = spvtools::kInstErrorMax + 1;
static const int kPreDrawValidateSubError = spvtools::kInstValidationOutError + 1;
1330 // Generate the part of the message describing the violation.
GenerateValidationMessage(const uint32_t * debug_record,std::string & msg,std::string & vuid_msg,GpuAssistedBufferInfo buf_info,GpuAssisted * gpu_assisted)1331 bool GenerateValidationMessage(const uint32_t *debug_record, std::string &msg, std::string &vuid_msg, GpuAssistedBufferInfo buf_info, GpuAssisted *gpu_assisted) {
1332     using namespace spvtools;
1333     std::ostringstream strm;
1334     bool return_code = true;
1335     assert(kInstErrorPreDrawValidate == _kInstErrorPreDrawValidate);
1336     assert(kInstValidationOutError == _kInstValidationOutError);
1337     switch (debug_record[kInstValidationOutError]) {
1338         case kInstErrorBindlessBounds: {
1339             strm << "Index of " << debug_record[kInstBindlessBoundsOutDescIndex] << " used to index descriptor array of length "
1340                  << debug_record[kInstBindlessBoundsOutDescBound] << ". ";
1341             vuid_msg = "UNASSIGNED-Descriptor index out of bounds";
1342         } break;
1343         case kInstErrorBindlessUninit: {
1344             strm << "Descriptor index " << debug_record[kInstBindlessUninitOutDescIndex] << " is uninitialized.";
1345             vuid_msg = "UNASSIGNED-Descriptor uninitialized";
1346         } break;
1347         case kInstErrorBuffAddrUnallocRef: {
1348             uint64_t *ptr = (uint64_t *)&debug_record[kInstBuffAddrUnallocOutDescPtrLo];
1349             strm << "Device address 0x" << std::hex << *ptr << " access out of bounds. ";
1350             vuid_msg = "UNASSIGNED-Device address out of bounds";
1351         } break;
1352         case kInstErrorBuffOOBUniform:
1353         case kInstErrorBuffOOBStorage: {
1354             auto size = debug_record[kInstBindlessBuffOOBOutBuffSize];
1355             if (size == 0) {
1356                 strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex] << " is uninitialized.";
1357                 vuid_msg = "UNASSIGNED-Descriptor uninitialized";
1358             } else {
1359                 strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex]
1360                      << " access out of bounds. Descriptor size is " << debug_record[kInstBindlessBuffOOBOutBuffSize]
1361                      << " and highest byte accessed was " << debug_record[kInstBindlessBuffOOBOutBuffOff];
1362                 const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
1363                 if (debug_record[kInstValidationOutError] == kInstErrorBuffOOBUniform)
1364                     vuid_msg = vuid.uniform_access_oob;
1365                 else
1366                     vuid_msg = vuid.storage_access_oob;
1367             }
1368         } break;
1369         case kInstErrorBuffOOBUniformTexel:
1370         case kInstErrorBuffOOBStorageTexel: {
1371             auto size = debug_record[kInstBindlessBuffOOBOutBuffSize];
1372             if (size == 0) {
1373                 strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex] << " is uninitialized.";
1374                 vuid_msg = "UNASSIGNED-Descriptor uninitialized";
1375             } else {
1376                 strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex]
1377                      << " access out of bounds. Descriptor size is " << debug_record[kInstBindlessBuffOOBOutBuffSize]
1378                      << " texels and highest texel accessed was " << debug_record[kInstBindlessBuffOOBOutBuffOff];
1379                 const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
1380                 if (debug_record[kInstValidationOutError] == kInstErrorBuffOOBUniformTexel)
1381                     vuid_msg = vuid.uniform_access_oob;
1382                 else
1383                     vuid_msg = vuid.storage_access_oob;
1384             }
1385         } break;
1386         case kInstErrorPreDrawValidate: {
1387             // Buffer size must be >= (stride * (drawCount - 1) + offset + sizeof(VkDrawIndexedIndirectCommand))
1388             if (debug_record[kPreDrawValidateSubError] == pre_draw_count_exceeds_bufsize_error) {
1389                 uint32_t count = debug_record[kPreDrawValidateSubError + 1];
1390                 uint32_t stride = buf_info.pre_draw_resources.stride;
1391                 uint32_t offset = static_cast<uint32_t>(buf_info.pre_draw_resources.offset);
1392                 uint32_t draw_size = (stride * (count - 1) + offset + sizeof(VkDrawIndexedIndirectCommand));
1393                 const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
1394                 strm << "Indirect draw count of " << count << " would exceed buffer size " << buf_info.pre_draw_resources.buf_size
1395                      << " of buffer " << buf_info.pre_draw_resources.buffer << " stride = " << stride << " offset = " << offset
1396                      << " (stride * (drawCount - 1) + offset + sizeof(VkDrawIndexedIndirectCommand)) = " << draw_size;
1397                 if (count == 1) {
1398                     vuid_msg = vuid.count_exceeds_bufsize_1;
1399                 } else {
1400                     vuid_msg = vuid.count_exceeds_bufsize;
1401                 }
1402             } else if (debug_record[kPreDrawValidateSubError] == pre_draw_count_exceeds_limit_error) {
1403                 uint32_t count = debug_record[kPreDrawValidateSubError + 1];
1404                 const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
1405                 strm << "Indirect draw count of " << count << " would exceed maxDrawIndirectCount limit of "
1406                      << gpu_assisted->phys_dev_props.limits.maxDrawIndirectCount;
1407                 vuid_msg = vuid.count_exceeds_device_limit;
1408             } else if (debug_record[kPreDrawValidateSubError] == pre_draw_first_instance_error) {
1409                 uint32_t index = debug_record[kPreDrawValidateSubError + 1];
1410                 const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
1411                 strm << "The drawIndirectFirstInstance feature is not enabled, but the firstInstance member of the "
1412                         "VkDrawIndirectCommand structure at index "
1413                      << index << " is not zero";
1414                 vuid_msg = vuid.first_instance_not_zero;
1415             }
1416             return_code = false;
1417         } break;
1418         default: {
1419             strm << "Internal Error (unexpected error type = " << debug_record[kInstValidationOutError] << "). ";
1420             vuid_msg = "UNASSIGNED-Internal Error";
1421             assert(false);
1422         } break;
1423     }
1424     msg = strm.str();
1425     return return_code;
1426 }
1427 
1428 // Pull together all the information from the debug record to build the error message strings,
1429 // and then assemble them into a single message string.
1430 // Retrieve the shader program referenced by the unique shader ID provided in the debug record.
1431 // We had to keep a copy of the shader program with the same lifecycle as the pipeline to make
1432 // sure it is available when the pipeline is submitted.  (The ShaderModule tracking object also
1433 // keeps a copy, but it can be destroyed after the pipeline is created and before it is submitted.)
1434 //
// Decode one GPU-written debug record from debug_output_buffer, turn it into a
// human-readable validation error, log it against `queue`, and then zero the
// record so the buffer can be reused.
void GpuAssisted::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQueue queue, GpuAssistedBufferInfo &buffer_info,
                                             uint32_t operation_index, uint32_t *const debug_output_buffer) {
    using namespace spvtools;
    const uint32_t total_words = debug_output_buffer[0];
    // A zero here means that the shader instrumentation didn't write anything.
    // If you have nothing to say, don't say it here.
    if (0 == total_words) {
        return;
    }
    // The first word in the debug output buffer is the number of words that would have
    // been written by the shader instrumentation, if there was enough room in the buffer we provided.
    // The number of words actually written by the shaders is determined by the size of the buffer
    // we provide via the descriptor.  So, we process only the number of words that can fit in the
    // buffer.
    // Each "report" written by the shader instrumentation is considered a "record".  This function
    // is hard-coded to process only one record because it expects the buffer to be large enough to
    // hold only one record.  If there is a desire to process more than one record, this function needs
    // to be modified to loop over records and the buffer size increased.
    std::string validation_message;
    std::string stage_message;
    std::string common_message;
    std::string filename_message;
    std::string source_message;
    std::string vuid_msg;
    VkShaderModule shader_module_handle = VK_NULL_HANDLE;
    VkPipeline pipeline_handle = VK_NULL_HANDLE;
    std::vector<unsigned int> pgm;
    // The first record starts at this offset after the total_words.
    const uint32_t *debug_record = &debug_output_buffer[kDebugOutputDataOffset];
    // Lookup the VkShaderModule handle and SPIR-V code used to create the shader, using the unique shader ID value returned
    // by the instrumented shader.
    auto it = shader_map.find(debug_record[kInstCommonOutShaderId]);
    if (it != shader_map.end()) {
        shader_module_handle = it->second.shader_module;
        pipeline_handle = it->second.pipeline;
        pgm = it->second.pgm;
    }
    // GenerateValidationMessage fills in the error description and VUID; it returns
    // true when the error warrants the full stage/common/source context strings
    // (it returns false for the pre-draw validation errors, which are self-contained).
    bool gen_full_message = GenerateValidationMessage(debug_record, validation_message, vuid_msg, buffer_info, this);
    if (gen_full_message) {
        UtilGenerateStageMessage(debug_record, stage_message);
        UtilGenerateCommonMessage(report_data, command_buffer, debug_record, shader_module_handle, pipeline_handle,
            buffer_info.pipeline_bind_point, operation_index, common_message);
        UtilGenerateSourceMessages(pgm, debug_record, false, filename_message, source_message);
        LogError(queue, vuid_msg.c_str(), "%s %s %s %s%s", validation_message.c_str(), common_message.c_str(), stage_message.c_str(),
            filename_message.c_str(), source_message.c_str());
    }
    else {
        LogError(queue, vuid_msg.c_str(), "%s", validation_message.c_str());
    }
    // The debug record at word kInstCommonOutSize is the number of words in the record
    // written by the shader.  Clear the entire record plus the total_words word at the start.
    // Clamp to kInstMaxOutCnt so a corrupt size word cannot make us clear past the buffer.
    const uint32_t words_to_clear = 1 + std::min(debug_record[kInstCommonOutSize], static_cast<uint32_t>(kInstMaxOutCnt));
    memset(debug_output_buffer, 0, sizeof(uint32_t) * words_to_clear);
}
1489 
SetDescriptorInitialized(uint32_t * pData,uint32_t index,const cvdescriptorset::Descriptor * descriptor)1490 void GpuAssisted::SetDescriptorInitialized(uint32_t *pData, uint32_t index, const cvdescriptorset::Descriptor *descriptor) {
1491     if (descriptor->GetClass() == cvdescriptorset::DescriptorClass::GeneralBuffer) {
1492         auto buffer = static_cast<const cvdescriptorset::BufferDescriptor *>(descriptor)->GetBuffer();
1493         if (buffer == VK_NULL_HANDLE) {
1494             pData[index] = UINT_MAX;
1495         } else {
1496             auto buffer_state = static_cast<const cvdescriptorset::BufferDescriptor *>(descriptor)->GetBufferState();
1497             pData[index] = static_cast<uint32_t>(buffer_state->createInfo.size);
1498         }
1499     } else if (descriptor->GetClass() == cvdescriptorset::DescriptorClass::TexelBuffer) {
1500         auto buffer_view = static_cast<const cvdescriptorset::TexelDescriptor *>(descriptor)->GetBufferView();
1501         if (buffer_view == VK_NULL_HANDLE) {
1502             pData[index] = UINT_MAX;
1503         } else {
1504             auto buffer_view_state = static_cast<const cvdescriptorset::TexelDescriptor *>(descriptor)->GetBufferViewState();
1505             pData[index] = static_cast<uint32_t>(buffer_view_state->buffer_state->createInfo.size);
1506         }
1507     } else {
1508         pData[index] = 1;
1509     }
1510 }
1511 
1512 // For the given command buffer, map its debug data buffers and update the status of any update after bind descriptors
UpdateInstrumentationBuffer(CMD_BUFFER_STATE_GPUAV * cb_node)1513 void GpuAssisted::UpdateInstrumentationBuffer(CMD_BUFFER_STATE_GPUAV *cb_node) {
1514     uint32_t *data;
1515     for (auto &buffer_info : cb_node->gpuav_buffer_list) {
1516         if (buffer_info.di_input_mem_block.update_at_submit.size() > 0) {
1517             VkResult result =
1518                 vmaMapMemory(vmaAllocator, buffer_info.di_input_mem_block.allocation, reinterpret_cast<void **>(&data));
1519             if (result == VK_SUCCESS) {
1520                 for (const auto &update : buffer_info.di_input_mem_block.update_at_submit) {
1521                     if (update.second->updated) {
1522                         SetDescriptorInitialized(data, update.first, update.second);
1523                     }
1524                 }
1525                 vmaUnmapMemory(vmaAllocator, buffer_info.di_input_mem_block.allocation);
1526             }
1527         }
1528     }
1529 }
1530 
PreRecordCommandBuffer(VkCommandBuffer command_buffer)1531 void GpuAssisted::PreRecordCommandBuffer(VkCommandBuffer command_buffer) {
1532     auto cb_node = GetCBState(command_buffer);
1533     UpdateInstrumentationBuffer(cb_node.get());
1534     for (auto *secondary_cmd_buffer : cb_node->linkedCommandBuffers) {
1535         UpdateInstrumentationBuffer(static_cast<CMD_BUFFER_STATE_GPUAV *>(secondary_cmd_buffer));
1536     }
1537 }
1538 
PreCallRecordQueueSubmit(VkQueue queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence)1539 void GpuAssisted::PreCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence) {
1540     ValidationStateTracker::PreCallRecordQueueSubmit(queue, submitCount, pSubmits, fence);
1541     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1542         const VkSubmitInfo *submit = &pSubmits[submit_idx];
1543         for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1544             PreRecordCommandBuffer(submit->pCommandBuffers[i]);
1545         }
1546     }
1547 }
PreCallRecordQueueSubmit2KHR(VkQueue queue,uint32_t submitCount,const VkSubmitInfo2KHR * pSubmits,VkFence fence)1548 void GpuAssisted::PreCallRecordQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR *pSubmits,
1549                                                VkFence fence) {
1550     ValidationStateTracker::PreCallRecordQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
1551     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1552         const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1553         for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1554             PreRecordCommandBuffer(submit->pCommandBufferInfos[i].commandBuffer);
1555         }
1556     }
1557 }
1558 
CommandBufferNeedsProcessing(VkCommandBuffer command_buffer)1559 bool GpuAssisted::CommandBufferNeedsProcessing(VkCommandBuffer command_buffer) {
1560     bool buffers_present = false;
1561     auto cb_node = GetCBState(command_buffer);
1562 
1563     if (cb_node->gpuav_buffer_list.size() || cb_node->hasBuildAccelerationStructureCmd) {
1564         buffers_present = true;
1565     }
1566     for (const auto *secondary : cb_node->linkedCommandBuffers) {
1567         auto secondary_cmd_buffer = static_cast<const CMD_BUFFER_STATE_GPUAV *>(secondary);
1568         if (secondary_cmd_buffer->gpuav_buffer_list.size() || cb_node->hasBuildAccelerationStructureCmd) {
1569             buffers_present = true;
1570         }
1571     }
1572     return buffers_present;
1573 }
1574 
ProcessCommandBuffer(VkQueue queue,VkCommandBuffer command_buffer)1575 void GpuAssisted::ProcessCommandBuffer(VkQueue queue, VkCommandBuffer command_buffer) {
1576     auto cb_node = GetCBState(command_buffer);
1577 
1578     UtilProcessInstrumentationBuffer(queue, cb_node.get(), this);
1579     ProcessAccelerationStructureBuildValidationBuffer(queue, cb_node.get());
1580     for (auto *secondary_cmd_buffer : cb_node->linkedCommandBuffers) {
1581         UtilProcessInstrumentationBuffer(queue, secondary_cmd_buffer, this);
1582         ProcessAccelerationStructureBuildValidationBuffer(queue, cb_node.get());
1583     }
1584 }
1585 
1586 // Issue a memory barrier to make GPU-written data available to host.
1587 // Wait for the queue to complete execution.
1588 // Check the debug buffers for all the command buffers that were submitted.
PostCallRecordQueueSubmit(VkQueue queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence,VkResult result)1589 void GpuAssisted::PostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence,
1590                                             VkResult result) {
1591     ValidationStateTracker::PostCallRecordQueueSubmit(queue, submitCount, pSubmits, fence, result);
1592 
1593     if (aborted || (result != VK_SUCCESS)) return;
1594     bool buffers_present = false;
1595     // Don't QueueWaitIdle if there's nothing to process
1596     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1597         const VkSubmitInfo *submit = &pSubmits[submit_idx];
1598         for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1599             buffers_present |= CommandBufferNeedsProcessing(submit->pCommandBuffers[i]);
1600         }
1601     }
1602     if (!buffers_present) return;
1603 
1604     UtilSubmitBarrier(queue, this);
1605 
1606     DispatchQueueWaitIdle(queue);
1607 
1608     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1609         const VkSubmitInfo *submit = &pSubmits[submit_idx];
1610         for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1611             ProcessCommandBuffer(queue, submit->pCommandBuffers[i]);
1612         }
1613     }
1614 }
1615 
PostCallRecordQueueSubmit2KHR(VkQueue queue,uint32_t submitCount,const VkSubmitInfo2KHR * pSubmits,VkFence fence,VkResult result)1616 void GpuAssisted::PostCallRecordQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR *pSubmits,
1617                                                 VkFence fence, VkResult result) {
1618     ValidationStateTracker::PostCallRecordQueueSubmit2KHR(queue, submitCount, pSubmits, fence, result);
1619 
1620     if (aborted || (result != VK_SUCCESS)) return;
1621     bool buffers_present = false;
1622     // Don't QueueWaitIdle if there's nothing to process
1623     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1624         const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1625         for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1626             buffers_present |= CommandBufferNeedsProcessing(submit->pCommandBufferInfos[i].commandBuffer);
1627         }
1628     }
1629     if (!buffers_present) return;
1630 
1631     UtilSubmitBarrier(queue, this);
1632 
1633     DispatchQueueWaitIdle(queue);
1634 
1635     for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1636         const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1637         for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1638             ProcessCommandBuffer(queue, submit->pCommandBufferInfos[i].commandBuffer);
1639         }
1640     }
1641 }
1642 
PreCallRecordCmdDraw(VkCommandBuffer commandBuffer,uint32_t vertexCount,uint32_t instanceCount,uint32_t firstVertex,uint32_t firstInstance)1643 void GpuAssisted::PreCallRecordCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
1644                                        uint32_t firstVertex, uint32_t firstInstance) {
1645     ValidationStateTracker::PreCallRecordCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
1646     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAW);
1647 }
1648 
PreCallRecordCmdDrawMultiEXT(VkCommandBuffer commandBuffer,uint32_t drawCount,const VkMultiDrawInfoEXT * pVertexInfo,uint32_t instanceCount,uint32_t firstInstance,uint32_t stride)1649 void GpuAssisted::PreCallRecordCmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
1650                                                const VkMultiDrawInfoEXT *pVertexInfo, uint32_t instanceCount,
1651                                                uint32_t firstInstance, uint32_t stride) {
1652     ValidationStateTracker::PreCallRecordCmdDrawMultiEXT(commandBuffer, drawCount, pVertexInfo, instanceCount, firstInstance,
1653                                                          stride);
1654     for (uint32_t i = 0; i < drawCount; i++) {
1655         AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMULTIEXT);
1656     }
1657 }
1658 
PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer,uint32_t indexCount,uint32_t instanceCount,uint32_t firstIndex,int32_t vertexOffset,uint32_t firstInstance)1659 void GpuAssisted::PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
1660                                               uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) {
1661     ValidationStateTracker::PreCallRecordCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
1662                                                         firstInstance);
1663     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXED);
1664 }
1665 
PreCallRecordCmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,uint32_t drawCount,const VkMultiDrawIndexedInfoEXT * pIndexInfo,uint32_t instanceCount,uint32_t firstInstance,uint32_t stride,const int32_t * pVertexOffset)1666 void GpuAssisted::PreCallRecordCmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
1667                                                       const VkMultiDrawIndexedInfoEXT *pIndexInfo, uint32_t instanceCount,
1668                                                       uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset) {
1669     ValidationStateTracker::PreCallRecordCmdDrawMultiIndexedEXT(commandBuffer, drawCount, pIndexInfo, instanceCount, firstInstance,
1670                                                                 stride, pVertexOffset);
1671     for (uint32_t i = 0; i < drawCount; i++) {
1672         AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMULTIINDEXEDEXT);
1673     }
1674 }
1675 
PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t count,uint32_t stride)1676 void GpuAssisted::PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count,
1677                                                uint32_t stride) {
1678     ValidationStateTracker::PreCallRecordCmdDrawIndirect(commandBuffer, buffer, offset, count, stride);
1679     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, count, stride, 0, 0};
1680     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECT, &cdi_state);
1681 }
1682 
PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t count,uint32_t stride)1683 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1684                                                       uint32_t count, uint32_t stride) {
1685     ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirect(commandBuffer, buffer, offset, count, stride);
1686     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, count, stride, 0, 0};
1687     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECT, &cdi_state);
1688 }
1689 
PreCallRecordCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1690 void GpuAssisted::PreCallRecordCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1691                                                        VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
1692                                                        uint32_t stride) {
1693     ValidationStateTracker::PreCallRecordCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1694                                                                  maxDrawCount, stride);
1695     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1696     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTCOUNTKHR, &cdi_state);
1697 }
1698 
PreCallRecordCmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1699 void GpuAssisted::PreCallRecordCmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1700                                                     VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
1701 
1702                                                     uint32_t stride) {
1703     ValidationStateTracker::PreCallRecordCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1704                                                               maxDrawCount, stride);
1705     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1706     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTCOUNT, &cdi_state);
1707 }
1708 
PreCallRecordCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,uint32_t instanceCount,uint32_t firstInstance,VkBuffer counterBuffer,VkDeviceSize counterBufferOffset,uint32_t counterOffset,uint32_t vertexStride)1709 void GpuAssisted::PreCallRecordCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
1710                                                            uint32_t firstInstance, VkBuffer counterBuffer,
1711                                                            VkDeviceSize counterBufferOffset, uint32_t counterOffset,
1712                                                            uint32_t vertexStride) {
1713     ValidationStateTracker::PreCallRecordCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer,
1714                                                                      counterBufferOffset, counterOffset, vertexStride);
1715     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTBYTECOUNTEXT);
1716 }
1717 
PreCallRecordCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1718 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1719                                                               VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1720                                                               uint32_t maxDrawCount, uint32_t stride) {
1721     ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer,
1722                                                                         countBufferOffset, maxDrawCount, stride);
1723     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1724     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECTCOUNTKHR, &cdi_state);
1725 }
1726 
PreCallRecordCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1727 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1728                                                            VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1729                                                            uint32_t maxDrawCount, uint32_t stride) {
1730     ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1731                                                                      maxDrawCount, stride);
1732     GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1733     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECTCOUNT, &cdi_state);
1734 }
1735 
PreCallRecordCmdDrawMeshTasksNV(VkCommandBuffer commandBuffer,uint32_t taskCount,uint32_t firstTask)1736 void GpuAssisted::PreCallRecordCmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask) {
1737     ValidationStateTracker::PreCallRecordCmdDrawMeshTasksNV(commandBuffer, taskCount, firstTask);
1738     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSNV);
1739 }
1740 
PreCallRecordCmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)1741 void GpuAssisted::PreCallRecordCmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1742                                                           uint32_t drawCount, uint32_t stride) {
1743     ValidationStateTracker::PreCallRecordCmdDrawMeshTasksIndirectNV(commandBuffer, buffer, offset, drawCount, stride);
1744     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSINDIRECTNV);
1745 }
1746 
PreCallRecordCmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1747 void GpuAssisted::PreCallRecordCmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1748                                                                VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1749                                                                uint32_t maxDrawCount, uint32_t stride) {
1750     ValidationStateTracker::PreCallRecordCmdDrawMeshTasksIndirectCountNV(commandBuffer, buffer, offset, countBuffer,
1751                                                                          countBufferOffset, maxDrawCount, stride);
1752     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSINDIRECTCOUNTNV);
1753 }
1754 
PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer,uint32_t x,uint32_t y,uint32_t z)1755 void GpuAssisted::PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) {
1756     ValidationStateTracker::PreCallRecordCmdDispatch(commandBuffer, x, y, z);
1757     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCH);
1758 }
1759 
PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset)1760 void GpuAssisted::PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) {
1761     ValidationStateTracker::PreCallRecordCmdDispatchIndirect(commandBuffer, buffer, offset);
1762     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHINDIRECT);
1763 }
1764 
PreCallRecordCmdDispatchBase(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)1765 void GpuAssisted::PreCallRecordCmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY,
1766                                                uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY,
1767                                                uint32_t groupCountZ) {
1768     ValidationStateTracker::PreCallRecordCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX,
1769                                                          groupCountY, groupCountZ);
1770     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHBASE);
1771 }
1772 
PreCallRecordCmdDispatchBaseKHR(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)1773 void GpuAssisted::PreCallRecordCmdDispatchBaseKHR(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY,
1774                                                   uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY,
1775                                                   uint32_t groupCountZ) {
1776     ValidationStateTracker::PreCallRecordCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX,
1777                                                             groupCountY, groupCountZ);
1778     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHBASEKHR);
1779 }
1780 
PreCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer,VkBuffer raygenShaderBindingTableBuffer,VkDeviceSize raygenShaderBindingOffset,VkBuffer missShaderBindingTableBuffer,VkDeviceSize missShaderBindingOffset,VkDeviceSize missShaderBindingStride,VkBuffer hitShaderBindingTableBuffer,VkDeviceSize hitShaderBindingOffset,VkDeviceSize hitShaderBindingStride,VkBuffer callableShaderBindingTableBuffer,VkDeviceSize callableShaderBindingOffset,VkDeviceSize callableShaderBindingStride,uint32_t width,uint32_t height,uint32_t depth)1781 void GpuAssisted::PreCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
1782                                               VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
1783                                               VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
1784                                               VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
1785                                               VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
1786                                               VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
1787                                               uint32_t width, uint32_t height, uint32_t depth) {
1788     ValidationStateTracker::PreCallRecordCmdTraceRaysNV(
1789         commandBuffer, raygenShaderBindingTableBuffer, raygenShaderBindingOffset, missShaderBindingTableBuffer,
1790         missShaderBindingOffset, missShaderBindingStride, hitShaderBindingTableBuffer, hitShaderBindingOffset,
1791         hitShaderBindingStride, callableShaderBindingTableBuffer, callableShaderBindingOffset, callableShaderBindingStride, width,
1792         height, depth);
1793     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, CMD_TRACERAYSNV);
1794 }
1795 
PostCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer,VkBuffer raygenShaderBindingTableBuffer,VkDeviceSize raygenShaderBindingOffset,VkBuffer missShaderBindingTableBuffer,VkDeviceSize missShaderBindingOffset,VkDeviceSize missShaderBindingStride,VkBuffer hitShaderBindingTableBuffer,VkDeviceSize hitShaderBindingOffset,VkDeviceSize hitShaderBindingStride,VkBuffer callableShaderBindingTableBuffer,VkDeviceSize callableShaderBindingOffset,VkDeviceSize callableShaderBindingStride,uint32_t width,uint32_t height,uint32_t depth)1796 void GpuAssisted::PostCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
1797                                                VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
1798                                                VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
1799                                                VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
1800                                                VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
1801                                                VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
1802                                                uint32_t width, uint32_t height, uint32_t depth) {
1803     ValidationStateTracker::PostCallRecordCmdTraceRaysNV(
1804         commandBuffer, raygenShaderBindingTableBuffer, raygenShaderBindingOffset, missShaderBindingTableBuffer,
1805         missShaderBindingOffset, missShaderBindingStride, hitShaderBindingTableBuffer, hitShaderBindingOffset,
1806         hitShaderBindingStride, callableShaderBindingTableBuffer, callableShaderBindingOffset, callableShaderBindingStride, width,
1807         height, depth);
1808     auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1809     cb_state->hasTraceRaysCmd = true;
1810 }
1811 
PreCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,uint32_t width,uint32_t height,uint32_t depth)1812 void GpuAssisted::PreCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
1813                                                const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1814                                                const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1815                                                const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1816                                                const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
1817                                                uint32_t height, uint32_t depth) {
1818     ValidationStateTracker::PreCallRecordCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1819                                                          pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
1820     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, CMD_TRACERAYSKHR);
1821 }
1822 
PostCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,uint32_t width,uint32_t height,uint32_t depth)1823 void GpuAssisted::PostCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
1824                                                 const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1825                                                 const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1826                                                 const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1827                                                 const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
1828                                                 uint32_t height, uint32_t depth) {
1829     ValidationStateTracker::PostCallRecordCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1830                                                           pHitShaderBindingTable, pCallableShaderBindingTable, width, height,
1831                                                           depth);
1832     auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1833     cb_state->hasTraceRaysCmd = true;
1834 }
1835 
PreCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,VkDeviceAddress indirectDeviceAddress)1836 void GpuAssisted::PreCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
1837                                                        const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1838                                                        const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1839                                                        const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1840                                                        const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
1841                                                        VkDeviceAddress indirectDeviceAddress) {
1842     ValidationStateTracker::PreCallRecordCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1843                                                                  pHitShaderBindingTable, pCallableShaderBindingTable,
1844                                                                  indirectDeviceAddress);
1845     AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, CMD_TRACERAYSINDIRECTKHR);
1846 }
1847 
PostCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,VkDeviceAddress indirectDeviceAddress)1848 void GpuAssisted::PostCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
1849                                                         const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1850                                                         const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1851                                                         const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1852                                                         const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
1853                                                         VkDeviceAddress indirectDeviceAddress) {
1854     ValidationStateTracker::PostCallRecordCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1855                                                                   pHitShaderBindingTable, pCallableShaderBindingTable,
1856                                                                   indirectDeviceAddress);
1857     auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1858     cb_state->hasTraceRaysCmd = true;
1859 }
1860 
// To generate the pre-draw validation shader, run the following from the repository base level:
// python ./scripts/generate_spirv.py --outfilename ./layers/generated/gpu_pre_draw_shader.h ./layers/gpu_pre_draw_shader.vert
// ./External/glslang/build/install/bin/glslangValidator.exe
#include "gpu_pre_draw_shader.h"
AllocatePreDrawValidationResources(GpuAssistedDeviceMemoryBlock output_block,GpuAssistedPreDrawResources & resources,const LAST_BOUND_STATE & state,VkPipeline * pPipeline,const GpuAssistedCmdDrawIndirectState * cdi_state)1865 void GpuAssisted::AllocatePreDrawValidationResources(GpuAssistedDeviceMemoryBlock output_block,
1866                                                      GpuAssistedPreDrawResources &resources, const LAST_BOUND_STATE &state,
1867                                                      VkPipeline *pPipeline, const GpuAssistedCmdDrawIndirectState *cdi_state) {
1868     VkResult result;
1869     if (!pre_draw_validation_state.globals_created) {
1870         auto shader_module_ci = LvlInitStruct<VkShaderModuleCreateInfo>();
1871         shader_module_ci.codeSize = sizeof(gpu_pre_draw_shader_vert);
1872         shader_module_ci.pCode = gpu_pre_draw_shader_vert;
1873         result =
1874             DispatchCreateShaderModule(device, &shader_module_ci, nullptr, &pre_draw_validation_state.validation_shader_module);
1875         if (result != VK_SUCCESS) {
1876             ReportSetupProblem(device, "Unable to create shader module.  Aborting GPU-AV");
1877             aborted = true;
1878             return;
1879         }
1880 
1881         std::vector<VkDescriptorSetLayoutBinding> bindings;
1882         VkDescriptorSetLayoutBinding binding = {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT, NULL};
1883         // 0 - output buffer, 1 - count buffer
1884         bindings.push_back(binding);
1885         binding.binding = 1;
1886         bindings.push_back(binding);
1887 
1888         VkDescriptorSetLayoutCreateInfo ds_layout_ci = {};
1889         ds_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
1890         ds_layout_ci.bindingCount = static_cast<uint32_t>(bindings.size());
1891         ds_layout_ci.pBindings = bindings.data();
1892         result = DispatchCreateDescriptorSetLayout(device, &ds_layout_ci, nullptr, &pre_draw_validation_state.validation_ds_layout);
1893         if (result != VK_SUCCESS) {
1894             ReportSetupProblem(device, "Unable to create descriptor set layout.  Aborting GPU-AV");
1895             aborted = true;
1896             return;
1897         }
1898 
1899         const uint32_t push_constant_range_count = 1;
1900         VkPushConstantRange push_constant_ranges[push_constant_range_count] = {};
1901         push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
1902         push_constant_ranges[0].offset = 0;
1903         push_constant_ranges[0].size = 4 * sizeof(uint32_t);
1904         VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo[1] = {};
1905         pipelineLayoutCreateInfo[0].sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
1906         pipelineLayoutCreateInfo[0].pNext = NULL;
1907         pipelineLayoutCreateInfo[0].pushConstantRangeCount = push_constant_range_count;
1908         pipelineLayoutCreateInfo[0].pPushConstantRanges = push_constant_ranges;
1909         pipelineLayoutCreateInfo[0].setLayoutCount = 1;
1910         pipelineLayoutCreateInfo[0].pSetLayouts = &pre_draw_validation_state.validation_ds_layout;
1911         result = DispatchCreatePipelineLayout(device, pipelineLayoutCreateInfo, NULL,
1912                                               &pre_draw_validation_state.validation_pipeline_layout);
1913         if (result != VK_SUCCESS) {
1914             ReportSetupProblem(device, "Unable to create pipeline layout.  Aborting GPU-AV");
1915             aborted = true;
1916             return;
1917         }
1918 
1919         pre_draw_validation_state.globals_created = true;
1920     }
1921     VkRenderPass render_pass = state.pipeline_state->rp_state->renderPass();
1922     assert(render_pass != VK_NULL_HANDLE);
1923     auto pipeline = pre_draw_validation_state.renderpass_to_pipeline.find(render_pass);
1924     if (pipeline == pre_draw_validation_state.renderpass_to_pipeline.end()) {
1925         auto pipeline_stage_ci = LvlInitStruct<VkPipelineShaderStageCreateInfo>();
1926         pipeline_stage_ci.stage = VK_SHADER_STAGE_VERTEX_BIT;
1927         pipeline_stage_ci.module = pre_draw_validation_state.validation_shader_module;
1928         pipeline_stage_ci.pName = "main";
1929 
1930         auto graphicsPipelineCreateInfo = LvlInitStruct<VkGraphicsPipelineCreateInfo>();
1931         auto vertexInputState = LvlInitStruct<VkPipelineVertexInputStateCreateInfo>();
1932         auto inputAssemblyState = LvlInitStruct<VkPipelineInputAssemblyStateCreateInfo>();
1933         inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1934         auto rasterizationState = LvlInitStruct<VkPipelineRasterizationStateCreateInfo>();
1935         rasterizationState.rasterizerDiscardEnable = VK_TRUE;
1936         auto colorBlendState = LvlInitStruct<VkPipelineColorBlendStateCreateInfo>();
1937 
1938         graphicsPipelineCreateInfo.pVertexInputState = &vertexInputState;
1939         graphicsPipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
1940         graphicsPipelineCreateInfo.pRasterizationState = &rasterizationState;
1941         graphicsPipelineCreateInfo.pColorBlendState = &colorBlendState;
1942         graphicsPipelineCreateInfo.renderPass = render_pass;
1943         graphicsPipelineCreateInfo.layout = pre_draw_validation_state.validation_pipeline_layout;
1944         graphicsPipelineCreateInfo.stageCount = 1;
1945         graphicsPipelineCreateInfo.pStages = &pipeline_stage_ci;
1946 
1947         VkPipeline new_pipeline = VK_NULL_HANDLE;
1948         result = DispatchCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &graphicsPipelineCreateInfo, nullptr, &new_pipeline);
1949         if (result != VK_SUCCESS) {
1950             ReportSetupProblem(device, "Unable to create graphics pipeline.  Aborting GPU-AV");
1951             aborted = true;
1952             return;
1953         }
1954 
1955         *pPipeline = new_pipeline;
1956         pre_draw_validation_state.renderpass_to_pipeline[render_pass] = new_pipeline;
1957     } else {
1958         *pPipeline = pipeline->second;
1959     }
1960 
1961     result = desc_set_manager->GetDescriptorSet(&resources.desc_pool, pre_draw_validation_state.validation_ds_layout,
1962                                                 &resources.desc_set);
1963     if (result != VK_SUCCESS) {
1964         ReportSetupProblem(device, "Unable to allocate descriptor set.  Aborting GPU-AV");
1965         aborted = true;
1966         return;
1967     }
1968 
1969     VkDescriptorBufferInfo buffer_infos[3] = {};
1970     // Error output buffer
1971     buffer_infos[0].buffer = output_block.buffer;
1972     buffer_infos[0].offset = 0;
1973     buffer_infos[0].range = VK_WHOLE_SIZE;
1974     if (cdi_state->count_buffer) {
1975         // Count buffer
1976         buffer_infos[1].buffer = cdi_state->count_buffer;
1977     } else {
1978         // Draw Buffer
1979         buffer_infos[1].buffer = cdi_state->buffer;
1980     }
1981     buffer_infos[1].offset = 0;
1982     buffer_infos[1].range = VK_WHOLE_SIZE;
1983 
1984     VkWriteDescriptorSet desc_writes[2] = {};
1985     for (auto i = 0; i < 2; i++) {
1986         desc_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1987         desc_writes[i].dstBinding = i;
1988         desc_writes[i].descriptorCount = 1;
1989         desc_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1990         desc_writes[i].pBufferInfo = &buffer_infos[i];
1991         desc_writes[i].dstSet = resources.desc_set;
1992     }
1993     DispatchUpdateDescriptorSets(device, 2, desc_writes, 0, NULL);
1994 }
1995 
AllocateValidationResources(const VkCommandBuffer cmd_buffer,const VkPipelineBindPoint bind_point,CMD_TYPE cmd_type,const GpuAssistedCmdDrawIndirectState * cdi_state)1996 void GpuAssisted::AllocateValidationResources(const VkCommandBuffer cmd_buffer, const VkPipelineBindPoint bind_point,
1997                                               CMD_TYPE cmd_type, const GpuAssistedCmdDrawIndirectState *cdi_state) {
1998     if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE &&
1999         bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) {
2000         return;
2001     }
2002     VkResult result;
2003 
2004     if (aborted) return;
2005 
2006     std::vector<VkDescriptorSet> desc_sets;
2007     VkDescriptorPool desc_pool = VK_NULL_HANDLE;
2008     result = desc_set_manager->GetDescriptorSets(1, &desc_pool, debug_desc_layout, &desc_sets);
2009     assert(result == VK_SUCCESS);
2010     if (result != VK_SUCCESS) {
2011         ReportSetupProblem(device, "Unable to allocate descriptor sets.  Device could become unstable.");
2012         aborted = true;
2013         return;
2014     }
2015 
2016     VkDescriptorBufferInfo output_desc_buffer_info = {};
2017     output_desc_buffer_info.range = output_buffer_size;
2018 
2019     auto cb_node = GetCBState(cmd_buffer);
2020     if (!cb_node) {
2021         ReportSetupProblem(device, "Unrecognized command buffer");
2022         aborted = true;
2023         return;
2024     }
2025 
2026     // Allocate memory for the output block that the gpu will use to return any error information
2027     GpuAssistedDeviceMemoryBlock output_block = {};
2028     VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
2029     buffer_info.size = output_buffer_size;
2030     buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
2031     VmaAllocationCreateInfo alloc_info = {};
2032     alloc_info.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
2033     result = vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &output_block.buffer, &output_block.allocation, nullptr);
2034     if (result != VK_SUCCESS) {
2035         ReportSetupProblem(device, "Unable to allocate device memory.  Device could become unstable.");
2036         aborted = true;
2037         return;
2038     }
2039 
2040     // Clear the output block to zeros so that only error information from the gpu will be present
2041     uint32_t *data_ptr;
2042     result = vmaMapMemory(vmaAllocator, output_block.allocation, reinterpret_cast<void **>(&data_ptr));
2043     if (result == VK_SUCCESS) {
2044         memset(data_ptr, 0, output_buffer_size);
2045         vmaUnmapMemory(vmaAllocator, output_block.allocation);
2046     }
2047 
2048     GpuAssistedDeviceMemoryBlock di_input_block = {}, bda_input_block = {};
2049     VkDescriptorBufferInfo di_input_desc_buffer_info = {};
2050     VkDescriptorBufferInfo bda_input_desc_buffer_info = {};
2051     VkWriteDescriptorSet desc_writes[3] = {};
2052     GpuAssistedPreDrawResources pre_draw_resources = {};
2053     uint32_t desc_count = 1;
2054     const auto lv_bind_point = ConvertToLvlBindPoint(bind_point);
2055     auto const &state = cb_node->lastBound[lv_bind_point];
2056     uint32_t number_of_sets = static_cast<uint32_t>(state.per_set.size());
2057 
2058     if (validate_draw_indirect && ((cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR ||
2059                                     cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR) ||
2060                                    ((cmd_type == CMD_DRAWINDIRECT || cmd_type == CMD_DRAWINDEXEDINDIRECT) &&
2061                                     !(enabled_features.core.drawIndirectFirstInstance)))) {
2062         // Insert a draw that can examine some device memory right before the draw we're validating (Pre Draw Validation)
2063         //
2064         // NOTE that this validation does not attempt to abort invalid api calls as most other validation does.  A crash
2065         // or DEVICE_LOST resulting from the invalid call will prevent preceeding validation errors from being reported.
2066 
2067         assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
2068         assert(cdi_state != NULL);
2069         VkPipeline validation_pipeline;
2070         AllocatePreDrawValidationResources(output_block, pre_draw_resources, state, &validation_pipeline, cdi_state);
2071         if (aborted) return;
2072 
2073         // Save current graphics pipeline state
2074         GPUAV_RESTORABLE_PIPELINE_STATE restorable_state;
2075         restorable_state.Create(cb_node.get(), VK_PIPELINE_BIND_POINT_GRAPHICS);
2076 
2077         // Save parameters for error message
2078         pre_draw_resources.buffer = cdi_state->buffer;
2079         pre_draw_resources.offset = cdi_state->offset;
2080         pre_draw_resources.stride = cdi_state->stride;
2081 
2082         uint32_t pushConstants[4] = {};
2083         if (cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT ||
2084             cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR) {
2085             if (cdi_state->count_buffer_offset > std::numeric_limits<uint32_t>::max()) {
2086                 ReportSetupProblem(device,
2087                                    "Count buffer offset is larger than can be contained in an unsigned int.  Aborting GPU-AV");
2088                 aborted = true;
2089                 return;
2090             }
2091 
2092             // Buffer size must be >= (stride * (drawCount - 1) + offset + sizeof(VkDrawIndirectCommand))
2093             uint32_t struct_size;
2094             if (cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR) {
2095                 struct_size = sizeof(VkDrawIndirectCommand);
2096             } else {
2097                 assert(cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR);
2098                 struct_size = sizeof(VkDrawIndexedIndirectCommand);
2099             }
2100             auto buffer_state = Get<BUFFER_STATE>(cdi_state->buffer);
2101             uint32_t max_count;
2102             uint64_t bufsize = buffer_state->createInfo.size;
2103             uint64_t first_command_bytes = struct_size + cdi_state->offset;
2104             if (first_command_bytes > bufsize) {
2105                 max_count = 0;
2106             } else {
2107                 max_count = 1 + static_cast<uint32_t>(std::floor(((bufsize - first_command_bytes) / cdi_state->stride)));
2108             }
2109             pre_draw_resources.buf_size = buffer_state->createInfo.size;
2110 
2111             assert(phys_dev_props.limits.maxDrawIndirectCount > 0);
2112             pushConstants[0] = phys_dev_props.limits.maxDrawIndirectCount;
2113             pushConstants[1] = max_count;
2114             pushConstants[2] = static_cast<uint32_t>((cdi_state->count_buffer_offset / sizeof(uint32_t)));
2115         } else {
2116             pushConstants[0] = 0;  // firstInstance check instead of count buffer check
2117             pushConstants[1] = cdi_state->drawCount;
2118             if (cmd_type == CMD_DRAWINDIRECT) {
2119                 pushConstants[2] = static_cast<uint32_t>(
2120                     ((cdi_state->offset + offsetof(struct VkDrawIndirectCommand, firstInstance)) / sizeof(uint32_t)));
2121             } else {
2122                 assert(cmd_type == CMD_DRAWINDEXEDINDIRECT);
2123                 pushConstants[2] = static_cast<uint32_t>(
2124                     ((cdi_state->offset + offsetof(struct VkDrawIndexedIndirectCommand, firstInstance)) / sizeof(uint32_t)));
2125             }
2126             pushConstants[3] = (cdi_state->stride / sizeof(uint32_t));
2127         }
2128 
2129         // Insert diagnostic draw
2130         DispatchCmdBindPipeline(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, validation_pipeline);
2131         DispatchCmdPushConstants(cmd_buffer, pre_draw_validation_state.validation_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
2132                                  sizeof(pushConstants), pushConstants);
2133         DispatchCmdBindDescriptorSets(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
2134                                       pre_draw_validation_state.validation_pipeline_layout, 0, 1, &pre_draw_resources.desc_set, 0,
2135                                       nullptr);
2136         DispatchCmdDraw(cmd_buffer, 3, 1, 0, 0);
2137 
2138         // Restore the previous graphics pipeline state.
2139         restorable_state.Restore(cmd_buffer);
2140     }
2141 
2142     bool has_buffers = false;
2143     // Figure out how much memory we need for the input block based on how many sets and bindings there are
2144     // and how big each of the bindings is
2145     if (number_of_sets > 0 && (descriptor_indexing || buffer_oob_enabled)) {
2146         uint32_t descriptor_count = 0;  // Number of descriptors, including all array elements
2147         uint32_t binding_count = 0;     // Number of bindings based on the max binding number used
2148         for (const auto &s : state.per_set) {
2149             auto desc = s.bound_descriptor_set;
2150             if (desc && (desc->GetBindingCount() > 0)) {
2151                 auto bindings = desc->GetLayout()->GetSortedBindingSet();
2152                 binding_count += desc->GetLayout()->GetMaxBinding() + 1;
2153                 for (auto binding : bindings) {
2154                     // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline uniform
2155                     // blocks
2156                     auto descriptor_type = desc->GetLayout()->GetTypeFromBinding(binding);
2157                     if (descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
2158                         descriptor_count++;
2159                         LogWarning(device, "UNASSIGNED-GPU-Assisted Validation Warning",
2160                                    "VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT descriptors will not be validated by GPU assisted "
2161                                    "validation");
2162                     } else if (binding == desc->GetLayout()->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) {
2163                         descriptor_count += desc->GetVariableDescriptorCount();
2164                     } else {
2165                         descriptor_count += desc->GetDescriptorCountFromBinding(binding);
2166                     }
2167                     if (!has_buffers && (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
2168                                          descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
2169                                          descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
2170                                          descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2171                                          descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ||
2172                                          descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)) {
2173                         has_buffers = true;
2174                     }
2175                 }
2176             }
2177         }
2178 
2179         if (descriptor_indexing || has_buffers) {
2180             // Note that the size of the input buffer is dependent on the maximum binding number, which
2181             // can be very large.  This is because for (set = s, binding = b, index = i), the validation
2182             // code is going to dereference Input[ i + Input[ b + Input[ s + Input[ Input[0] ] ] ] ] to
2183             // see if descriptors have been written. In gpu_validation.md, we note this and advise
2184             // using densely packed bindings as a best practice when using gpu-av with descriptor indexing
2185             uint32_t words_needed;
2186             if (descriptor_indexing) {
2187                 words_needed = 1 + (number_of_sets * 2) + (binding_count * 2) + descriptor_count;
2188             } else {
2189                 words_needed = 1 + number_of_sets + binding_count + descriptor_count;
2190             }
2191             alloc_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
2192             buffer_info.size = words_needed * 4;
2193             result = vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &di_input_block.buffer, &di_input_block.allocation,
2194                                      nullptr);
2195             if (result != VK_SUCCESS) {
2196                 ReportSetupProblem(device, "Unable to allocate device memory.  Device could become unstable.");
2197                 aborted = true;
2198                 return;
2199             }
2200 
2201             // Populate input buffer first with the sizes of every descriptor in every set, then with whether
2202             // each element of each descriptor has been written or not.  See gpu_validation.md for a more thourough
2203             // outline of the input buffer format
2204             result = vmaMapMemory(vmaAllocator, di_input_block.allocation, reinterpret_cast<void **>(&data_ptr));
2205             memset(data_ptr, 0, static_cast<size_t>(buffer_info.size));
2206 
2207             // Descriptor indexing needs the number of descriptors at each binding.
2208             if (descriptor_indexing) {
2209                 // Pointer to a sets array that points into the sizes array
2210                 uint32_t *sets_to_sizes = data_ptr + 1;
2211                 // Pointer to the sizes array that contains the array size of the descriptor at each binding
2212                 uint32_t *sizes = sets_to_sizes + number_of_sets;
2213                 // Pointer to another sets array that points into the bindings array that points into the written array
2214                 uint32_t *sets_to_bindings = sizes + binding_count;
2215                 // Pointer to the bindings array that points at the start of the writes in the writes array for each binding
2216                 uint32_t *bindings_to_written = sets_to_bindings + number_of_sets;
2217                 // Index of the next entry in the written array to be updated
2218                 uint32_t written_index = 1 + (number_of_sets * 2) + (binding_count * 2);
2219                 uint32_t bind_counter = number_of_sets + 1;
2220                 // Index of the start of the sets_to_bindings array
2221                 data_ptr[0] = number_of_sets + binding_count + 1;
2222 
2223                 for (const auto &s : state.per_set) {
2224                     auto desc = s.bound_descriptor_set;
2225                     if (desc && (desc->GetBindingCount() > 0)) {
2226                         auto layout = desc->GetLayout();
2227                         auto bindings = layout->GetSortedBindingSet();
2228                         // For each set, fill in index of its bindings sizes in the sizes array
2229                         *sets_to_sizes++ = bind_counter;
2230                         // For each set, fill in the index of its bindings in the bindings_to_written array
2231                         *sets_to_bindings++ = bind_counter + number_of_sets + binding_count;
2232                         for (auto binding : bindings) {
2233                             // For each binding, fill in its size in the sizes array
2234                             // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2235                             // uniform blocks
2236                             if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2237                                 sizes[binding] = 1;
2238                             } else if (binding == layout->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) {
2239                                 sizes[binding] = desc->GetVariableDescriptorCount();
2240                             } else {
2241                                 sizes[binding] = desc->GetDescriptorCountFromBinding(binding);
2242                             }
2243                             // Fill in the starting index for this binding in the written array in the bindings_to_written array
2244                             bindings_to_written[binding] = written_index;
2245 
2246                             // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2247                             // uniform blocks
2248                             if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2249                                 data_ptr[written_index++] = UINT_MAX;
2250                                 continue;
2251                             }
2252 
2253                             auto index_range = desc->GetGlobalIndexRangeFromBinding(binding, true);
2254                             // For each array element in the binding, update the written array with whether it has been written
2255                             for (uint32_t i = index_range.start; i < index_range.end; ++i) {
2256                                 auto *descriptor = desc->GetDescriptorFromGlobalIndex(i);
2257                                 if (descriptor->updated) {
2258                                     SetDescriptorInitialized(data_ptr, written_index, descriptor);
2259                                 } else if (desc->IsUpdateAfterBind(binding)) {
2260                                     // If it hasn't been written now and it's update after bind, put it in a list to check at
2261                                     // QueueSubmit
2262                                     di_input_block.update_at_submit[written_index] = descriptor;
2263                                 }
2264                                 written_index++;
2265                             }
2266                         }
2267                         auto last = desc->GetLayout()->GetMaxBinding();
2268                         bindings_to_written += last + 1;
2269                         bind_counter += last + 1;
2270                         sizes += last + 1;
2271                     } else {
2272                         *sets_to_sizes++ = 0;
2273                         *sets_to_bindings++ = 0;
2274                     }
2275                 }
2276             } else {
2277                 // If no descriptor indexing, we don't need number of descriptors at each binding, so
2278                 // no sets_to_sizes or sizes arrays, just sets_to_bindings, bindings_to_written and written_index
2279 
2280                 // Pointer to sets array that points into the bindings array that points into the written array
2281                 uint32_t *sets_to_bindings = data_ptr + 1;
2282                 // Pointer to the bindings array that points at the start of the writes in the writes array for each binding
2283                 uint32_t *bindings_to_written = sets_to_bindings + number_of_sets;
2284                 // Index of the next entry in the written array to be updated
2285                 uint32_t written_index = 1 + number_of_sets + binding_count;
2286                 uint32_t bind_counter = number_of_sets + 1;
2287                 data_ptr[0] = 1;
2288 
2289                 for (const auto &s : state.per_set) {
2290                     auto desc = s.bound_descriptor_set;
2291                     if (desc && (desc->GetBindingCount() > 0)) {
2292                         auto layout = desc->GetLayout();
2293                         auto bindings = layout->GetSortedBindingSet();
2294                         *sets_to_bindings++ = bind_counter;
2295                         for (auto binding : bindings) {
2296                             // Fill in the starting index for this binding in the written array in the bindings_to_written array
2297                             bindings_to_written[binding] = written_index;
2298 
2299                             // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2300                             // uniform blocks
2301                             if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2302                                 data_ptr[written_index++] = UINT_MAX;
2303                                 continue;
2304                             }
2305 
2306                             auto index_range = desc->GetGlobalIndexRangeFromBinding(binding, true);
2307 
2308                             // For each array element in the binding, update the written array with whether it has been written
2309                             for (uint32_t i = index_range.start; i < index_range.end; ++i) {
2310                                 auto *descriptor = desc->GetDescriptorFromGlobalIndex(i);
2311                                 if (descriptor->updated) {
2312                                     SetDescriptorInitialized(data_ptr, written_index, descriptor);
2313                                 } else if (desc->IsUpdateAfterBind(binding)) {
2314                                     // If it hasn't been written now and it's update after bind, put it in a list to check at
2315                                     // QueueSubmit
2316                                     di_input_block.update_at_submit[written_index] = descriptor;
2317                                 }
2318                                 written_index++;
2319                             }
2320                         }
2321                         auto last = desc->GetLayout()->GetMaxBinding();
2322                         bindings_to_written += last + 1;
2323                         bind_counter += last + 1;
2324                     } else {
2325                         *sets_to_bindings++ = 0;
2326                     }
2327                 }
2328             }
2329             vmaUnmapMemory(vmaAllocator, di_input_block.allocation);
2330 
2331             di_input_desc_buffer_info.range = (words_needed * 4);
2332             di_input_desc_buffer_info.buffer = di_input_block.buffer;
2333             di_input_desc_buffer_info.offset = 0;
2334 
2335             desc_writes[1] = LvlInitStruct<VkWriteDescriptorSet>();
2336             desc_writes[1].dstBinding = 1;
2337             desc_writes[1].descriptorCount = 1;
2338             desc_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2339             desc_writes[1].pBufferInfo = &di_input_desc_buffer_info;
2340             desc_writes[1].dstSet = desc_sets[0];
2341 
2342             desc_count = 2;
2343         }
2344     }
2345 
2346     if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
2347          IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
2348         buffer_map.size() && shaderInt64 && enabled_features.core12.bufferDeviceAddress) {
2349         // Example BDA input buffer assuming 2 buffers using BDA:
2350         // Word 0 | Index of start of buffer sizes (in this case 5)
2351         // Word 1 | 0x0000000000000000
2352         // Word 2 | Device Address of first buffer  (Addresses sorted in ascending order)
2353         // Word 3 | Device Address of second buffer
2354         // Word 4 | 0xffffffffffffffff
2355         // Word 5 | 0 (size of pretend buffer at word 1)
2356         // Word 6 | Size in bytes of first buffer
2357         // Word 7 | Size in bytes of second buffer
2358         // Word 8 | 0 (size of pretend buffer in word 4)
2359 
2360         uint32_t num_buffers = static_cast<uint32_t>(buffer_map.size());
2361         uint32_t words_needed = (num_buffers + 3) + (num_buffers + 2);
2362         alloc_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
2363         buffer_info.size = words_needed * 8;  // 64 bit words
2364         result =
2365             vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &bda_input_block.buffer, &bda_input_block.allocation, nullptr);
2366         if (result != VK_SUCCESS) {
2367             ReportSetupProblem(device, "Unable to allocate device memory.  Device could become unstable.");
2368             aborted = true;
2369             return;
2370         }
2371         uint64_t *bda_data;
2372         result = vmaMapMemory(vmaAllocator, bda_input_block.allocation, reinterpret_cast<void **>(&bda_data));
2373         uint32_t address_index = 1;
2374         uint32_t size_index = 3 + num_buffers;
2375         memset(bda_data, 0, static_cast<size_t>(buffer_info.size));
2376         bda_data[0] = size_index;       // Start of buffer sizes
2377         bda_data[address_index++] = 0;  // NULL address
2378         bda_data[size_index++] = 0;
2379 
2380         for (const auto &value : buffer_map) {
2381             bda_data[address_index++] = value.first;
2382             bda_data[size_index++] = value.second;
2383         }
2384         bda_data[address_index] = UINTPTR_MAX;
2385         bda_data[size_index] = 0;
2386         vmaUnmapMemory(vmaAllocator, bda_input_block.allocation);
2387 
2388         bda_input_desc_buffer_info.range = (words_needed * 8);
2389         bda_input_desc_buffer_info.buffer = bda_input_block.buffer;
2390         bda_input_desc_buffer_info.offset = 0;
2391 
2392         desc_writes[desc_count] = LvlInitStruct<VkWriteDescriptorSet>();
2393         desc_writes[desc_count].dstBinding = 2;
2394         desc_writes[desc_count].descriptorCount = 1;
2395         desc_writes[desc_count].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2396         desc_writes[desc_count].pBufferInfo = &bda_input_desc_buffer_info;
2397         desc_writes[desc_count].dstSet = desc_sets[0];
2398         desc_count++;
2399     }
2400 
2401     // Write the descriptor
2402     output_desc_buffer_info.buffer = output_block.buffer;
2403     output_desc_buffer_info.offset = 0;
2404 
2405     desc_writes[0] = LvlInitStruct<VkWriteDescriptorSet>();
2406     desc_writes[0].descriptorCount = 1;
2407     desc_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2408     desc_writes[0].pBufferInfo = &output_desc_buffer_info;
2409     desc_writes[0].dstSet = desc_sets[0];
2410     DispatchUpdateDescriptorSets(device, desc_count, desc_writes, 0, NULL);
2411 
2412     const auto *pipeline_state = state.pipeline_state;
2413     if (pipeline_state) {
2414         if ((pipeline_state->pipeline_layout->set_layouts.size() <= desc_set_bind_index) &&
2415             !pipeline_state->pipeline_layout->Destroyed()) {
2416             DispatchCmdBindDescriptorSets(cmd_buffer, bind_point, pipeline_state->pipeline_layout->layout(), desc_set_bind_index, 1,
2417                                           desc_sets.data(), 0, nullptr);
2418         }
2419         if (pipeline_state->pipeline_layout->Destroyed()) {
2420             ReportSetupProblem(device, "Pipeline layout has been destroyed, aborting GPU-AV");
2421             aborted = true;
2422         } else {
2423             // Record buffer and memory info in CB state tracking
2424             cb_node->gpuav_buffer_list.emplace_back(output_block, di_input_block, bda_input_block, pre_draw_resources, desc_sets[0],
2425                                                     desc_pool, bind_point, cmd_type);
2426         }
2427     } else {
2428         ReportSetupProblem(device, "Unable to find pipeline state");
2429         aborted = true;
2430     }
2431     if (aborted) {
2432         vmaDestroyBuffer(vmaAllocator, di_input_block.buffer, di_input_block.allocation);
2433         vmaDestroyBuffer(vmaAllocator, bda_input_block.buffer, bda_input_block.allocation);
2434         vmaDestroyBuffer(vmaAllocator, output_block.buffer, output_block.allocation);
2435         return;
2436     }
2437 }
2438 
CreateCmdBufferState(VkCommandBuffer cb,const VkCommandBufferAllocateInfo * pCreateInfo,const COMMAND_POOL_STATE * pool)2439 std::shared_ptr<CMD_BUFFER_STATE> GpuAssisted::CreateCmdBufferState(VkCommandBuffer cb,
2440                                                                     const VkCommandBufferAllocateInfo *pCreateInfo,
2441                                                                     const COMMAND_POOL_STATE *pool) {
2442     return std::static_pointer_cast<CMD_BUFFER_STATE>(std::make_shared<CMD_BUFFER_STATE_GPUAV>(this, cb, pCreateInfo, pool));
2443 }
2444 
// Constructor: delegate all state tracking to the base CMD_BUFFER_STATE. The GpuAssisted
// pointer is passed as the validation-object owner (stored by the base as dev_data); the
// GPU-AV-specific members (gpuav_buffer_list, as_validation_buffers) are default-initialized.
CMD_BUFFER_STATE_GPUAV::CMD_BUFFER_STATE_GPUAV(GpuAssisted *ga, VkCommandBuffer cb, const VkCommandBufferAllocateInfo *pCreateInfo,
                                               const COMMAND_POOL_STATE *pool)
    : CMD_BUFFER_STATE(ga, cb, pCreateInfo, pool) {}
2448 
Reset()2449 void CMD_BUFFER_STATE_GPUAV::Reset() {
2450     CMD_BUFFER_STATE::Reset();
2451     auto gpuav = static_cast<GpuAssisted *>(dev_data);
2452     // Free the device memory and descriptor set(s) associated with a command buffer.
2453     if (gpuav->aborted) {
2454         return;
2455     }
2456     for (auto &buffer_info : gpuav_buffer_list) {
2457         gpuav->DestroyBuffer(buffer_info);
2458     }
2459     gpuav_buffer_list.clear();
2460 
2461     for (auto &as_validation_buffer_info : as_validation_buffers) {
2462         gpuav->DestroyBuffer(as_validation_buffer_info);
2463     }
2464     as_validation_buffers.clear();
2465 }
2466