1 /* Copyright (c) 2018-2021 The Khronos Group Inc.
2 * Copyright (c) 2018-2021 Valve Corporation
3 * Copyright (c) 2018-2021 LunarG, Inc.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * Author: Karl Schultz <karl@lunarg.com>
18 * Author: Tony Barbour <tony@lunarg.com>
19 */
20
21 #include <climits>
22 #include <cmath>
23 #include "gpu_validation.h"
24 #include "spirv-tools/optimizer.hpp"
25 #include "spirv-tools/instrument.hpp"
26 #include "layer_chassis_dispatch.h"
27 #include "gpu_vuids.h"
28 #include "gpu_pre_draw_constants.h"
29 #include "sync_utils.h"
30 #include "buffer_state.h"
31 #include "cmd_buffer_state.h"
32 #include "render_pass_state.h"
33
34 static const VkShaderStageFlags kShaderStageAllRayTracing =
35 VK_SHADER_STAGE_ANY_HIT_BIT_NV | VK_SHADER_STAGE_CALLABLE_BIT_NV | VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV |
36 VK_SHADER_STAGE_INTERSECTION_BIT_NV | VK_SHADER_STAGE_MISS_BIT_NV | VK_SHADER_STAGE_RAYGEN_BIT_NV;
37
// Keep in sync with the GLSL shader below.
// CPU-side mirror of the compute shader's ValidationBuffer SSBO (std430).  Field order
// and 4-byte sizes must match the shader's member offsets exactly; the shader's trailing
// runtime array (valid_handles[]) follows this fixed-size header in the buffer.
struct GpuAccelerationStructureBuildValidationBuffer {
    uint32_t instances_to_validate;      // number of VkGeometryInstanceNV entries to inspect
    uint32_t replacement_handle_bits_0;  // first word of the known-good replacement BLAS handle
    uint32_t replacement_handle_bits_1;  // second word of the known-good replacement BLAS handle
    uint32_t invalid_handle_found;       // incremented by the shader for each invalid handle
    uint32_t invalid_handle_bits_0;      // first word of the last invalid handle seen
    uint32_t invalid_handle_bits_1;      // second word of the last invalid handle seen
    uint32_t valid_handles_count;        // count of handle-word pairs in valid_handles[]
};
48
49 // This is the GLSL source for the compute shader that is used during ray tracing acceleration structure
50 // building validation which inspects instance buffers for top level acceleration structure builds and
51 // reports and replaces invalid bottom level acceleration structure handles with good bottom level
52 // acceleration structure handle so that applications can continue without undefined behavior long enough
53 // to report errors.
54 //
55 // #version 450
56 // layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
57 // struct VkGeometryInstanceNV {
58 // uint unused[14];
59 // uint handle_bits_0;
60 // uint handle_bits_1;
61 // };
62 // layout(set=0, binding=0, std430) buffer InstanceBuffer {
63 // VkGeometryInstanceNV instances[];
64 // };
65 // layout(set=0, binding=1, std430) buffer ValidationBuffer {
66 // uint instances_to_validate;
67 // uint replacement_handle_bits_0;
68 // uint replacement_handle_bits_1;
69 // uint invalid_handle_found;
70 // uint invalid_handle_bits_0;
71 // uint invalid_handle_bits_1;
72 // uint valid_handles_count;
73 // uint valid_handles[];
74 // };
75 // void main() {
76 // for (uint instance_index = 0; instance_index < instances_to_validate; instance_index++) {
77 // uint instance_handle_bits_0 = instances[instance_index].handle_bits_0;
78 // uint instance_handle_bits_1 = instances[instance_index].handle_bits_1;
79 // bool valid = false;
80 // for (uint valid_handle_index = 0; valid_handle_index < valid_handles_count; valid_handle_index++) {
81 // if (instance_handle_bits_0 == valid_handles[2*valid_handle_index+0] &&
82 // instance_handle_bits_1 == valid_handles[2*valid_handle_index+1]) {
83 // valid = true;
84 // break;
85 // }
86 // }
87 // if (!valid) {
88 // invalid_handle_found += 1;
89 // invalid_handle_bits_0 = instance_handle_bits_0;
90 // invalid_handle_bits_1 = instance_handle_bits_1;
91 // instances[instance_index].handle_bits_0 = replacement_handle_bits_0;
92 // instances[instance_index].handle_bits_1 = replacement_handle_bits_1;
93 // }
94 // }
95 // }
96 //
97 // To regenerate the spirv below:
98 // 1. Save the above GLSL source to a file called validation_shader.comp.
99 // 2. Run in terminal
100 //
101 // glslangValidator.exe -x -V validation_shader.comp -o validation_shader.comp.spv
102 //
// 3. Copy-paste the contents of validation_shader.comp.spv here (clang-format will fix up the alignment).
// SPIR-V binary generated from the GLSL source above; see the regeneration steps in the comment.
static const uint32_t kComputeShaderSpirv[] = {
    0x07230203, 0x00010000, 0x00080007, 0x0000006d, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001, 0x4c534c47,
    0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0005000f, 0x00000005, 0x00000004, 0x6e69616d,
    0x00000000, 0x00060010, 0x00000004, 0x00000011, 0x00000001, 0x00000001, 0x00000001, 0x00030003, 0x00000002, 0x000001c2,
    0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00060005, 0x00000008, 0x74736e69, 0x65636e61, 0x646e695f, 0x00007865,
    0x00070005, 0x00000011, 0x696c6156, 0x69746164, 0x75426e6f, 0x72656666, 0x00000000, 0x00090006, 0x00000011, 0x00000000,
    0x74736e69, 0x65636e61, 0x6f745f73, 0x6c61765f, 0x74616469, 0x00000065, 0x000a0006, 0x00000011, 0x00000001, 0x6c706572,
    0x6d656361, 0x5f746e65, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x000a0006, 0x00000011, 0x00000002, 0x6c706572,
    0x6d656361, 0x5f746e65, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000031, 0x00090006, 0x00000011, 0x00000003, 0x61766e69,
    0x5f64696c, 0x646e6168, 0x665f656c, 0x646e756f, 0x00000000, 0x00090006, 0x00000011, 0x00000004, 0x61766e69, 0x5f64696c,
    0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x00090006, 0x00000011, 0x00000005, 0x61766e69, 0x5f64696c, 0x646e6168,
    0x625f656c, 0x5f737469, 0x00000031, 0x00080006, 0x00000011, 0x00000006, 0x696c6176, 0x61685f64, 0x656c646e, 0x6f635f73,
    0x00746e75, 0x00070006, 0x00000011, 0x00000007, 0x696c6176, 0x61685f64, 0x656c646e, 0x00000073, 0x00030005, 0x00000013,
    0x00000000, 0x00080005, 0x0000001b, 0x74736e69, 0x65636e61, 0x6e61685f, 0x5f656c64, 0x73746962, 0x0000305f, 0x00080005,
    0x0000001e, 0x65476b56, 0x74656d6f, 0x6e497972, 0x6e617473, 0x564e6563, 0x00000000, 0x00050006, 0x0000001e, 0x00000000,
    0x73756e75, 0x00006465, 0x00070006, 0x0000001e, 0x00000001, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000030, 0x00070006,
    0x0000001e, 0x00000002, 0x646e6168, 0x625f656c, 0x5f737469, 0x00000031, 0x00060005, 0x00000020, 0x74736e49, 0x65636e61,
    0x66667542, 0x00007265, 0x00060006, 0x00000020, 0x00000000, 0x74736e69, 0x65636e61, 0x00000073, 0x00030005, 0x00000022,
    0x00000000, 0x00080005, 0x00000027, 0x74736e69, 0x65636e61, 0x6e61685f, 0x5f656c64, 0x73746962, 0x0000315f, 0x00040005,
    0x0000002d, 0x696c6176, 0x00000064, 0x00070005, 0x0000002f, 0x696c6176, 0x61685f64, 0x656c646e, 0x646e695f, 0x00007865,
    0x00040047, 0x00000010, 0x00000006, 0x00000004, 0x00050048, 0x00000011, 0x00000000, 0x00000023, 0x00000000, 0x00050048,
    0x00000011, 0x00000001, 0x00000023, 0x00000004, 0x00050048, 0x00000011, 0x00000002, 0x00000023, 0x00000008, 0x00050048,
    0x00000011, 0x00000003, 0x00000023, 0x0000000c, 0x00050048, 0x00000011, 0x00000004, 0x00000023, 0x00000010, 0x00050048,
    0x00000011, 0x00000005, 0x00000023, 0x00000014, 0x00050048, 0x00000011, 0x00000006, 0x00000023, 0x00000018, 0x00050048,
    0x00000011, 0x00000007, 0x00000023, 0x0000001c, 0x00030047, 0x00000011, 0x00000003, 0x00040047, 0x00000013, 0x00000022,
    0x00000000, 0x00040047, 0x00000013, 0x00000021, 0x00000001, 0x00040047, 0x0000001d, 0x00000006, 0x00000004, 0x00050048,
    0x0000001e, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000001e, 0x00000001, 0x00000023, 0x00000038, 0x00050048,
    0x0000001e, 0x00000002, 0x00000023, 0x0000003c, 0x00040047, 0x0000001f, 0x00000006, 0x00000040, 0x00050048, 0x00000020,
    0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x00000020, 0x00000003, 0x00040047, 0x00000022, 0x00000022, 0x00000000,
    0x00040047, 0x00000022, 0x00000021, 0x00000000, 0x00020013, 0x00000002, 0x00030021, 0x00000003, 0x00000002, 0x00040015,
    0x00000006, 0x00000020, 0x00000000, 0x00040020, 0x00000007, 0x00000007, 0x00000006, 0x0004002b, 0x00000006, 0x00000009,
    0x00000000, 0x0003001d, 0x00000010, 0x00000006, 0x000a001e, 0x00000011, 0x00000006, 0x00000006, 0x00000006, 0x00000006,
    0x00000006, 0x00000006, 0x00000006, 0x00000010, 0x00040020, 0x00000012, 0x00000002, 0x00000011, 0x0004003b, 0x00000012,
    0x00000013, 0x00000002, 0x00040015, 0x00000014, 0x00000020, 0x00000001, 0x0004002b, 0x00000014, 0x00000015, 0x00000000,
    0x00040020, 0x00000016, 0x00000002, 0x00000006, 0x00020014, 0x00000019, 0x0004002b, 0x00000006, 0x0000001c, 0x0000000e,
    0x0004001c, 0x0000001d, 0x00000006, 0x0000001c, 0x0005001e, 0x0000001e, 0x0000001d, 0x00000006, 0x00000006, 0x0003001d,
    0x0000001f, 0x0000001e, 0x0003001e, 0x00000020, 0x0000001f, 0x00040020, 0x00000021, 0x00000002, 0x00000020, 0x0004003b,
    0x00000021, 0x00000022, 0x00000002, 0x0004002b, 0x00000014, 0x00000024, 0x00000001, 0x0004002b, 0x00000014, 0x00000029,
    0x00000002, 0x00040020, 0x0000002c, 0x00000007, 0x00000019, 0x0003002a, 0x00000019, 0x0000002e, 0x0004002b, 0x00000014,
    0x00000036, 0x00000006, 0x0004002b, 0x00000014, 0x0000003b, 0x00000007, 0x0004002b, 0x00000006, 0x0000003c, 0x00000002,
    0x0004002b, 0x00000006, 0x00000048, 0x00000001, 0x00030029, 0x00000019, 0x00000050, 0x0004002b, 0x00000014, 0x00000058,
    0x00000003, 0x0004002b, 0x00000014, 0x0000005d, 0x00000004, 0x0004002b, 0x00000014, 0x00000060, 0x00000005, 0x00050036,
    0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005, 0x0004003b, 0x00000007, 0x00000008, 0x00000007,
    0x0004003b, 0x00000007, 0x0000001b, 0x00000007, 0x0004003b, 0x00000007, 0x00000027, 0x00000007, 0x0004003b, 0x0000002c,
    0x0000002d, 0x00000007, 0x0004003b, 0x00000007, 0x0000002f, 0x00000007, 0x0003003e, 0x00000008, 0x00000009, 0x000200f9,
    0x0000000a, 0x000200f8, 0x0000000a, 0x000400f6, 0x0000000c, 0x0000000d, 0x00000000, 0x000200f9, 0x0000000e, 0x000200f8,
    0x0000000e, 0x0004003d, 0x00000006, 0x0000000f, 0x00000008, 0x00050041, 0x00000016, 0x00000017, 0x00000013, 0x00000015,
    0x0004003d, 0x00000006, 0x00000018, 0x00000017, 0x000500b0, 0x00000019, 0x0000001a, 0x0000000f, 0x00000018, 0x000400fa,
    0x0000001a, 0x0000000b, 0x0000000c, 0x000200f8, 0x0000000b, 0x0004003d, 0x00000006, 0x00000023, 0x00000008, 0x00070041,
    0x00000016, 0x00000025, 0x00000022, 0x00000015, 0x00000023, 0x00000024, 0x0004003d, 0x00000006, 0x00000026, 0x00000025,
    0x0003003e, 0x0000001b, 0x00000026, 0x0004003d, 0x00000006, 0x00000028, 0x00000008, 0x00070041, 0x00000016, 0x0000002a,
    0x00000022, 0x00000015, 0x00000028, 0x00000029, 0x0004003d, 0x00000006, 0x0000002b, 0x0000002a, 0x0003003e, 0x00000027,
    0x0000002b, 0x0003003e, 0x0000002d, 0x0000002e, 0x0003003e, 0x0000002f, 0x00000009, 0x000200f9, 0x00000030, 0x000200f8,
    0x00000030, 0x000400f6, 0x00000032, 0x00000033, 0x00000000, 0x000200f9, 0x00000034, 0x000200f8, 0x00000034, 0x0004003d,
    0x00000006, 0x00000035, 0x0000002f, 0x00050041, 0x00000016, 0x00000037, 0x00000013, 0x00000036, 0x0004003d, 0x00000006,
    0x00000038, 0x00000037, 0x000500b0, 0x00000019, 0x00000039, 0x00000035, 0x00000038, 0x000400fa, 0x00000039, 0x00000031,
    0x00000032, 0x000200f8, 0x00000031, 0x0004003d, 0x00000006, 0x0000003a, 0x0000001b, 0x0004003d, 0x00000006, 0x0000003d,
    0x0000002f, 0x00050084, 0x00000006, 0x0000003e, 0x0000003c, 0x0000003d, 0x00050080, 0x00000006, 0x0000003f, 0x0000003e,
    0x00000009, 0x00060041, 0x00000016, 0x00000040, 0x00000013, 0x0000003b, 0x0000003f, 0x0004003d, 0x00000006, 0x00000041,
    0x00000040, 0x000500aa, 0x00000019, 0x00000042, 0x0000003a, 0x00000041, 0x000300f7, 0x00000044, 0x00000000, 0x000400fa,
    0x00000042, 0x00000043, 0x00000044, 0x000200f8, 0x00000043, 0x0004003d, 0x00000006, 0x00000045, 0x00000027, 0x0004003d,
    0x00000006, 0x00000046, 0x0000002f, 0x00050084, 0x00000006, 0x00000047, 0x0000003c, 0x00000046, 0x00050080, 0x00000006,
    0x00000049, 0x00000047, 0x00000048, 0x00060041, 0x00000016, 0x0000004a, 0x00000013, 0x0000003b, 0x00000049, 0x0004003d,
    0x00000006, 0x0000004b, 0x0000004a, 0x000500aa, 0x00000019, 0x0000004c, 0x00000045, 0x0000004b, 0x000200f9, 0x00000044,
    0x000200f8, 0x00000044, 0x000700f5, 0x00000019, 0x0000004d, 0x00000042, 0x00000031, 0x0000004c, 0x00000043, 0x000300f7,
    0x0000004f, 0x00000000, 0x000400fa, 0x0000004d, 0x0000004e, 0x0000004f, 0x000200f8, 0x0000004e, 0x0003003e, 0x0000002d,
    0x00000050, 0x000200f9, 0x00000032, 0x000200f8, 0x0000004f, 0x000200f9, 0x00000033, 0x000200f8, 0x00000033, 0x0004003d,
    0x00000006, 0x00000052, 0x0000002f, 0x00050080, 0x00000006, 0x00000053, 0x00000052, 0x00000024, 0x0003003e, 0x0000002f,
    0x00000053, 0x000200f9, 0x00000030, 0x000200f8, 0x00000032, 0x0004003d, 0x00000019, 0x00000054, 0x0000002d, 0x000400a8,
    0x00000019, 0x00000055, 0x00000054, 0x000300f7, 0x00000057, 0x00000000, 0x000400fa, 0x00000055, 0x00000056, 0x00000057,
    0x000200f8, 0x00000056, 0x00050041, 0x00000016, 0x00000059, 0x00000013, 0x00000058, 0x0004003d, 0x00000006, 0x0000005a,
    0x00000059, 0x00050080, 0x00000006, 0x0000005b, 0x0000005a, 0x00000048, 0x00050041, 0x00000016, 0x0000005c, 0x00000013,
    0x00000058, 0x0003003e, 0x0000005c, 0x0000005b, 0x0004003d, 0x00000006, 0x0000005e, 0x0000001b, 0x00050041, 0x00000016,
    0x0000005f, 0x00000013, 0x0000005d, 0x0003003e, 0x0000005f, 0x0000005e, 0x0004003d, 0x00000006, 0x00000061, 0x00000027,
    0x00050041, 0x00000016, 0x00000062, 0x00000013, 0x00000060, 0x0003003e, 0x00000062, 0x00000061, 0x0004003d, 0x00000006,
    0x00000063, 0x00000008, 0x00050041, 0x00000016, 0x00000064, 0x00000013, 0x00000024, 0x0004003d, 0x00000006, 0x00000065,
    0x00000064, 0x00070041, 0x00000016, 0x00000066, 0x00000022, 0x00000015, 0x00000063, 0x00000024, 0x0003003e, 0x00000066,
    0x00000065, 0x0004003d, 0x00000006, 0x00000067, 0x00000008, 0x00050041, 0x00000016, 0x00000068, 0x00000013, 0x00000029,
    0x0004003d, 0x00000006, 0x00000069, 0x00000068, 0x00070041, 0x00000016, 0x0000006a, 0x00000022, 0x00000015, 0x00000067,
    0x00000029, 0x0003003e, 0x0000006a, 0x00000069, 0x000200f9, 0x00000057, 0x000200f8, 0x00000057, 0x000200f9, 0x0000000d,
    0x000200f8, 0x0000000d, 0x0004003d, 0x00000006, 0x0000006b, 0x00000008, 0x00050080, 0x00000006, 0x0000006c, 0x0000006b,
    0x00000024, 0x0003003e, 0x00000008, 0x0000006c, 0x000200f9, 0x0000000a, 0x000200f8, 0x0000000c, 0x000100fd, 0x00010038};
186
// Convenience function for reporting problems with setting up GPU Validation.
// Logs an UNASSIGNED-GPU-Assisted error against |object| with |specific_message| as the detail text.
// Templated so any Vulkan handle type accepted by LogError can be passed as the error object.
template <typename T>
void GpuAssisted::ReportSetupProblem(T object, const char *const specific_message) const {
    LogError(object, "UNASSIGNED-GPU-Assisted Validation Error. ", "Detail: (%s)", specific_message);
}
192
CheckForDescriptorIndexing(DeviceFeatures enabled_features) const193 bool GpuAssisted::CheckForDescriptorIndexing(DeviceFeatures enabled_features) const {
194 bool result =
195 (IsExtEnabled(device_extensions.vk_ext_descriptor_indexing) &&
196 (enabled_features.core12.descriptorIndexing || enabled_features.core12.shaderInputAttachmentArrayDynamicIndexing ||
197 enabled_features.core12.shaderUniformTexelBufferArrayDynamicIndexing ||
198 enabled_features.core12.shaderStorageTexelBufferArrayDynamicIndexing ||
199 enabled_features.core12.shaderUniformBufferArrayNonUniformIndexing ||
200 enabled_features.core12.shaderSampledImageArrayNonUniformIndexing ||
201 enabled_features.core12.shaderStorageBufferArrayNonUniformIndexing ||
202 enabled_features.core12.shaderStorageImageArrayNonUniformIndexing ||
203 enabled_features.core12.shaderInputAttachmentArrayNonUniformIndexing ||
204 enabled_features.core12.shaderUniformTexelBufferArrayNonUniformIndexing ||
205 enabled_features.core12.shaderStorageTexelBufferArrayNonUniformIndexing ||
206 enabled_features.core12.descriptorBindingUniformBufferUpdateAfterBind ||
207 enabled_features.core12.descriptorBindingSampledImageUpdateAfterBind ||
208 enabled_features.core12.descriptorBindingStorageImageUpdateAfterBind ||
209 enabled_features.core12.descriptorBindingStorageBufferUpdateAfterBind ||
210 enabled_features.core12.descriptorBindingUniformTexelBufferUpdateAfterBind ||
211 enabled_features.core12.descriptorBindingStorageTexelBufferUpdateAfterBind ||
212 enabled_features.core12.descriptorBindingUpdateUnusedWhilePending ||
213 enabled_features.core12.descriptorBindingPartiallyBound ||
214 enabled_features.core12.descriptorBindingVariableDescriptorCount || enabled_features.core12.runtimeDescriptorArray));
215 return result;
216 }
217
PreCallRecordCreateBuffer(VkDevice device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer,void * cb_state_data)218 void GpuAssisted::PreCallRecordCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
219 const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, void *cb_state_data) {
220 // Ray tracing acceleration structure instance buffers also need the storage buffer usage as
221 // acceleration structure build validation will find and replace invalid acceleration structure
222 // handles inside of a compute shader.
223 create_buffer_api_state *cb_state = reinterpret_cast<create_buffer_api_state *>(cb_state_data);
224 if (cb_state && cb_state->modified_create_info.usage & VK_BUFFER_USAGE_RAY_TRACING_BIT_NV) {
225 cb_state->modified_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
226 }
227
228 // Validating DrawIndirectCount countBuffer will require validation shader to bind the count buffer as a storage buffer
229 if (validate_draw_indirect && cb_state && cb_state->modified_create_info.usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) {
230 cb_state->modified_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
231 }
232 ValidationStateTracker::PreCallRecordCreateBuffer(device, pCreateInfo, pAllocator, pBuffer, cb_state_data);
233 }
234
PostCallRecordCreateBuffer(VkDevice device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer,VkResult result)235 void GpuAssisted::PostCallRecordCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
236 const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, VkResult result) {
237 ValidationStateTracker::PostCallRecordCreateBuffer(device, pCreateInfo, pAllocator, pBuffer, result);
238 if (pCreateInfo) {
239 const auto *opaque_capture_address = LvlFindInChain<VkBufferOpaqueCaptureAddressCreateInfo>(pCreateInfo->pNext);
240 if (opaque_capture_address) {
241 // Validate against the size requested when the buffer was created
242 buffer_map[opaque_capture_address->opaqueCaptureAddress] = pCreateInfo->size;
243 }
244 }
245 }
246
247 // Turn on necessary device features.
PreCallRecordCreateDevice(VkPhysicalDevice gpu,const VkDeviceCreateInfo * create_info,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice,void * modified_create_info)248 void GpuAssisted::PreCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo *create_info,
249 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice,
250 void *modified_create_info) {
251 DispatchGetPhysicalDeviceFeatures(gpu, &supported_features);
252 VkPhysicalDeviceFeatures features = {};
253 features.vertexPipelineStoresAndAtomics = true;
254 features.fragmentStoresAndAtomics = true;
255 features.shaderInt64 = true;
256 UtilPreCallRecordCreateDevice(gpu, reinterpret_cast<safe_VkDeviceCreateInfo *>(modified_create_info), supported_features,
257 features);
258 ValidationStateTracker::PreCallRecordCreateDevice(gpu, create_info, pAllocator, pDevice, modified_create_info);
259 }
260 // Perform initializations that can be done at Create Device time.
PostCallRecordCreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice,VkResult result)261 void GpuAssisted::PostCallRecordCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
262 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice, VkResult result) {
263 // The state tracker sets up the device state
264 ValidationStateTracker::PostCallRecordCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice, result);
265
266 ValidationObject *device_object = GetLayerDataPtr(get_dispatch_key(*pDevice), layer_data_map);
267 ValidationObject *validation_data = GetValidationObject(device_object->object_dispatch, this->container_type);
268 GpuAssisted *device_gpu_assisted = static_cast<GpuAssisted *>(validation_data);
269
270 if (device_gpu_assisted->enabled_features.core.robustBufferAccess ||
271 device_gpu_assisted->enabled_features.robustness2_features.robustBufferAccess2) {
272 device_gpu_assisted->buffer_oob_enabled = false;
273 } else {
274 std::string bufferoob_string = getLayerOption("khronos_validation.gpuav_buffer_oob");
275 transform(bufferoob_string.begin(), bufferoob_string.end(), bufferoob_string.begin(), ::tolower);
276 device_gpu_assisted->buffer_oob_enabled = !bufferoob_string.empty() ? !bufferoob_string.compare("true") : true;
277 }
278 std::string descriptor_indexing_string = getLayerOption("khronos_validation.gpuav_descriptor_indexing");
279 transform(descriptor_indexing_string.begin(), descriptor_indexing_string.end(), descriptor_indexing_string.begin(), ::tolower);
280 bool validate_descriptor_indexing = !descriptor_indexing_string.empty() ? !descriptor_indexing_string.compare("true") : true;
281
282 std::string draw_indirect_string = getLayerOption("khronos_validation.validate_draw_indirect");
283 transform(draw_indirect_string.begin(), draw_indirect_string.end(), draw_indirect_string.begin(), ::tolower);
284 device_gpu_assisted->validate_draw_indirect = !draw_indirect_string.empty() ? !draw_indirect_string.compare("true") : true;
285
286 if (device_gpu_assisted->phys_dev_props.apiVersion < VK_API_VERSION_1_1) {
287 ReportSetupProblem(device, "GPU-Assisted validation requires Vulkan 1.1 or later. GPU-Assisted Validation disabled.");
288 device_gpu_assisted->aborted = true;
289 return;
290 }
291
292 if (!supported_features.fragmentStoresAndAtomics || !supported_features.vertexPipelineStoresAndAtomics) {
293 ReportSetupProblem(device,
294 "GPU-Assisted validation requires fragmentStoresAndAtomics and vertexPipelineStoresAndAtomics. "
295 "GPU-Assisted Validation disabled.");
296 device_gpu_assisted->aborted = true;
297 return;
298 }
299
300 if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
301 IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
302 !supported_features.shaderInt64) {
303 LogWarning(device, "UNASSIGNED-GPU-Assisted Validation Warning",
304 "shaderInt64 feature is not available. No buffer device address checking will be attempted");
305 }
306 device_gpu_assisted->shaderInt64 = supported_features.shaderInt64;
307 device_gpu_assisted->physicalDevice = physicalDevice;
308 device_gpu_assisted->device = *pDevice;
309 device_gpu_assisted->output_buffer_size = sizeof(uint32_t) * (spvtools::kInstMaxOutCnt + 1);
310 if (validate_descriptor_indexing) {
311 device_gpu_assisted->descriptor_indexing = CheckForDescriptorIndexing(device_gpu_assisted->enabled_features);
312 }
313 std::vector<VkDescriptorSetLayoutBinding> bindings;
314 VkDescriptorSetLayoutBinding binding = {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
315 VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT |
316 VK_SHADER_STAGE_MESH_BIT_NV | VK_SHADER_STAGE_TASK_BIT_NV |
317 kShaderStageAllRayTracing,
318 NULL};
319 bindings.push_back(binding);
320 for (auto i = 1; i < 3; i++) {
321 binding.binding = i;
322 bindings.push_back(binding);
323 }
324 UtilPostCallRecordCreateDevice(pCreateInfo, bindings, device_gpu_assisted, device_gpu_assisted->phys_dev_props);
325 CreateAccelerationStructureBuildValidationState(device_gpu_assisted);
326 }
327
PostCallRecordGetBufferDeviceAddress(VkDevice device,const VkBufferDeviceAddressInfo * pInfo,VkDeviceAddress address)328 void GpuAssisted::PostCallRecordGetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
329 VkDeviceAddress address) {
330 auto buffer_state = Get<BUFFER_STATE>(pInfo->buffer);
331 // Validate against the size requested when the buffer was created
332 if (buffer_state) {
333 buffer_state->deviceAddress = address;
334 buffer_map[address] = buffer_state->createInfo.size;
335 }
336 ValidationStateTracker::PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
337 }
338
// vkGetBufferDeviceAddressEXT: identical bookkeeping to the core entry point.
void GpuAssisted::PostCallRecordGetBufferDeviceAddressEXT(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
                                                          VkDeviceAddress address) {
    PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
}
343
// vkGetBufferDeviceAddressKHR: identical bookkeeping to the core entry point.
void GpuAssisted::PostCallRecordGetBufferDeviceAddressKHR(VkDevice device, const VkBufferDeviceAddressInfo *pInfo,
                                                          VkDeviceAddress address) {
    PostCallRecordGetBufferDeviceAddress(device, pInfo, address);
}
348
PreCallRecordDestroyBuffer(VkDevice device,VkBuffer buffer,const VkAllocationCallbacks * pAllocator)349 void GpuAssisted::PreCallRecordDestroyBuffer(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator) {
350 auto buffer_state = Get<BUFFER_STATE>(buffer);
351 if (buffer_state) buffer_map.erase(buffer_state->deviceAddress);
352 ValidationStateTracker::PreCallRecordDestroyBuffer(device, buffer, pAllocator);
353 }
354
355 // Clean up device-related resources
PreCallRecordDestroyDevice(VkDevice device,const VkAllocationCallbacks * pAllocator)356 void GpuAssisted::PreCallRecordDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) {
357 DestroyAccelerationStructureBuildValidationState();
358 UtilPreCallRecordDestroyDevice(this);
359 ValidationStateTracker::PreCallRecordDestroyDevice(device, pAllocator);
360 if (pre_draw_validation_state.globals_created) {
361 DispatchDestroyShaderModule(device, pre_draw_validation_state.validation_shader_module, nullptr);
362 DispatchDestroyDescriptorSetLayout(device, pre_draw_validation_state.validation_ds_layout, nullptr);
363 DispatchDestroyPipelineLayout(device, pre_draw_validation_state.validation_pipeline_layout, nullptr);
364 for (auto it = pre_draw_validation_state.renderpass_to_pipeline.begin();
365 it != pre_draw_validation_state.renderpass_to_pipeline.end(); ++it) {
366 DispatchDestroyPipeline(device, it->second, nullptr);
367 }
368 pre_draw_validation_state.renderpass_to_pipeline.clear();
369 pre_draw_validation_state.globals_created = false;
370 }
371 // State Tracker can end up making vma calls through callbacks - don't destroy allocator until ST is done
372 if (vmaAllocator) {
373 vmaDestroyAllocator(vmaAllocator);
374 }
375 desc_set_manager.reset();
376 }
377
CreateAccelerationStructureBuildValidationState(GpuAssisted * device_gpuav)378 void GpuAssisted::CreateAccelerationStructureBuildValidationState(GpuAssisted *device_gpuav) {
379 if (device_gpuav->aborted) {
380 return;
381 }
382
383 auto &as_validation_state = device_gpuav->acceleration_structure_validation_state;
384 if (as_validation_state.initialized) {
385 return;
386 }
387
388 if (!IsExtEnabled(device_extensions.vk_nv_ray_tracing)) {
389 return;
390 }
391
392 // Outline:
393 // - Create valid bottom level acceleration structure which acts as replacement
394 // - Create and load vertex buffer
395 // - Create and load index buffer
396 // - Create, allocate memory for, and bind memory for acceleration structure
397 // - Query acceleration structure handle
398 // - Create command pool and command buffer
399 // - Record build acceleration structure command
400 // - Submit command buffer and wait for completion
401 // - Cleanup
402 // - Create compute pipeline for validating instance buffers
403 // - Create descriptor set layout
404 // - Create pipeline layout
405 // - Create pipeline
406 // - Cleanup
407
408 VkResult result = VK_SUCCESS;
409
410 VkBuffer vbo = VK_NULL_HANDLE;
411 VmaAllocation vbo_allocation = VK_NULL_HANDLE;
412 if (result == VK_SUCCESS) {
413 auto vbo_ci = LvlInitStruct<VkBufferCreateInfo>();
414 vbo_ci.size = sizeof(float) * 9;
415 vbo_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
416
417 VmaAllocationCreateInfo vbo_ai = {};
418 vbo_ai.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
419 vbo_ai.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
420
421 result = vmaCreateBuffer(device_gpuav->vmaAllocator, &vbo_ci, &vbo_ai, &vbo, &vbo_allocation, nullptr);
422 if (result != VK_SUCCESS) {
423 ReportSetupProblem(device, "Failed to create vertex buffer for acceleration structure build validation.");
424 }
425 }
426
427 if (result == VK_SUCCESS) {
428 uint8_t *mapped_vbo_buffer = nullptr;
429 result = vmaMapMemory(device_gpuav->vmaAllocator, vbo_allocation, reinterpret_cast<void **>(&mapped_vbo_buffer));
430 if (result != VK_SUCCESS) {
431 ReportSetupProblem(device, "Failed to map vertex buffer for acceleration structure build validation.");
432 } else {
433 const std::vector<float> vertices = {1.0f, 0.0f, 0.0f, 0.5f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f};
434 std::memcpy(mapped_vbo_buffer, (uint8_t *)vertices.data(), sizeof(float) * vertices.size());
435 vmaUnmapMemory(device_gpuav->vmaAllocator, vbo_allocation);
436 }
437 }
438
439 VkBuffer ibo = VK_NULL_HANDLE;
440 VmaAllocation ibo_allocation = VK_NULL_HANDLE;
441 if (result == VK_SUCCESS) {
442 auto ibo_ci = LvlInitStruct<VkBufferCreateInfo>();
443 ibo_ci.size = sizeof(uint32_t) * 3;
444 ibo_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
445
446 VmaAllocationCreateInfo ibo_ai = {};
447 ibo_ai.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
448 ibo_ai.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
449
450 result = vmaCreateBuffer(device_gpuav->vmaAllocator, &ibo_ci, &ibo_ai, &ibo, &ibo_allocation, nullptr);
451 if (result != VK_SUCCESS) {
452 ReportSetupProblem(device, "Failed to create index buffer for acceleration structure build validation.");
453 }
454 }
455
456 if (result == VK_SUCCESS) {
457 uint8_t *mapped_ibo_buffer = nullptr;
458 result = vmaMapMemory(device_gpuav->vmaAllocator, ibo_allocation, reinterpret_cast<void **>(&mapped_ibo_buffer));
459 if (result != VK_SUCCESS) {
460 ReportSetupProblem(device, "Failed to map index buffer for acceleration structure build validation.");
461 } else {
462 const std::vector<uint32_t> indicies = {0, 1, 2};
463 std::memcpy(mapped_ibo_buffer, (uint8_t *)indicies.data(), sizeof(uint32_t) * indicies.size());
464 vmaUnmapMemory(device_gpuav->vmaAllocator, ibo_allocation);
465 }
466 }
467
468 auto geometry = LvlInitStruct<VkGeometryNV>();
469 geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV;
470 geometry.geometry.triangles = LvlInitStruct<VkGeometryTrianglesNV>();
471 geometry.geometry.triangles.vertexData = vbo;
472 geometry.geometry.triangles.vertexOffset = 0;
473 geometry.geometry.triangles.vertexCount = 3;
474 geometry.geometry.triangles.vertexStride = 12;
475 geometry.geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
476 geometry.geometry.triangles.indexData = ibo;
477 geometry.geometry.triangles.indexOffset = 0;
478 geometry.geometry.triangles.indexCount = 3;
479 geometry.geometry.triangles.indexType = VK_INDEX_TYPE_UINT32;
480 geometry.geometry.triangles.transformData = VK_NULL_HANDLE;
481 geometry.geometry.triangles.transformOffset = 0;
482 geometry.geometry.aabbs = LvlInitStruct<VkGeometryAABBNV>();
483
484 auto as_ci = LvlInitStruct<VkAccelerationStructureCreateInfoNV>();
485 as_ci.info = LvlInitStruct<VkAccelerationStructureInfoNV>();
486 as_ci.info.instanceCount = 0;
487 as_ci.info.geometryCount = 1;
488 as_ci.info.pGeometries = &geometry;
489 if (result == VK_SUCCESS) {
490 result = DispatchCreateAccelerationStructureNV(device_gpuav->device, &as_ci, nullptr, &as_validation_state.replacement_as);
491 if (result != VK_SUCCESS) {
492 ReportSetupProblem(device_gpuav->device,
493 "Failed to create acceleration structure for acceleration structure build validation.");
494 }
495 }
496
497 VkMemoryRequirements2 as_mem_requirements = {};
498 if (result == VK_SUCCESS) {
499 auto as_mem_requirements_info = LvlInitStruct<VkAccelerationStructureMemoryRequirementsInfoNV>();
500 as_mem_requirements_info.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV;
501 as_mem_requirements_info.accelerationStructure = as_validation_state.replacement_as;
502
503 DispatchGetAccelerationStructureMemoryRequirementsNV(device_gpuav->device, &as_mem_requirements_info, &as_mem_requirements);
504 }
505
506 VmaAllocationInfo as_memory_ai = {};
507 if (result == VK_SUCCESS) {
508 VmaAllocationCreateInfo as_memory_aci = {};
509 as_memory_aci.usage = VMA_MEMORY_USAGE_GPU_ONLY;
510
511 result = vmaAllocateMemory(device_gpuav->vmaAllocator, &as_mem_requirements.memoryRequirements, &as_memory_aci,
512 &as_validation_state.replacement_as_allocation, &as_memory_ai);
513 if (result != VK_SUCCESS) {
514 ReportSetupProblem(device_gpuav->device,
515 "Failed to alloc acceleration structure memory for acceleration structure build validation.");
516 }
517 }
518
519 if (result == VK_SUCCESS) {
520 auto as_bind_info = LvlInitStruct<VkBindAccelerationStructureMemoryInfoNV>();
521 as_bind_info.accelerationStructure = as_validation_state.replacement_as;
522 as_bind_info.memory = as_memory_ai.deviceMemory;
523 as_bind_info.memoryOffset = as_memory_ai.offset;
524
525 result = DispatchBindAccelerationStructureMemoryNV(device_gpuav->device, 1, &as_bind_info);
526 if (result != VK_SUCCESS) {
527 ReportSetupProblem(device_gpuav->device,
528 "Failed to bind acceleration structure memory for acceleration structure build validation.");
529 }
530 }
531
532 if (result == VK_SUCCESS) {
533 result = DispatchGetAccelerationStructureHandleNV(device_gpuav->device, as_validation_state.replacement_as,
534 sizeof(uint64_t), &as_validation_state.replacement_as_handle);
535 if (result != VK_SUCCESS) {
536 ReportSetupProblem(device_gpuav->device,
537 "Failed to get acceleration structure handle for acceleration structure build validation.");
538 }
539 }
540
541 VkMemoryRequirements2 scratch_mem_requirements = {};
542 if (result == VK_SUCCESS) {
543 auto scratch_mem_requirements_info = LvlInitStruct<VkAccelerationStructureMemoryRequirementsInfoNV>();
544 scratch_mem_requirements_info.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
545 scratch_mem_requirements_info.accelerationStructure = as_validation_state.replacement_as;
546
547 DispatchGetAccelerationStructureMemoryRequirementsNV(device_gpuav->device, &scratch_mem_requirements_info,
548 &scratch_mem_requirements);
549 }
550
551 VkBuffer scratch = VK_NULL_HANDLE;
552 VmaAllocation scratch_allocation = {};
553 if (result == VK_SUCCESS) {
554 auto scratch_ci = LvlInitStruct<VkBufferCreateInfo>();
555 scratch_ci.size = scratch_mem_requirements.memoryRequirements.size;
556 scratch_ci.usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV;
557 VmaAllocationCreateInfo scratch_aci = {};
558 scratch_aci.usage = VMA_MEMORY_USAGE_GPU_ONLY;
559
560 result = vmaCreateBuffer(device_gpuav->vmaAllocator, &scratch_ci, &scratch_aci, &scratch, &scratch_allocation, nullptr);
561 if (result != VK_SUCCESS) {
562 ReportSetupProblem(device_gpuav->device,
563 "Failed to create scratch buffer for acceleration structure build validation.");
564 }
565 }
566
567 VkCommandPool command_pool = VK_NULL_HANDLE;
568 if (result == VK_SUCCESS) {
569 auto command_pool_ci = LvlInitStruct<VkCommandPoolCreateInfo>();
570 command_pool_ci.queueFamilyIndex = 0;
571
572 result = DispatchCreateCommandPool(device_gpuav->device, &command_pool_ci, nullptr, &command_pool);
573 if (result != VK_SUCCESS) {
574 ReportSetupProblem(device_gpuav->device, "Failed to create command pool for acceleration structure build validation.");
575 }
576 }
577
578 VkCommandBuffer command_buffer = VK_NULL_HANDLE;
579
580 if (result == VK_SUCCESS) {
581 auto command_buffer_ai = LvlInitStruct<VkCommandBufferAllocateInfo>();
582 command_buffer_ai.commandPool = command_pool;
583 command_buffer_ai.commandBufferCount = 1;
584 command_buffer_ai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
585
586 result = DispatchAllocateCommandBuffers(device_gpuav->device, &command_buffer_ai, &command_buffer);
587 if (result != VK_SUCCESS) {
588 ReportSetupProblem(device_gpuav->device,
589 "Failed to create command buffer for acceleration structure build validation.");
590 }
591
592 // Hook up command buffer dispatch
593 device_gpuav->vkSetDeviceLoaderData(device_gpuav->device, command_buffer);
594 }
595
596 if (result == VK_SUCCESS) {
597 auto command_buffer_bi = LvlInitStruct<VkCommandBufferBeginInfo>();
598
599 result = DispatchBeginCommandBuffer(command_buffer, &command_buffer_bi);
600 if (result != VK_SUCCESS) {
601 ReportSetupProblem(device_gpuav->device, "Failed to begin command buffer for acceleration structure build validation.");
602 }
603 }
604
605 if (result == VK_SUCCESS) {
606 DispatchCmdBuildAccelerationStructureNV(command_buffer, &as_ci.info, VK_NULL_HANDLE, 0, VK_FALSE,
607 as_validation_state.replacement_as, VK_NULL_HANDLE, scratch, 0);
608 DispatchEndCommandBuffer(command_buffer);
609 }
610
611 VkQueue queue = VK_NULL_HANDLE;
612 if (result == VK_SUCCESS) {
613 DispatchGetDeviceQueue(device_gpuav->device, 0, 0, &queue);
614
615 // Hook up queue dispatch
616 device_gpuav->vkSetDeviceLoaderData(device_gpuav->device, queue);
617
618 auto submit_info = LvlInitStruct<VkSubmitInfo>();
619 submit_info.commandBufferCount = 1;
620 submit_info.pCommandBuffers = &command_buffer;
621 result = DispatchQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
622 if (result != VK_SUCCESS) {
623 ReportSetupProblem(device_gpuav->device,
624 "Failed to submit command buffer for acceleration structure build validation.");
625 }
626 }
627
628 if (result == VK_SUCCESS) {
629 result = DispatchQueueWaitIdle(queue);
630 if (result != VK_SUCCESS) {
631 ReportSetupProblem(device_gpuav->device, "Failed to wait for queue idle for acceleration structure build validation.");
632 }
633 }
634
635 if (vbo != VK_NULL_HANDLE) {
636 vmaDestroyBuffer(device_gpuav->vmaAllocator, vbo, vbo_allocation);
637 }
638 if (ibo != VK_NULL_HANDLE) {
639 vmaDestroyBuffer(device_gpuav->vmaAllocator, ibo, ibo_allocation);
640 }
641 if (scratch != VK_NULL_HANDLE) {
642 vmaDestroyBuffer(device_gpuav->vmaAllocator, scratch, scratch_allocation);
643 }
644 if (command_pool != VK_NULL_HANDLE) {
645 DispatchDestroyCommandPool(device_gpuav->device, command_pool, nullptr);
646 }
647
648 if (device_gpuav->debug_desc_layout == VK_NULL_HANDLE) {
649 ReportSetupProblem(device_gpuav->device,
650 "Failed to find descriptor set layout for acceleration structure build validation.");
651 result = VK_INCOMPLETE;
652 }
653
654 if (result == VK_SUCCESS) {
655 auto pipeline_layout_ci = LvlInitStruct<VkPipelineLayoutCreateInfo>();
656 pipeline_layout_ci.setLayoutCount = 1;
657 pipeline_layout_ci.pSetLayouts = &device_gpuav->debug_desc_layout;
658 result = DispatchCreatePipelineLayout(device_gpuav->device, &pipeline_layout_ci, 0, &as_validation_state.pipeline_layout);
659 if (result != VK_SUCCESS) {
660 ReportSetupProblem(device_gpuav->device,
661 "Failed to create pipeline layout for acceleration structure build validation.");
662 }
663 }
664
665 VkShaderModule shader_module = VK_NULL_HANDLE;
666 if (result == VK_SUCCESS) {
667 auto shader_module_ci = LvlInitStruct<VkShaderModuleCreateInfo>();
668 shader_module_ci.codeSize = sizeof(kComputeShaderSpirv);
669 shader_module_ci.pCode = (uint32_t *)kComputeShaderSpirv;
670
671 result = DispatchCreateShaderModule(device_gpuav->device, &shader_module_ci, nullptr, &shader_module);
672 if (result != VK_SUCCESS) {
673 ReportSetupProblem(device_gpuav->device,
674 "Failed to create compute shader module for acceleration structure build validation.");
675 }
676 }
677
678 if (result == VK_SUCCESS) {
679 auto pipeline_stage_ci = LvlInitStruct<VkPipelineShaderStageCreateInfo>();
680 pipeline_stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
681 pipeline_stage_ci.module = shader_module;
682 pipeline_stage_ci.pName = "main";
683
684 auto pipeline_ci = LvlInitStruct<VkComputePipelineCreateInfo>();
685 pipeline_ci.stage = pipeline_stage_ci;
686 pipeline_ci.layout = as_validation_state.pipeline_layout;
687
688 result = DispatchCreateComputePipelines(device_gpuav->device, VK_NULL_HANDLE, 1, &pipeline_ci, nullptr,
689 &as_validation_state.pipeline);
690 if (result != VK_SUCCESS) {
691 ReportSetupProblem(device_gpuav->device,
692 "Failed to create compute pipeline for acceleration structure build validation.");
693 }
694 }
695
696 if (shader_module != VK_NULL_HANDLE) {
697 DispatchDestroyShaderModule(device_gpuav->device, shader_module, nullptr);
698 }
699
700 if (result == VK_SUCCESS) {
701 as_validation_state.initialized = true;
702 LogInfo(device_gpuav->device, "UNASSIGNED-GPU-Assisted Validation.",
703 "Acceleration Structure Building GPU Validation Enabled.");
704 } else {
705 device_gpuav->aborted = true;
706 }
707 }
708
DestroyAccelerationStructureBuildValidationState()709 void GpuAssisted::DestroyAccelerationStructureBuildValidationState() {
710 auto &as_validation_state = acceleration_structure_validation_state;
711 if (as_validation_state.pipeline != VK_NULL_HANDLE) {
712 DispatchDestroyPipeline(device, as_validation_state.pipeline, nullptr);
713 }
714 if (as_validation_state.pipeline_layout != VK_NULL_HANDLE) {
715 DispatchDestroyPipelineLayout(device, as_validation_state.pipeline_layout, nullptr);
716 }
717 if (as_validation_state.replacement_as != VK_NULL_HANDLE) {
718 DispatchDestroyAccelerationStructureNV(device, as_validation_state.replacement_as, nullptr);
719 }
720 if (as_validation_state.replacement_as_allocation != VK_NULL_HANDLE) {
721 vmaFreeMemory(vmaAllocator, as_validation_state.replacement_as_allocation);
722 }
723 }
724
725 struct GPUAV_RESTORABLE_PIPELINE_STATE {
726 VkPipelineBindPoint pipeline_bind_point = VK_PIPELINE_BIND_POINT_MAX_ENUM;
727 VkPipeline pipeline = VK_NULL_HANDLE;
728 VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
729 std::vector<VkDescriptorSet> descriptor_sets;
730 std::vector<std::vector<uint32_t>> dynamic_offsets;
731 uint32_t push_descriptor_set_index = 0;
732 std::vector<safe_VkWriteDescriptorSet> push_descriptor_set_writes;
733 std::vector<uint8_t> push_constants_data;
734 PushConstantRangesId push_constants_ranges;
735
CreateGPUAV_RESTORABLE_PIPELINE_STATE736 void Create(CMD_BUFFER_STATE *cb_state, VkPipelineBindPoint bind_point) {
737 pipeline_bind_point = bind_point;
738 const auto lv_bind_point = ConvertToLvlBindPoint(bind_point);
739
740 LAST_BOUND_STATE &last_bound = cb_state->lastBound[lv_bind_point];
741 if (last_bound.pipeline_state) {
742 pipeline = last_bound.pipeline_state->pipeline();
743 pipeline_layout = last_bound.pipeline_layout;
744 descriptor_sets.reserve(last_bound.per_set.size());
745 for (std::size_t i = 0; i < last_bound.per_set.size(); i++) {
746 const auto *bound_descriptor_set = last_bound.per_set[i].bound_descriptor_set;
747 if (bound_descriptor_set) {
748 descriptor_sets.push_back(bound_descriptor_set->GetSet());
749 if (bound_descriptor_set->IsPushDescriptor()) {
750 push_descriptor_set_index = static_cast<uint32_t>(i);
751 }
752 dynamic_offsets.push_back(last_bound.per_set[i].dynamicOffsets);
753 }
754 }
755
756 if (last_bound.push_descriptor_set) {
757 push_descriptor_set_writes = last_bound.push_descriptor_set->GetWrites();
758 }
759 if (last_bound.pipeline_state->pipeline_layout->push_constant_ranges == cb_state->push_constant_data_ranges) {
760 push_constants_data = cb_state->push_constant_data;
761 push_constants_ranges = last_bound.pipeline_state->pipeline_layout->push_constant_ranges;
762 }
763 }
764 }
765
RestoreGPUAV_RESTORABLE_PIPELINE_STATE766 void Restore(VkCommandBuffer command_buffer) const {
767 if (pipeline != VK_NULL_HANDLE) {
768 DispatchCmdBindPipeline(command_buffer, pipeline_bind_point, pipeline);
769 if (!descriptor_sets.empty()) {
770 for (std::size_t i = 0; i < descriptor_sets.size(); i++) {
771 VkDescriptorSet descriptor_set = descriptor_sets[i];
772 if (descriptor_set != VK_NULL_HANDLE) {
773 DispatchCmdBindDescriptorSets(command_buffer, pipeline_bind_point, pipeline_layout,
774 static_cast<uint32_t>(i), 1, &descriptor_set,
775 static_cast<uint32_t>(dynamic_offsets[i].size()), dynamic_offsets[i].data());
776 }
777 }
778 }
779 if (!push_descriptor_set_writes.empty()) {
780 DispatchCmdPushDescriptorSetKHR(command_buffer, pipeline_bind_point, pipeline_layout, push_descriptor_set_index,
781 static_cast<uint32_t>(push_descriptor_set_writes.size()),
782 reinterpret_cast<const VkWriteDescriptorSet *>(push_descriptor_set_writes.data()));
783 }
784 if (!push_constants_data.empty()) {
785 for (const auto &push_constant_range : *push_constants_ranges) {
786 if (push_constant_range.size == 0) continue;
787 DispatchCmdPushConstants(command_buffer, pipeline_layout, push_constant_range.stageFlags,
788 push_constant_range.offset, push_constant_range.size, push_constants_data.data());
789 }
790 }
791 }
792 }
793 };
794
PreCallRecordCmdBuildAccelerationStructureNV(VkCommandBuffer commandBuffer,const VkAccelerationStructureInfoNV * pInfo,VkBuffer instanceData,VkDeviceSize instanceOffset,VkBool32 update,VkAccelerationStructureNV dst,VkAccelerationStructureNV src,VkBuffer scratch,VkDeviceSize scratchOffset)795 void GpuAssisted::PreCallRecordCmdBuildAccelerationStructureNV(VkCommandBuffer commandBuffer,
796 const VkAccelerationStructureInfoNV *pInfo, VkBuffer instanceData,
797 VkDeviceSize instanceOffset, VkBool32 update,
798 VkAccelerationStructureNV dst, VkAccelerationStructureNV src,
799 VkBuffer scratch, VkDeviceSize scratchOffset) {
800 ValidationStateTracker::PreCallRecordCmdBuildAccelerationStructureNV(commandBuffer, pInfo, instanceData, instanceOffset, update,
801 dst, src, scratch, scratchOffset);
802 if (pInfo == nullptr || pInfo->type != VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV) {
803 return;
804 }
805
806 auto &as_validation_state = acceleration_structure_validation_state;
807 if (!as_validation_state.initialized) {
808 return;
809 }
810
811 // Empty acceleration structure is valid according to the spec.
812 if (pInfo->instanceCount == 0 || instanceData == VK_NULL_HANDLE) {
813 return;
814 }
815
816 auto cb_state = GetCBState(commandBuffer);
817 assert(cb_state != nullptr);
818
819 std::vector<uint64_t> current_valid_handles;
820 ForEach<ACCELERATION_STRUCTURE_STATE>([¤t_valid_handles](const ACCELERATION_STRUCTURE_STATE &as_state) {
821 if (as_state.built && as_state.create_infoNV.info.type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV) {
822 current_valid_handles.push_back(as_state.opaque_handle);
823 }
824 });
825
826 GpuAssistedAccelerationStructureBuildValidationBufferInfo as_validation_buffer_info = {};
827 as_validation_buffer_info.acceleration_structure = dst;
828
829 const VkDeviceSize validation_buffer_size =
830 // One uint for number of instances to validate
831 4 +
832 // Two uint for the replacement acceleration structure handle
833 8 +
834 // One uint for number of invalid handles found
835 4 +
836 // Two uint for the first invalid handle found
837 8 +
838 // One uint for the number of current valid handles
839 4 +
840 // Two uint for each current valid handle
841 (8 * current_valid_handles.size());
842
843 auto validation_buffer_create_info = LvlInitStruct<VkBufferCreateInfo>();
844 validation_buffer_create_info.size = validation_buffer_size;
845 validation_buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
846
847 VmaAllocationCreateInfo validation_buffer_alloc_info = {};
848 validation_buffer_alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
849
850 VkResult result = vmaCreateBuffer(vmaAllocator, &validation_buffer_create_info, &validation_buffer_alloc_info,
851 &as_validation_buffer_info.validation_buffer,
852 &as_validation_buffer_info.validation_buffer_allocation, nullptr);
853 if (result != VK_SUCCESS) {
854 ReportSetupProblem(device, "Unable to allocate device memory. Device could become unstable.");
855 aborted = true;
856 return;
857 }
858
859 GpuAccelerationStructureBuildValidationBuffer *mapped_validation_buffer = nullptr;
860 result = vmaMapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation,
861 reinterpret_cast<void **>(&mapped_validation_buffer));
862 if (result != VK_SUCCESS) {
863 ReportSetupProblem(device, "Unable to allocate device memory for acceleration structure build val buffer.");
864 aborted = true;
865 return;
866 }
867
868 mapped_validation_buffer->instances_to_validate = pInfo->instanceCount;
869 mapped_validation_buffer->replacement_handle_bits_0 =
870 reinterpret_cast<const uint32_t *>(&as_validation_state.replacement_as_handle)[0];
871 mapped_validation_buffer->replacement_handle_bits_1 =
872 reinterpret_cast<const uint32_t *>(&as_validation_state.replacement_as_handle)[1];
873 mapped_validation_buffer->invalid_handle_found = 0;
874 mapped_validation_buffer->invalid_handle_bits_0 = 0;
875 mapped_validation_buffer->invalid_handle_bits_1 = 0;
876 mapped_validation_buffer->valid_handles_count = static_cast<uint32_t>(current_valid_handles.size());
877
878 uint32_t *mapped_valid_handles = reinterpret_cast<uint32_t *>(&mapped_validation_buffer[1]);
879 for (std::size_t i = 0; i < current_valid_handles.size(); i++) {
880 const uint64_t current_valid_handle = current_valid_handles[i];
881
882 *mapped_valid_handles = reinterpret_cast<const uint32_t *>(¤t_valid_handle)[0];
883 ++mapped_valid_handles;
884 *mapped_valid_handles = reinterpret_cast<const uint32_t *>(¤t_valid_handle)[1];
885 ++mapped_valid_handles;
886 }
887
888 vmaUnmapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation);
889
890 static constexpr const VkDeviceSize k_instance_size = 64;
891 const VkDeviceSize instance_buffer_size = k_instance_size * pInfo->instanceCount;
892
893 result = desc_set_manager->GetDescriptorSet(&as_validation_buffer_info.descriptor_pool, debug_desc_layout,
894 &as_validation_buffer_info.descriptor_set);
895 if (result != VK_SUCCESS) {
896 ReportSetupProblem(device, "Unable to get descriptor set for acceleration structure build.");
897 aborted = true;
898 return;
899 }
900
901 VkDescriptorBufferInfo descriptor_buffer_infos[2] = {};
902 descriptor_buffer_infos[0].buffer = instanceData;
903 descriptor_buffer_infos[0].offset = instanceOffset;
904 descriptor_buffer_infos[0].range = instance_buffer_size;
905 descriptor_buffer_infos[1].buffer = as_validation_buffer_info.validation_buffer;
906 descriptor_buffer_infos[1].offset = 0;
907 descriptor_buffer_infos[1].range = validation_buffer_size;
908
909 VkWriteDescriptorSet descriptor_set_writes[2] = {
910 LvlInitStruct<VkWriteDescriptorSet>(),
911 LvlInitStruct<VkWriteDescriptorSet>(),
912 };
913 descriptor_set_writes[0].dstSet = as_validation_buffer_info.descriptor_set;
914 descriptor_set_writes[0].dstBinding = 0;
915 descriptor_set_writes[0].descriptorCount = 1;
916 descriptor_set_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
917 descriptor_set_writes[0].pBufferInfo = &descriptor_buffer_infos[0];
918 descriptor_set_writes[1].dstSet = as_validation_buffer_info.descriptor_set;
919 descriptor_set_writes[1].dstBinding = 1;
920 descriptor_set_writes[1].descriptorCount = 1;
921 descriptor_set_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
922 descriptor_set_writes[1].pBufferInfo = &descriptor_buffer_infos[1];
923
924 DispatchUpdateDescriptorSets(device, 2, descriptor_set_writes, 0, nullptr);
925
926 // Issue a memory barrier to make sure anything writing to the instance buffer has finished.
927 auto memory_barrier = LvlInitStruct<VkMemoryBarrier>();
928 memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
929 memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
930 DispatchCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
931 &memory_barrier, 0, nullptr, 0, nullptr);
932
933 // Save a copy of the compute pipeline state that needs to be restored.
934 GPUAV_RESTORABLE_PIPELINE_STATE restorable_state;
935 restorable_state.Create(cb_state.get(), VK_PIPELINE_BIND_POINT_COMPUTE);
936
937 // Switch to and launch the validation compute shader to find, replace, and report invalid acceleration structure handles.
938 DispatchCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, as_validation_state.pipeline);
939 DispatchCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, as_validation_state.pipeline_layout, 0, 1,
940 &as_validation_buffer_info.descriptor_set, 0, nullptr);
941 DispatchCmdDispatch(commandBuffer, 1, 1, 1);
942
943 // Issue a buffer memory barrier to make sure that any invalid bottom level acceleration structure handles
944 // have been replaced by the validation compute shader before any builds take place.
945 auto instance_buffer_barrier = LvlInitStruct<VkBufferMemoryBarrier>();
946 instance_buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
947 instance_buffer_barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV;
948 instance_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
949 instance_buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
950 instance_buffer_barrier.buffer = instanceData;
951 instance_buffer_barrier.offset = instanceOffset;
952 instance_buffer_barrier.size = instance_buffer_size;
953 DispatchCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
954 VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0, 0, nullptr, 1, &instance_buffer_barrier, 0,
955 nullptr);
956
957 // Restore the previous compute pipeline state.
958 restorable_state.Restore(commandBuffer);
959
960 cb_state->as_validation_buffers.emplace_back(std::move(as_validation_buffer_info));
961 }
962
ProcessAccelerationStructureBuildValidationBuffer(VkQueue queue,CMD_BUFFER_STATE_GPUAV * cb_node)963 void GpuAssisted::ProcessAccelerationStructureBuildValidationBuffer(VkQueue queue, CMD_BUFFER_STATE_GPUAV *cb_node) {
964 if (cb_node == nullptr || !cb_node->hasBuildAccelerationStructureCmd) {
965 return;
966 }
967
968 for (const auto &as_validation_buffer_info : cb_node->as_validation_buffers) {
969 GpuAccelerationStructureBuildValidationBuffer *mapped_validation_buffer = nullptr;
970
971 VkResult result = vmaMapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation,
972 reinterpret_cast<void **>(&mapped_validation_buffer));
973 if (result == VK_SUCCESS) {
974 if (mapped_validation_buffer->invalid_handle_found > 0) {
975 uint64_t invalid_handle = 0;
976 reinterpret_cast<uint32_t *>(&invalid_handle)[0] = mapped_validation_buffer->invalid_handle_bits_0;
977 reinterpret_cast<uint32_t *>(&invalid_handle)[1] = mapped_validation_buffer->invalid_handle_bits_1;
978
979 LogError(as_validation_buffer_info.acceleration_structure, "UNASSIGNED-AccelerationStructure",
980 "Attempted to build top level acceleration structure using invalid bottom level acceleration structure "
981 "handle (%" PRIu64 ")",
982 invalid_handle);
983 }
984 vmaUnmapMemory(vmaAllocator, as_validation_buffer_info.validation_buffer_allocation);
985 }
986 }
987 }
988
PostCallRecordBindAccelerationStructureMemoryNV(VkDevice device,uint32_t bindInfoCount,const VkBindAccelerationStructureMemoryInfoNV * pBindInfos,VkResult result)989 void GpuAssisted::PostCallRecordBindAccelerationStructureMemoryNV(VkDevice device, uint32_t bindInfoCount,
990 const VkBindAccelerationStructureMemoryInfoNV *pBindInfos,
991 VkResult result) {
992 if (VK_SUCCESS != result) return;
993 ValidationStateTracker::PostCallRecordBindAccelerationStructureMemoryNV(device, bindInfoCount, pBindInfos, result);
994 for (uint32_t i = 0; i < bindInfoCount; i++) {
995 const VkBindAccelerationStructureMemoryInfoNV &info = pBindInfos[i];
996 auto as_state = Get<ACCELERATION_STRUCTURE_STATE>(info.accelerationStructure);
997 if (as_state) {
998 DispatchGetAccelerationStructureHandleNV(device, info.accelerationStructure, 8, &as_state->opaque_handle);
999 }
1000 }
1001 }
1002
1003 // Modify the pipeline layout to include our debug descriptor set and any needed padding with the dummy descriptor set.
PreCallRecordCreatePipelineLayout(VkDevice device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout,void * cpl_state_data)1004 void GpuAssisted::PreCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo,
1005 const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout,
1006 void *cpl_state_data) {
1007 if (aborted) {
1008 return;
1009 }
1010
1011 create_pipeline_layout_api_state *cpl_state = reinterpret_cast<create_pipeline_layout_api_state *>(cpl_state_data);
1012
1013 if (cpl_state->modified_create_info.setLayoutCount >= adjusted_max_desc_sets) {
1014 std::ostringstream strm;
1015 strm << "Pipeline Layout conflict with validation's descriptor set at slot " << desc_set_bind_index << ". "
1016 << "Application has too many descriptor sets in the pipeline layout to continue with gpu validation. "
1017 << "Validation is not modifying the pipeline layout. "
1018 << "Instrumented shaders are replaced with non-instrumented shaders.";
1019 ReportSetupProblem(device, strm.str().c_str());
1020 } else {
1021 UtilPreCallRecordCreatePipelineLayout(cpl_state, this, pCreateInfo);
1022 }
1023 ValidationStateTracker::PreCallRecordCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout, cpl_state_data);
1024 }
1025
PostCallRecordCreatePipelineLayout(VkDevice device,const VkPipelineLayoutCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineLayout * pPipelineLayout,VkResult result)1026 void GpuAssisted::PostCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo,
1027 const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout,
1028 VkResult result) {
1029 ValidationStateTracker::PostCallRecordCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout, result);
1030
1031 if (result != VK_SUCCESS) {
1032 ReportSetupProblem(device, "Unable to create pipeline layout. Device could become unstable.");
1033 aborted = true;
1034 }
1035 }
1036
1037 // Free the device memory and descriptor set(s) associated with a command buffer.
DestroyBuffer(GpuAssistedBufferInfo & buffer_info)1038 void GpuAssisted::DestroyBuffer(GpuAssistedBufferInfo &buffer_info) {
1039 vmaDestroyBuffer(vmaAllocator, buffer_info.output_mem_block.buffer, buffer_info.output_mem_block.allocation);
1040 if (buffer_info.di_input_mem_block.buffer) {
1041 vmaDestroyBuffer(vmaAllocator, buffer_info.di_input_mem_block.buffer, buffer_info.di_input_mem_block.allocation);
1042 }
1043 if (buffer_info.bda_input_mem_block.buffer) {
1044 vmaDestroyBuffer(vmaAllocator, buffer_info.bda_input_mem_block.buffer, buffer_info.bda_input_mem_block.allocation);
1045 }
1046 if (buffer_info.desc_set != VK_NULL_HANDLE) {
1047 desc_set_manager->PutBackDescriptorSet(buffer_info.desc_pool, buffer_info.desc_set);
1048 }
1049 if (buffer_info.pre_draw_resources.desc_set != VK_NULL_HANDLE) {
1050 desc_set_manager->PutBackDescriptorSet(buffer_info.pre_draw_resources.desc_pool, buffer_info.pre_draw_resources.desc_set);
1051 }
1052 }
1053
DestroyBuffer(GpuAssistedAccelerationStructureBuildValidationBufferInfo & as_validation_buffer_info)1054 void GpuAssisted::DestroyBuffer(GpuAssistedAccelerationStructureBuildValidationBufferInfo &as_validation_buffer_info) {
1055 vmaDestroyBuffer(vmaAllocator, as_validation_buffer_info.validation_buffer,
1056 as_validation_buffer_info.validation_buffer_allocation);
1057
1058 if (as_validation_buffer_info.descriptor_set != VK_NULL_HANDLE) {
1059 desc_set_manager->PutBackDescriptorSet(as_validation_buffer_info.descriptor_pool, as_validation_buffer_info.descriptor_set);
1060 }
1061 }
1062
1063 // Just gives a warning about a possible deadlock.
PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers) const1064 bool GpuAssisted::PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
1065 VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
1066 uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
1067 uint32_t bufferMemoryBarrierCount,
1068 const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
1069 const VkImageMemoryBarrier *pImageMemoryBarriers) const {
1070 if (srcStageMask & VK_PIPELINE_STAGE_HOST_BIT) {
1071 ReportSetupProblem(commandBuffer,
1072 "CmdWaitEvents recorded with VK_PIPELINE_STAGE_HOST_BIT set. "
1073 "GPU_Assisted validation waits on queue completion. "
1074 "This wait could block the host's signaling of this event, resulting in deadlock.");
1075 }
1076 ValidationStateTracker::PreCallValidateCmdWaitEvents(commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask,
1077 memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
1078 pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
1079 return false;
1080 }
1081
PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,const VkDependencyInfoKHR * pDependencyInfos) const1082 bool GpuAssisted::PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
1083 const VkDependencyInfoKHR *pDependencyInfos) const {
1084 VkPipelineStageFlags2KHR srcStageMask = 0;
1085
1086 for (uint32_t i = 0; i < eventCount; i++) {
1087 auto stage_masks = sync_utils::GetGlobalStageMasks(pDependencyInfos[i]);
1088 srcStageMask = stage_masks.src;
1089 }
1090
1091 if (srcStageMask & VK_PIPELINE_STAGE_HOST_BIT) {
1092 ReportSetupProblem(commandBuffer,
1093 "CmdWaitEvents2KHR recorded with VK_PIPELINE_STAGE_HOST_BIT set. "
1094 "GPU_Assisted validation waits on queue completion. "
1095 "This wait could block the host's signaling of this event, resulting in deadlock.");
1096 }
1097 ValidationStateTracker::PreCallValidateCmdWaitEvents2KHR(commandBuffer, eventCount, pEvents, pDependencyInfos);
1098 return false;
1099 }
1100
PostCallRecordGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties * pPhysicalDeviceProperties)1101 void GpuAssisted::PostCallRecordGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
1102 VkPhysicalDeviceProperties *pPhysicalDeviceProperties) {
1103 // There is an implicit layer that can cause this call to return 0 for maxBoundDescriptorSets - Ignore such calls
1104 if (enabled[gpu_validation_reserve_binding_slot] && pPhysicalDeviceProperties->limits.maxBoundDescriptorSets > 0) {
1105 if (pPhysicalDeviceProperties->limits.maxBoundDescriptorSets > 1) {
1106 pPhysicalDeviceProperties->limits.maxBoundDescriptorSets -= 1;
1107 } else {
1108 LogWarning(physicalDevice, "UNASSIGNED-GPU-Assisted Validation Setup Error.",
1109 "Unable to reserve descriptor binding slot on a device with only one slot.");
1110 }
1111 }
1112 ValidationStateTracker::PostCallRecordGetPhysicalDeviceProperties(physicalDevice, pPhysicalDeviceProperties);
1113 }
1114
PostCallRecordGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties2 * pPhysicalDeviceProperties2)1115 void GpuAssisted::PostCallRecordGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
1116 VkPhysicalDeviceProperties2 *pPhysicalDeviceProperties2) {
1117 // There is an implicit layer that can cause this call to return 0 for maxBoundDescriptorSets - Ignore such calls
1118 if (enabled[gpu_validation_reserve_binding_slot] && pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets > 0) {
1119 if (pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets > 1) {
1120 pPhysicalDeviceProperties2->properties.limits.maxBoundDescriptorSets -= 1;
1121 } else {
1122 LogWarning(physicalDevice, "UNASSIGNED-GPU-Assisted Validation Setup Error.",
1123 "Unable to reserve descriptor binding slot on a device with only one slot.");
1124 }
1125 }
1126 ValidationStateTracker::PostCallRecordGetPhysicalDeviceProperties2(physicalDevice, pPhysicalDeviceProperties2);
1127 }
1128
PreCallRecordCreateGraphicsPipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * cgpl_state_data)1129 void GpuAssisted::PreCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1130 const VkGraphicsPipelineCreateInfo *pCreateInfos,
1131 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1132 void *cgpl_state_data) {
1133 if (aborted) return;
1134 std::vector<safe_VkGraphicsPipelineCreateInfo> new_pipeline_create_infos;
1135 create_graphics_pipeline_api_state *cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state *>(cgpl_state_data);
1136 UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, cgpl_state->pipe_state,
1137 &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_GRAPHICS, this);
1138 cgpl_state->gpu_create_infos = new_pipeline_create_infos;
1139 cgpl_state->pCreateInfos = reinterpret_cast<VkGraphicsPipelineCreateInfo *>(cgpl_state->gpu_create_infos.data());
1140 ValidationStateTracker::PreCallRecordCreateGraphicsPipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1141 cgpl_state_data);
1142 }
1143
PreCallRecordCreateComputePipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * ccpl_state_data)1144 void GpuAssisted::PreCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1145 const VkComputePipelineCreateInfo *pCreateInfos,
1146 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1147 void *ccpl_state_data) {
1148 if (aborted) return;
1149 std::vector<safe_VkComputePipelineCreateInfo> new_pipeline_create_infos;
1150 auto *ccpl_state = reinterpret_cast<create_compute_pipeline_api_state *>(ccpl_state_data);
1151 UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, ccpl_state->pipe_state,
1152 &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_COMPUTE, this);
1153 ccpl_state->gpu_create_infos = new_pipeline_create_infos;
1154 ccpl_state->pCreateInfos = reinterpret_cast<VkComputePipelineCreateInfo *>(ccpl_state->gpu_create_infos.data());
1155 ValidationStateTracker::PreCallRecordCreateComputePipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1156 ccpl_state_data);
1157 }
1158
PreCallRecordCreateRayTracingPipelinesNV(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoNV * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * crtpl_state_data)1159 void GpuAssisted::PreCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1160 const VkRayTracingPipelineCreateInfoNV *pCreateInfos,
1161 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1162 void *crtpl_state_data) {
1163 if (aborted) return;
1164 std::vector<safe_VkRayTracingPipelineCreateInfoCommon> new_pipeline_create_infos;
1165 auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_api_state *>(crtpl_state_data);
1166 UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, crtpl_state->pipe_state,
1167 &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, this);
1168 crtpl_state->gpu_create_infos = new_pipeline_create_infos;
1169 crtpl_state->pCreateInfos = reinterpret_cast<VkRayTracingPipelineCreateInfoNV *>(crtpl_state->gpu_create_infos.data());
1170 ValidationStateTracker::PreCallRecordCreateRayTracingPipelinesNV(device, pipelineCache, count, pCreateInfos, pAllocator,
1171 pPipelines, crtpl_state_data);
1172 }
1173
PreCallRecordCreateRayTracingPipelinesKHR(VkDevice device,VkDeferredOperationKHR deferredOperation,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoKHR * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,void * crtpl_state_data)1174 void GpuAssisted::PreCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
1175 VkPipelineCache pipelineCache, uint32_t count,
1176 const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1177 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1178 void *crtpl_state_data) {
1179 if (aborted) return;
1180 std::vector<safe_VkRayTracingPipelineCreateInfoCommon> new_pipeline_create_infos;
1181 auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1182 UtilPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, crtpl_state->pipe_state,
1183 &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, this);
1184 crtpl_state->gpu_create_infos = new_pipeline_create_infos;
1185 crtpl_state->pCreateInfos = reinterpret_cast<VkRayTracingPipelineCreateInfoKHR *>(crtpl_state->gpu_create_infos.data());
1186 ValidationStateTracker::PreCallRecordCreateRayTracingPipelinesKHR(device, deferredOperation, pipelineCache, count, pCreateInfos,
1187 pAllocator, pPipelines, crtpl_state_data);
1188 }
1189
PostCallRecordCreateGraphicsPipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * cgpl_state_data)1190 void GpuAssisted::PostCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1191 const VkGraphicsPipelineCreateInfo *pCreateInfos,
1192 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1193 VkResult result, void *cgpl_state_data) {
1194 ValidationStateTracker::PostCallRecordCreateGraphicsPipelines(device, pipelineCache, count, pCreateInfos, pAllocator,
1195 pPipelines, result, cgpl_state_data);
1196 if (aborted) return;
1197 create_graphics_pipeline_api_state *cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state *>(cgpl_state_data);
1198 UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, cgpl_state->gpu_create_infos.data());
1199 UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_GRAPHICS, this);
1200 }
1201
PostCallRecordCreateComputePipelines(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * ccpl_state_data)1202 void GpuAssisted::PostCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1203 const VkComputePipelineCreateInfo *pCreateInfos,
1204 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1205 VkResult result, void *ccpl_state_data) {
1206 ValidationStateTracker::PostCallRecordCreateComputePipelines(device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines,
1207 result, ccpl_state_data);
1208 if (aborted) return;
1209 create_compute_pipeline_api_state *ccpl_state = reinterpret_cast<create_compute_pipeline_api_state *>(ccpl_state_data);
1210 UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, ccpl_state->gpu_create_infos.data());
1211 UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_COMPUTE, this);
1212 }
1213
PostCallRecordCreateRayTracingPipelinesNV(VkDevice device,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoNV * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * crtpl_state_data)1214 void GpuAssisted::PostCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
1215 const VkRayTracingPipelineCreateInfoNV *pCreateInfos,
1216 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1217 VkResult result, void *crtpl_state_data) {
1218 auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1219 ValidationStateTracker::PostCallRecordCreateRayTracingPipelinesNV(device, pipelineCache, count, pCreateInfos, pAllocator,
1220 pPipelines, result, crtpl_state_data);
1221 if (aborted) return;
1222 UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, crtpl_state->gpu_create_infos.data());
1223 UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, this);
1224 }
1225
PostCallRecordCreateRayTracingPipelinesKHR(VkDevice device,VkDeferredOperationKHR deferredOperation,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoKHR * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines,VkResult result,void * crtpl_state_data)1226 void GpuAssisted::PostCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
1227 VkPipelineCache pipelineCache, uint32_t count,
1228 const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1229 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines,
1230 VkResult result, void *crtpl_state_data) {
1231 auto *crtpl_state = reinterpret_cast<create_ray_tracing_pipeline_khr_api_state *>(crtpl_state_data);
1232 ValidationStateTracker::PostCallRecordCreateRayTracingPipelinesKHR(
1233 device, deferredOperation, pipelineCache, count, pCreateInfos, pAllocator, pPipelines, result, crtpl_state_data);
1234 if (aborted) return;
1235 UtilCopyCreatePipelineFeedbackData(count, pCreateInfos, crtpl_state->gpu_create_infos.data());
1236 UtilPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, this);
1237 }
1238
1239 // Remove all the shader trackers associated with this destroyed pipeline.
PreCallRecordDestroyPipeline(VkDevice device,VkPipeline pipeline,const VkAllocationCallbacks * pAllocator)1240 void GpuAssisted::PreCallRecordDestroyPipeline(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator) {
1241 for (auto it = shader_map.begin(); it != shader_map.end();) {
1242 if (it->second.pipeline == pipeline) {
1243 it = shader_map.erase(it);
1244 } else {
1245 ++it;
1246 }
1247 }
1248 ValidationStateTracker::PreCallRecordDestroyPipeline(device, pipeline, pAllocator);
1249 }
1250
PreCallRecordDestroyRenderPass(VkDevice device,VkRenderPass renderPass,const VkAllocationCallbacks * pAllocator)1251 void GpuAssisted::PreCallRecordDestroyRenderPass(VkDevice device, VkRenderPass renderPass,
1252 const VkAllocationCallbacks *pAllocator) {
1253 auto pipeline = pre_draw_validation_state.renderpass_to_pipeline.find(renderPass);
1254 if (pipeline != pre_draw_validation_state.renderpass_to_pipeline.end()) {
1255 DispatchDestroyPipeline(device, pipeline->second, nullptr);
1256 pre_draw_validation_state.renderpass_to_pipeline.erase(pipeline);
1257 }
1258 ValidationStateTracker::PreCallRecordDestroyRenderPass(device, renderPass, pAllocator);
1259 }
1260
// Call the SPIR-V Optimizer to run the instrumentation pass on the shader.
// Returns true (and fills new_pgm with the instrumented SPIR-V) on success; on failure
// new_pgm holds the original words. *unique_shader_id receives the id assigned to this
// shader regardless of success.
bool GpuAssisted::InstrumentShader(const VkShaderModuleCreateInfo *pCreateInfo, std::vector<unsigned int> &new_pgm,
                                   uint32_t *unique_shader_id) {
    if (aborted) return false;
    // Reject blobs that are not SPIR-V: the first word must be the SPIR-V magic number.
    if (pCreateInfo->pCode[0] != spv::MagicNumber) return false;

    // Forward optimizer errors (fatal/internal/error levels only) into the layer's log.
    const spvtools::MessageConsumer gpu_console_message_consumer =
        [this](spv_message_level_t level, const char *, const spv_position_t &position, const char *message) -> void {
        switch (level) {
            case SPV_MSG_FATAL:
            case SPV_MSG_INTERNAL_ERROR:
            case SPV_MSG_ERROR:
                this->LogError(this->device, "UNASSIGNED-GPU-Assisted", "Error during shader instrumentation: line %zu: %s",
                               position.index, message);
                break;
            default:
                break;
        }
    };

    // Load original shader SPIR-V
    uint32_t num_words = static_cast<uint32_t>(pCreateInfo->codeSize / 4);
    new_pgm.clear();
    new_pgm.reserve(num_words);
    new_pgm.insert(new_pgm.end(), &pCreateInfo->pCode[0], &pCreateInfo->pCode[num_words]);

    // Call the optimizer to instrument the shader.
    // Use the unique_shader_module_id as a shader ID so we can look up its handle later in the shader_map.
    // If descriptor indexing is enabled, enable length checks and updated descriptor checks
    using namespace spvtools;
    spv_target_env target_env = PickSpirvEnv(api_version, IsExtEnabled(device_extensions.vk_khr_spirv_1_4));
    spvtools::ValidatorOptions val_options;
    AdjustValidatorOptions(device_extensions, enabled_features, val_options);
    spvtools::OptimizerOptions opt_options;
    // Validate the module as part of the optimizer run so broken output is caught here.
    opt_options.set_run_validator(true);
    opt_options.set_validator_options(val_options);
    Optimizer optimizer(target_env);
    optimizer.SetMessageConsumer(gpu_console_message_consumer);
    optimizer.RegisterPass(CreateInstBindlessCheckPass(desc_set_bind_index, unique_shader_module_id, descriptor_indexing,
                                                       descriptor_indexing, buffer_oob_enabled, buffer_oob_enabled));
    // Call CreateAggressiveDCEPass with preserve_interface == true
    optimizer.RegisterPass(CreateAggressiveDCEPass(true));
    // Buffer-device-address checks need int64 support in shaders and the feature enabled.
    if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
         IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
        shaderInt64 && enabled_features.core12.bufferDeviceAddress) {
        optimizer.RegisterPass(CreateInstBuffAddrCheckPass(desc_set_bind_index, unique_shader_module_id));
    }
    bool pass = optimizer.Run(new_pgm.data(), new_pgm.size(), &new_pgm, opt_options);
    if (!pass) {
        ReportSetupProblem(device, "Failure to instrument shader. Proceeding with non-instrumented shader.");
    }
    // The id counter advances even on failure, keeping ids unique across all attempts.
    *unique_shader_id = unique_shader_module_id++;
    return pass;
}
1315 // Create the instrumented shader data to provide to the driver.
PreCallRecordCreateShaderModule(VkDevice device,const VkShaderModuleCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkShaderModule * pShaderModule,void * csm_state_data)1316 void GpuAssisted::PreCallRecordCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo *pCreateInfo,
1317 const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule,
1318 void *csm_state_data) {
1319 create_shader_module_api_state *csm_state = reinterpret_cast<create_shader_module_api_state *>(csm_state_data);
1320 bool pass = InstrumentShader(pCreateInfo, csm_state->instrumented_pgm, &csm_state->unique_shader_id);
1321 if (pass) {
1322 csm_state->instrumented_create_info.pCode = csm_state->instrumented_pgm.data();
1323 csm_state->instrumented_create_info.codeSize = csm_state->instrumented_pgm.size() * sizeof(unsigned int);
1324 }
1325 ValidationStateTracker::PreCallRecordCreateShaderModule(device, pCreateInfo, pAllocator, pShaderModule, csm_state_data);
1326 }
1327
// Error code written by the pre-draw validation shader; defined to follow directly after
// the codes reserved by spirv-tools instrumentation (kInstErrorMax).
static const int kInstErrorPreDrawValidate = spvtools::kInstErrorMax + 1;
// Debug-record word index holding the pre-draw sub-error code (one past the spirv-tools
// error-code word).
static const int kPreDrawValidateSubError = spvtools::kInstValidationOutError + 1;
// Generate the part of the message describing the violation.
// Returns true when the caller should append stage/source/common details to the message;
// returns false when the message (the pre-draw validation case) is already complete.
bool GenerateValidationMessage(const uint32_t *debug_record, std::string &msg, std::string &vuid_msg, GpuAssistedBufferInfo buf_info, GpuAssisted *gpu_assisted) {
    using namespace spvtools;
    std::ostringstream strm;
    bool return_code = true;
    // Guard against the local extensions of the spirv-tools error enum drifting out of
    // sync with the values the instrumentation shaders actually write.
    assert(kInstErrorPreDrawValidate == _kInstErrorPreDrawValidate);
    assert(kInstValidationOutError == _kInstValidationOutError);
    switch (debug_record[kInstValidationOutError]) {
        // Descriptor array indexed past its declared bound.
        case kInstErrorBindlessBounds: {
            strm << "Index of " << debug_record[kInstBindlessBoundsOutDescIndex] << " used to index descriptor array of length "
                 << debug_record[kInstBindlessBoundsOutDescBound] << ". ";
            vuid_msg = "UNASSIGNED-Descriptor index out of bounds";
        } break;
        // Descriptor was never written before the shader used it.
        case kInstErrorBindlessUninit: {
            strm << "Descriptor index " << debug_record[kInstBindlessUninitOutDescIndex] << " is uninitialized.";
            vuid_msg = "UNASSIGNED-Descriptor uninitialized";
        } break;
        // Buffer-device-address reference outside any allocated range; the 64-bit address
        // occupies two consecutive 32-bit words of the record.
        case kInstErrorBuffAddrUnallocRef: {
            uint64_t *ptr = (uint64_t *)&debug_record[kInstBuffAddrUnallocOutDescPtrLo];
            strm << "Device address 0x" << std::hex << *ptr << " access out of bounds. ";
            vuid_msg = "UNASSIGNED-Device address out of bounds";
        } break;
        // Uniform/storage buffer access out of bounds (sizes reported in bytes).
        case kInstErrorBuffOOBUniform:
        case kInstErrorBuffOOBStorage: {
            auto size = debug_record[kInstBindlessBuffOOBOutBuffSize];
            // A recorded size of zero means the descriptor itself was never initialized.
            if (size == 0) {
                strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex] << " is uninitialized.";
                vuid_msg = "UNASSIGNED-Descriptor uninitialized";
            } else {
                strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex]
                     << " access out of bounds. Descriptor size is " << debug_record[kInstBindlessBuffOOBOutBuffSize]
                     << " and highest byte accessed was " << debug_record[kInstBindlessBuffOOBOutBuffOff];
                const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
                if (debug_record[kInstValidationOutError] == kInstErrorBuffOOBUniform)
                    vuid_msg = vuid.uniform_access_oob;
                else
                    vuid_msg = vuid.storage_access_oob;
            }
        } break;
        // Same as above for texel buffers (sizes/offsets reported in texels).
        case kInstErrorBuffOOBUniformTexel:
        case kInstErrorBuffOOBStorageTexel: {
            auto size = debug_record[kInstBindlessBuffOOBOutBuffSize];
            if (size == 0) {
                strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex] << " is uninitialized.";
                vuid_msg = "UNASSIGNED-Descriptor uninitialized";
            } else {
                strm << "Descriptor index " << debug_record[kInstBindlessBuffOOBOutDescIndex]
                     << " access out of bounds. Descriptor size is " << debug_record[kInstBindlessBuffOOBOutBuffSize]
                     << " texels and highest texel accessed was " << debug_record[kInstBindlessBuffOOBOutBuffOff];
                const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
                if (debug_record[kInstValidationOutError] == kInstErrorBuffOOBUniformTexel)
                    vuid_msg = vuid.uniform_access_oob;
                else
                    vuid_msg = vuid.storage_access_oob;
            }
        } break;
        // Errors from the pre-draw (indirect draw parameter) validation compute shader;
        // the sub-error word selects which indirect-draw rule was violated.
        case kInstErrorPreDrawValidate: {
            // Buffer size must be >= (stride * (drawCount - 1) + offset + sizeof(VkDrawIndexedIndirectCommand))
            if (debug_record[kPreDrawValidateSubError] == pre_draw_count_exceeds_bufsize_error) {
                uint32_t count = debug_record[kPreDrawValidateSubError + 1];
                uint32_t stride = buf_info.pre_draw_resources.stride;
                uint32_t offset = static_cast<uint32_t>(buf_info.pre_draw_resources.offset);
                uint32_t draw_size = (stride * (count - 1) + offset + sizeof(VkDrawIndexedIndirectCommand));
                const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
                strm << "Indirect draw count of " << count << " would exceed buffer size " << buf_info.pre_draw_resources.buf_size
                     << " of buffer " << buf_info.pre_draw_resources.buffer << " stride = " << stride << " offset = " << offset
                     << " (stride * (drawCount - 1) + offset + sizeof(VkDrawIndexedIndirectCommand)) = " << draw_size;
                if (count == 1) {
                    vuid_msg = vuid.count_exceeds_bufsize_1;
                } else {
                    vuid_msg = vuid.count_exceeds_bufsize;
                }
            } else if (debug_record[kPreDrawValidateSubError] == pre_draw_count_exceeds_limit_error) {
                uint32_t count = debug_record[kPreDrawValidateSubError + 1];
                const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
                strm << "Indirect draw count of " << count << " would exceed maxDrawIndirectCount limit of "
                     << gpu_assisted->phys_dev_props.limits.maxDrawIndirectCount;
                vuid_msg = vuid.count_exceeds_device_limit;
            } else if (debug_record[kPreDrawValidateSubError] == pre_draw_first_instance_error) {
                uint32_t index = debug_record[kPreDrawValidateSubError + 1];
                const GpuVuid vuid = GetGpuVuid(buf_info.cmd_type);
                strm << "The drawIndirectFirstInstance feature is not enabled, but the firstInstance member of the "
                        "VkDrawIndirectCommand structure at index "
                     << index << " is not zero";
                vuid_msg = vuid.first_instance_not_zero;
            }
            // Pre-draw messages are complete as-is; no shader stage/source details apply.
            return_code = false;
        } break;
        default: {
            strm << "Internal Error (unexpected error type = " << debug_record[kInstValidationOutError] << "). ";
            vuid_msg = "UNASSIGNED-Internal Error";
            assert(false);
        } break;
    }
    msg = strm.str();
    return return_code;
}
1427
1428 // Pull together all the information from the debug record to build the error message strings,
1429 // and then assemble them into a single message string.
1430 // Retrieve the shader program referenced by the unique shader ID provided in the debug record.
1431 // We had to keep a copy of the shader program with the same lifecycle as the pipeline to make
1432 // sure it is available when the pipeline is submitted. (The ShaderModule tracking object also
1433 // keeps a copy, but it can be destroyed after the pipeline is created and before it is submitted.)
1434 //
// Decode one debug record written by the instrumented shader into a logged error, then
// clear the record so the buffer can be reused.
void GpuAssisted::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQueue queue, GpuAssistedBufferInfo &buffer_info,
                                             uint32_t operation_index, uint32_t *const debug_output_buffer) {
    using namespace spvtools;
    const uint32_t total_words = debug_output_buffer[0];
    // A zero here means that the shader instrumentation didn't write anything.
    // If you have nothing to say, don't say it here.
    if (0 == total_words) {
        return;
    }
    // The first word in the debug output buffer is the number of words that would have
    // been written by the shader instrumentation, if there was enough room in the buffer we provided.
    // The number of words actually written by the shaders is determined by the size of the buffer
    // we provide via the descriptor. So, we process only the number of words that can fit in the
    // buffer.
    // Each "report" written by the shader instrumentation is considered a "record". This function
    // is hard-coded to process only one record because it expects the buffer to be large enough to
    // hold only one record. If there is a desire to process more than one record, this function needs
    // to be modified to loop over records and the buffer size increased.
    std::string validation_message;
    std::string stage_message;
    std::string common_message;
    std::string filename_message;
    std::string source_message;
    std::string vuid_msg;
    VkShaderModule shader_module_handle = VK_NULL_HANDLE;
    VkPipeline pipeline_handle = VK_NULL_HANDLE;
    std::vector<unsigned int> pgm;
    // The first record starts at this offset after the total_words.
    const uint32_t *debug_record = &debug_output_buffer[kDebugOutputDataOffset];
    // Lookup the VkShaderModule handle and SPIR-V code used to create the shader, using the unique shader ID value returned
    // by the instrumented shader.
    auto it = shader_map.find(debug_record[kInstCommonOutShaderId]);
    if (it != shader_map.end()) {
        shader_module_handle = it->second.shader_module;
        pipeline_handle = it->second.pipeline;
        pgm = it->second.pgm;
    }
    // GenerateValidationMessage returns false when the message is already complete
    // (the pre-draw validation case); then stage/source/common details are skipped.
    bool gen_full_message = GenerateValidationMessage(debug_record, validation_message, vuid_msg, buffer_info, this);
    if (gen_full_message) {
        UtilGenerateStageMessage(debug_record, stage_message);
        UtilGenerateCommonMessage(report_data, command_buffer, debug_record, shader_module_handle, pipeline_handle,
                                  buffer_info.pipeline_bind_point, operation_index, common_message);
        UtilGenerateSourceMessages(pgm, debug_record, false, filename_message, source_message);
        LogError(queue, vuid_msg.c_str(), "%s %s %s %s%s", validation_message.c_str(), common_message.c_str(), stage_message.c_str(),
                 filename_message.c_str(), source_message.c_str());
    }
    else {
        LogError(queue, vuid_msg.c_str(), "%s", validation_message.c_str());
    }
    // The debug record at word kInstCommonOutSize is the number of words in the record
    // written by the shader. Clear the entire record plus the total_words word at the start.
    const uint32_t words_to_clear = 1 + std::min(debug_record[kInstCommonOutSize], static_cast<uint32_t>(kInstMaxOutCnt));
    memset(debug_output_buffer, 0, sizeof(uint32_t) * words_to_clear);
}
1489
SetDescriptorInitialized(uint32_t * pData,uint32_t index,const cvdescriptorset::Descriptor * descriptor)1490 void GpuAssisted::SetDescriptorInitialized(uint32_t *pData, uint32_t index, const cvdescriptorset::Descriptor *descriptor) {
1491 if (descriptor->GetClass() == cvdescriptorset::DescriptorClass::GeneralBuffer) {
1492 auto buffer = static_cast<const cvdescriptorset::BufferDescriptor *>(descriptor)->GetBuffer();
1493 if (buffer == VK_NULL_HANDLE) {
1494 pData[index] = UINT_MAX;
1495 } else {
1496 auto buffer_state = static_cast<const cvdescriptorset::BufferDescriptor *>(descriptor)->GetBufferState();
1497 pData[index] = static_cast<uint32_t>(buffer_state->createInfo.size);
1498 }
1499 } else if (descriptor->GetClass() == cvdescriptorset::DescriptorClass::TexelBuffer) {
1500 auto buffer_view = static_cast<const cvdescriptorset::TexelDescriptor *>(descriptor)->GetBufferView();
1501 if (buffer_view == VK_NULL_HANDLE) {
1502 pData[index] = UINT_MAX;
1503 } else {
1504 auto buffer_view_state = static_cast<const cvdescriptorset::TexelDescriptor *>(descriptor)->GetBufferViewState();
1505 pData[index] = static_cast<uint32_t>(buffer_view_state->buffer_state->createInfo.size);
1506 }
1507 } else {
1508 pData[index] = 1;
1509 }
1510 }
1511
1512 // For the given command buffer, map its debug data buffers and update the status of any update after bind descriptors
UpdateInstrumentationBuffer(CMD_BUFFER_STATE_GPUAV * cb_node)1513 void GpuAssisted::UpdateInstrumentationBuffer(CMD_BUFFER_STATE_GPUAV *cb_node) {
1514 uint32_t *data;
1515 for (auto &buffer_info : cb_node->gpuav_buffer_list) {
1516 if (buffer_info.di_input_mem_block.update_at_submit.size() > 0) {
1517 VkResult result =
1518 vmaMapMemory(vmaAllocator, buffer_info.di_input_mem_block.allocation, reinterpret_cast<void **>(&data));
1519 if (result == VK_SUCCESS) {
1520 for (const auto &update : buffer_info.di_input_mem_block.update_at_submit) {
1521 if (update.second->updated) {
1522 SetDescriptorInitialized(data, update.first, update.second);
1523 }
1524 }
1525 vmaUnmapMemory(vmaAllocator, buffer_info.di_input_mem_block.allocation);
1526 }
1527 }
1528 }
1529 }
1530
PreRecordCommandBuffer(VkCommandBuffer command_buffer)1531 void GpuAssisted::PreRecordCommandBuffer(VkCommandBuffer command_buffer) {
1532 auto cb_node = GetCBState(command_buffer);
1533 UpdateInstrumentationBuffer(cb_node.get());
1534 for (auto *secondary_cmd_buffer : cb_node->linkedCommandBuffers) {
1535 UpdateInstrumentationBuffer(static_cast<CMD_BUFFER_STATE_GPUAV *>(secondary_cmd_buffer));
1536 }
1537 }
1538
PreCallRecordQueueSubmit(VkQueue queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence)1539 void GpuAssisted::PreCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence) {
1540 ValidationStateTracker::PreCallRecordQueueSubmit(queue, submitCount, pSubmits, fence);
1541 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1542 const VkSubmitInfo *submit = &pSubmits[submit_idx];
1543 for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1544 PreRecordCommandBuffer(submit->pCommandBuffers[i]);
1545 }
1546 }
1547 }
PreCallRecordQueueSubmit2KHR(VkQueue queue,uint32_t submitCount,const VkSubmitInfo2KHR * pSubmits,VkFence fence)1548 void GpuAssisted::PreCallRecordQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR *pSubmits,
1549 VkFence fence) {
1550 ValidationStateTracker::PreCallRecordQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
1551 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1552 const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1553 for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1554 PreRecordCommandBuffer(submit->pCommandBufferInfos[i].commandBuffer);
1555 }
1556 }
1557 }
1558
CommandBufferNeedsProcessing(VkCommandBuffer command_buffer)1559 bool GpuAssisted::CommandBufferNeedsProcessing(VkCommandBuffer command_buffer) {
1560 bool buffers_present = false;
1561 auto cb_node = GetCBState(command_buffer);
1562
1563 if (cb_node->gpuav_buffer_list.size() || cb_node->hasBuildAccelerationStructureCmd) {
1564 buffers_present = true;
1565 }
1566 for (const auto *secondary : cb_node->linkedCommandBuffers) {
1567 auto secondary_cmd_buffer = static_cast<const CMD_BUFFER_STATE_GPUAV *>(secondary);
1568 if (secondary_cmd_buffer->gpuav_buffer_list.size() || cb_node->hasBuildAccelerationStructureCmd) {
1569 buffers_present = true;
1570 }
1571 }
1572 return buffers_present;
1573 }
1574
ProcessCommandBuffer(VkQueue queue,VkCommandBuffer command_buffer)1575 void GpuAssisted::ProcessCommandBuffer(VkQueue queue, VkCommandBuffer command_buffer) {
1576 auto cb_node = GetCBState(command_buffer);
1577
1578 UtilProcessInstrumentationBuffer(queue, cb_node.get(), this);
1579 ProcessAccelerationStructureBuildValidationBuffer(queue, cb_node.get());
1580 for (auto *secondary_cmd_buffer : cb_node->linkedCommandBuffers) {
1581 UtilProcessInstrumentationBuffer(queue, secondary_cmd_buffer, this);
1582 ProcessAccelerationStructureBuildValidationBuffer(queue, cb_node.get());
1583 }
1584 }
1585
1586 // Issue a memory barrier to make GPU-written data available to host.
1587 // Wait for the queue to complete execution.
1588 // Check the debug buffers for all the command buffers that were submitted.
PostCallRecordQueueSubmit(VkQueue queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence,VkResult result)1589 void GpuAssisted::PostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence,
1590 VkResult result) {
1591 ValidationStateTracker::PostCallRecordQueueSubmit(queue, submitCount, pSubmits, fence, result);
1592
1593 if (aborted || (result != VK_SUCCESS)) return;
1594 bool buffers_present = false;
1595 // Don't QueueWaitIdle if there's nothing to process
1596 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1597 const VkSubmitInfo *submit = &pSubmits[submit_idx];
1598 for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1599 buffers_present |= CommandBufferNeedsProcessing(submit->pCommandBuffers[i]);
1600 }
1601 }
1602 if (!buffers_present) return;
1603
1604 UtilSubmitBarrier(queue, this);
1605
1606 DispatchQueueWaitIdle(queue);
1607
1608 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1609 const VkSubmitInfo *submit = &pSubmits[submit_idx];
1610 for (uint32_t i = 0; i < submit->commandBufferCount; i++) {
1611 ProcessCommandBuffer(queue, submit->pCommandBuffers[i]);
1612 }
1613 }
1614 }
1615
PostCallRecordQueueSubmit2KHR(VkQueue queue,uint32_t submitCount,const VkSubmitInfo2KHR * pSubmits,VkFence fence,VkResult result)1616 void GpuAssisted::PostCallRecordQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR *pSubmits,
1617 VkFence fence, VkResult result) {
1618 ValidationStateTracker::PostCallRecordQueueSubmit2KHR(queue, submitCount, pSubmits, fence, result);
1619
1620 if (aborted || (result != VK_SUCCESS)) return;
1621 bool buffers_present = false;
1622 // Don't QueueWaitIdle if there's nothing to process
1623 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1624 const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1625 for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1626 buffers_present |= CommandBufferNeedsProcessing(submit->pCommandBufferInfos[i].commandBuffer);
1627 }
1628 }
1629 if (!buffers_present) return;
1630
1631 UtilSubmitBarrier(queue, this);
1632
1633 DispatchQueueWaitIdle(queue);
1634
1635 for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) {
1636 const VkSubmitInfo2KHR *submit = &pSubmits[submit_idx];
1637 for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
1638 ProcessCommandBuffer(queue, submit->pCommandBufferInfos[i].commandBuffer);
1639 }
1640 }
1641 }
1642
PreCallRecordCmdDraw(VkCommandBuffer commandBuffer,uint32_t vertexCount,uint32_t instanceCount,uint32_t firstVertex,uint32_t firstInstance)1643 void GpuAssisted::PreCallRecordCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
1644 uint32_t firstVertex, uint32_t firstInstance) {
1645 ValidationStateTracker::PreCallRecordCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
1646 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAW);
1647 }
1648
PreCallRecordCmdDrawMultiEXT(VkCommandBuffer commandBuffer,uint32_t drawCount,const VkMultiDrawInfoEXT * pVertexInfo,uint32_t instanceCount,uint32_t firstInstance,uint32_t stride)1649 void GpuAssisted::PreCallRecordCmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
1650 const VkMultiDrawInfoEXT *pVertexInfo, uint32_t instanceCount,
1651 uint32_t firstInstance, uint32_t stride) {
1652 ValidationStateTracker::PreCallRecordCmdDrawMultiEXT(commandBuffer, drawCount, pVertexInfo, instanceCount, firstInstance,
1653 stride);
1654 for (uint32_t i = 0; i < drawCount; i++) {
1655 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMULTIEXT);
1656 }
1657 }
1658
PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer,uint32_t indexCount,uint32_t instanceCount,uint32_t firstIndex,int32_t vertexOffset,uint32_t firstInstance)1659 void GpuAssisted::PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
1660 uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) {
1661 ValidationStateTracker::PreCallRecordCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
1662 firstInstance);
1663 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXED);
1664 }
1665
PreCallRecordCmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,uint32_t drawCount,const VkMultiDrawIndexedInfoEXT * pIndexInfo,uint32_t instanceCount,uint32_t firstInstance,uint32_t stride,const int32_t * pVertexOffset)1666 void GpuAssisted::PreCallRecordCmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
1667 const VkMultiDrawIndexedInfoEXT *pIndexInfo, uint32_t instanceCount,
1668 uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset) {
1669 ValidationStateTracker::PreCallRecordCmdDrawMultiIndexedEXT(commandBuffer, drawCount, pIndexInfo, instanceCount, firstInstance,
1670 stride, pVertexOffset);
1671 for (uint32_t i = 0; i < drawCount; i++) {
1672 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMULTIINDEXEDEXT);
1673 }
1674 }
1675
PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t count,uint32_t stride)1676 void GpuAssisted::PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count,
1677 uint32_t stride) {
1678 ValidationStateTracker::PreCallRecordCmdDrawIndirect(commandBuffer, buffer, offset, count, stride);
1679 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, count, stride, 0, 0};
1680 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECT, &cdi_state);
1681 }
1682
PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t count,uint32_t stride)1683 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1684 uint32_t count, uint32_t stride) {
1685 ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirect(commandBuffer, buffer, offset, count, stride);
1686 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, count, stride, 0, 0};
1687 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECT, &cdi_state);
1688 }
1689
PreCallRecordCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1690 void GpuAssisted::PreCallRecordCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1691 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
1692 uint32_t stride) {
1693 ValidationStateTracker::PreCallRecordCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1694 maxDrawCount, stride);
1695 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1696 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTCOUNTKHR, &cdi_state);
1697 }
1698
PreCallRecordCmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1699 void GpuAssisted::PreCallRecordCmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1700 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
1701
1702 uint32_t stride) {
1703 ValidationStateTracker::PreCallRecordCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1704 maxDrawCount, stride);
1705 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1706 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTCOUNT, &cdi_state);
1707 }
1708
PreCallRecordCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,uint32_t instanceCount,uint32_t firstInstance,VkBuffer counterBuffer,VkDeviceSize counterBufferOffset,uint32_t counterOffset,uint32_t vertexStride)1709 void GpuAssisted::PreCallRecordCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
1710 uint32_t firstInstance, VkBuffer counterBuffer,
1711 VkDeviceSize counterBufferOffset, uint32_t counterOffset,
1712 uint32_t vertexStride) {
1713 ValidationStateTracker::PreCallRecordCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer,
1714 counterBufferOffset, counterOffset, vertexStride);
1715 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDIRECTBYTECOUNTEXT);
1716 }
1717
PreCallRecordCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1718 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1719 VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1720 uint32_t maxDrawCount, uint32_t stride) {
1721 ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer,
1722 countBufferOffset, maxDrawCount, stride);
1723 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1724 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECTCOUNTKHR, &cdi_state);
1725 }
1726
PreCallRecordCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1727 void GpuAssisted::PreCallRecordCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1728 VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1729 uint32_t maxDrawCount, uint32_t stride) {
1730 ValidationStateTracker::PreCallRecordCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset,
1731 maxDrawCount, stride);
1732 GpuAssistedCmdDrawIndirectState cdi_state = {buffer, offset, 0, stride, countBuffer, countBufferOffset};
1733 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWINDEXEDINDIRECTCOUNT, &cdi_state);
1734 }
1735
PreCallRecordCmdDrawMeshTasksNV(VkCommandBuffer commandBuffer,uint32_t taskCount,uint32_t firstTask)1736 void GpuAssisted::PreCallRecordCmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask) {
1737 ValidationStateTracker::PreCallRecordCmdDrawMeshTasksNV(commandBuffer, taskCount, firstTask);
1738 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSNV);
1739 }
1740
PreCallRecordCmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)1741 void GpuAssisted::PreCallRecordCmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1742 uint32_t drawCount, uint32_t stride) {
1743 ValidationStateTracker::PreCallRecordCmdDrawMeshTasksIndirectNV(commandBuffer, buffer, offset, drawCount, stride);
1744 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSINDIRECTNV);
1745 }
1746
PreCallRecordCmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)1747 void GpuAssisted::PreCallRecordCmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
1748 VkBuffer countBuffer, VkDeviceSize countBufferOffset,
1749 uint32_t maxDrawCount, uint32_t stride) {
1750 ValidationStateTracker::PreCallRecordCmdDrawMeshTasksIndirectCountNV(commandBuffer, buffer, offset, countBuffer,
1751 countBufferOffset, maxDrawCount, stride);
1752 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, CMD_DRAWMESHTASKSINDIRECTCOUNTNV);
1753 }
1754
PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer,uint32_t x,uint32_t y,uint32_t z)1755 void GpuAssisted::PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) {
1756 ValidationStateTracker::PreCallRecordCmdDispatch(commandBuffer, x, y, z);
1757 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCH);
1758 }
1759
PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset)1760 void GpuAssisted::PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) {
1761 ValidationStateTracker::PreCallRecordCmdDispatchIndirect(commandBuffer, buffer, offset);
1762 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHINDIRECT);
1763 }
1764
PreCallRecordCmdDispatchBase(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)1765 void GpuAssisted::PreCallRecordCmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY,
1766 uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY,
1767 uint32_t groupCountZ) {
1768 ValidationStateTracker::PreCallRecordCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX,
1769 groupCountY, groupCountZ);
1770 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHBASE);
1771 }
1772
PreCallRecordCmdDispatchBaseKHR(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)1773 void GpuAssisted::PreCallRecordCmdDispatchBaseKHR(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY,
1774 uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY,
1775 uint32_t groupCountZ) {
1776 ValidationStateTracker::PreCallRecordCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX,
1777 groupCountY, groupCountZ);
1778 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, CMD_DISPATCHBASEKHR);
1779 }
1780
PreCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer,VkBuffer raygenShaderBindingTableBuffer,VkDeviceSize raygenShaderBindingOffset,VkBuffer missShaderBindingTableBuffer,VkDeviceSize missShaderBindingOffset,VkDeviceSize missShaderBindingStride,VkBuffer hitShaderBindingTableBuffer,VkDeviceSize hitShaderBindingOffset,VkDeviceSize hitShaderBindingStride,VkBuffer callableShaderBindingTableBuffer,VkDeviceSize callableShaderBindingOffset,VkDeviceSize callableShaderBindingStride,uint32_t width,uint32_t height,uint32_t depth)1781 void GpuAssisted::PreCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
1782 VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
1783 VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
1784 VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
1785 VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
1786 VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
1787 uint32_t width, uint32_t height, uint32_t depth) {
1788 ValidationStateTracker::PreCallRecordCmdTraceRaysNV(
1789 commandBuffer, raygenShaderBindingTableBuffer, raygenShaderBindingOffset, missShaderBindingTableBuffer,
1790 missShaderBindingOffset, missShaderBindingStride, hitShaderBindingTableBuffer, hitShaderBindingOffset,
1791 hitShaderBindingStride, callableShaderBindingTableBuffer, callableShaderBindingOffset, callableShaderBindingStride, width,
1792 height, depth);
1793 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, CMD_TRACERAYSNV);
1794 }
1795
PostCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer,VkBuffer raygenShaderBindingTableBuffer,VkDeviceSize raygenShaderBindingOffset,VkBuffer missShaderBindingTableBuffer,VkDeviceSize missShaderBindingOffset,VkDeviceSize missShaderBindingStride,VkBuffer hitShaderBindingTableBuffer,VkDeviceSize hitShaderBindingOffset,VkDeviceSize hitShaderBindingStride,VkBuffer callableShaderBindingTableBuffer,VkDeviceSize callableShaderBindingOffset,VkDeviceSize callableShaderBindingStride,uint32_t width,uint32_t height,uint32_t depth)1796 void GpuAssisted::PostCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
1797 VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
1798 VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
1799 VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
1800 VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
1801 VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
1802 uint32_t width, uint32_t height, uint32_t depth) {
1803 ValidationStateTracker::PostCallRecordCmdTraceRaysNV(
1804 commandBuffer, raygenShaderBindingTableBuffer, raygenShaderBindingOffset, missShaderBindingTableBuffer,
1805 missShaderBindingOffset, missShaderBindingStride, hitShaderBindingTableBuffer, hitShaderBindingOffset,
1806 hitShaderBindingStride, callableShaderBindingTableBuffer, callableShaderBindingOffset, callableShaderBindingStride, width,
1807 height, depth);
1808 auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1809 cb_state->hasTraceRaysCmd = true;
1810 }
1811
PreCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,uint32_t width,uint32_t height,uint32_t depth)1812 void GpuAssisted::PreCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
1813 const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1814 const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1815 const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1816 const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
1817 uint32_t height, uint32_t depth) {
1818 ValidationStateTracker::PreCallRecordCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1819 pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
1820 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, CMD_TRACERAYSKHR);
1821 }
1822
PostCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,uint32_t width,uint32_t height,uint32_t depth)1823 void GpuAssisted::PostCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
1824 const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1825 const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1826 const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1827 const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
1828 uint32_t height, uint32_t depth) {
1829 ValidationStateTracker::PostCallRecordCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1830 pHitShaderBindingTable, pCallableShaderBindingTable, width, height,
1831 depth);
1832 auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1833 cb_state->hasTraceRaysCmd = true;
1834 }
1835
PreCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,VkDeviceAddress indirectDeviceAddress)1836 void GpuAssisted::PreCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
1837 const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1838 const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1839 const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1840 const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
1841 VkDeviceAddress indirectDeviceAddress) {
1842 ValidationStateTracker::PreCallRecordCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1843 pHitShaderBindingTable, pCallableShaderBindingTable,
1844 indirectDeviceAddress);
1845 AllocateValidationResources(commandBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, CMD_TRACERAYSINDIRECTKHR);
1846 }
1847
PostCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,const VkStridedDeviceAddressRegionKHR * pRaygenShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pMissShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pHitShaderBindingTable,const VkStridedDeviceAddressRegionKHR * pCallableShaderBindingTable,VkDeviceAddress indirectDeviceAddress)1848 void GpuAssisted::PostCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
1849 const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
1850 const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
1851 const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
1852 const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
1853 VkDeviceAddress indirectDeviceAddress) {
1854 ValidationStateTracker::PostCallRecordCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
1855 pHitShaderBindingTable, pCallableShaderBindingTable,
1856 indirectDeviceAddress);
1857 auto cb_state = Get<CMD_BUFFER_STATE>(commandBuffer);
1858 cb_state->hasTraceRaysCmd = true;
1859 }
1860
1861 // To generate the pre draw validation shader, run the following from the repository base level
1862 // python ./scripts/generate_spirv.py --outfilename ./layers/generated/gpu_pre_draw_shader.h ./layers/gpu_pre_draw_shader.vert
1863 // ./External/glslang/build/install/bin/glslangValidator.exe
1864 #include "gpu_pre_draw_shader.h"
AllocatePreDrawValidationResources(GpuAssistedDeviceMemoryBlock output_block,GpuAssistedPreDrawResources & resources,const LAST_BOUND_STATE & state,VkPipeline * pPipeline,const GpuAssistedCmdDrawIndirectState * cdi_state)1865 void GpuAssisted::AllocatePreDrawValidationResources(GpuAssistedDeviceMemoryBlock output_block,
1866 GpuAssistedPreDrawResources &resources, const LAST_BOUND_STATE &state,
1867 VkPipeline *pPipeline, const GpuAssistedCmdDrawIndirectState *cdi_state) {
1868 VkResult result;
1869 if (!pre_draw_validation_state.globals_created) {
1870 auto shader_module_ci = LvlInitStruct<VkShaderModuleCreateInfo>();
1871 shader_module_ci.codeSize = sizeof(gpu_pre_draw_shader_vert);
1872 shader_module_ci.pCode = gpu_pre_draw_shader_vert;
1873 result =
1874 DispatchCreateShaderModule(device, &shader_module_ci, nullptr, &pre_draw_validation_state.validation_shader_module);
1875 if (result != VK_SUCCESS) {
1876 ReportSetupProblem(device, "Unable to create shader module. Aborting GPU-AV");
1877 aborted = true;
1878 return;
1879 }
1880
1881 std::vector<VkDescriptorSetLayoutBinding> bindings;
1882 VkDescriptorSetLayoutBinding binding = {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT, NULL};
1883 // 0 - output buffer, 1 - count buffer
1884 bindings.push_back(binding);
1885 binding.binding = 1;
1886 bindings.push_back(binding);
1887
1888 VkDescriptorSetLayoutCreateInfo ds_layout_ci = {};
1889 ds_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
1890 ds_layout_ci.bindingCount = static_cast<uint32_t>(bindings.size());
1891 ds_layout_ci.pBindings = bindings.data();
1892 result = DispatchCreateDescriptorSetLayout(device, &ds_layout_ci, nullptr, &pre_draw_validation_state.validation_ds_layout);
1893 if (result != VK_SUCCESS) {
1894 ReportSetupProblem(device, "Unable to create descriptor set layout. Aborting GPU-AV");
1895 aborted = true;
1896 return;
1897 }
1898
1899 const uint32_t push_constant_range_count = 1;
1900 VkPushConstantRange push_constant_ranges[push_constant_range_count] = {};
1901 push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
1902 push_constant_ranges[0].offset = 0;
1903 push_constant_ranges[0].size = 4 * sizeof(uint32_t);
1904 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo[1] = {};
1905 pipelineLayoutCreateInfo[0].sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
1906 pipelineLayoutCreateInfo[0].pNext = NULL;
1907 pipelineLayoutCreateInfo[0].pushConstantRangeCount = push_constant_range_count;
1908 pipelineLayoutCreateInfo[0].pPushConstantRanges = push_constant_ranges;
1909 pipelineLayoutCreateInfo[0].setLayoutCount = 1;
1910 pipelineLayoutCreateInfo[0].pSetLayouts = &pre_draw_validation_state.validation_ds_layout;
1911 result = DispatchCreatePipelineLayout(device, pipelineLayoutCreateInfo, NULL,
1912 &pre_draw_validation_state.validation_pipeline_layout);
1913 if (result != VK_SUCCESS) {
1914 ReportSetupProblem(device, "Unable to create pipeline layout. Aborting GPU-AV");
1915 aborted = true;
1916 return;
1917 }
1918
1919 pre_draw_validation_state.globals_created = true;
1920 }
1921 VkRenderPass render_pass = state.pipeline_state->rp_state->renderPass();
1922 assert(render_pass != VK_NULL_HANDLE);
1923 auto pipeline = pre_draw_validation_state.renderpass_to_pipeline.find(render_pass);
1924 if (pipeline == pre_draw_validation_state.renderpass_to_pipeline.end()) {
1925 auto pipeline_stage_ci = LvlInitStruct<VkPipelineShaderStageCreateInfo>();
1926 pipeline_stage_ci.stage = VK_SHADER_STAGE_VERTEX_BIT;
1927 pipeline_stage_ci.module = pre_draw_validation_state.validation_shader_module;
1928 pipeline_stage_ci.pName = "main";
1929
1930 auto graphicsPipelineCreateInfo = LvlInitStruct<VkGraphicsPipelineCreateInfo>();
1931 auto vertexInputState = LvlInitStruct<VkPipelineVertexInputStateCreateInfo>();
1932 auto inputAssemblyState = LvlInitStruct<VkPipelineInputAssemblyStateCreateInfo>();
1933 inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1934 auto rasterizationState = LvlInitStruct<VkPipelineRasterizationStateCreateInfo>();
1935 rasterizationState.rasterizerDiscardEnable = VK_TRUE;
1936 auto colorBlendState = LvlInitStruct<VkPipelineColorBlendStateCreateInfo>();
1937
1938 graphicsPipelineCreateInfo.pVertexInputState = &vertexInputState;
1939 graphicsPipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
1940 graphicsPipelineCreateInfo.pRasterizationState = &rasterizationState;
1941 graphicsPipelineCreateInfo.pColorBlendState = &colorBlendState;
1942 graphicsPipelineCreateInfo.renderPass = render_pass;
1943 graphicsPipelineCreateInfo.layout = pre_draw_validation_state.validation_pipeline_layout;
1944 graphicsPipelineCreateInfo.stageCount = 1;
1945 graphicsPipelineCreateInfo.pStages = &pipeline_stage_ci;
1946
1947 VkPipeline new_pipeline = VK_NULL_HANDLE;
1948 result = DispatchCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &graphicsPipelineCreateInfo, nullptr, &new_pipeline);
1949 if (result != VK_SUCCESS) {
1950 ReportSetupProblem(device, "Unable to create graphics pipeline. Aborting GPU-AV");
1951 aborted = true;
1952 return;
1953 }
1954
1955 *pPipeline = new_pipeline;
1956 pre_draw_validation_state.renderpass_to_pipeline[render_pass] = new_pipeline;
1957 } else {
1958 *pPipeline = pipeline->second;
1959 }
1960
1961 result = desc_set_manager->GetDescriptorSet(&resources.desc_pool, pre_draw_validation_state.validation_ds_layout,
1962 &resources.desc_set);
1963 if (result != VK_SUCCESS) {
1964 ReportSetupProblem(device, "Unable to allocate descriptor set. Aborting GPU-AV");
1965 aborted = true;
1966 return;
1967 }
1968
1969 VkDescriptorBufferInfo buffer_infos[3] = {};
1970 // Error output buffer
1971 buffer_infos[0].buffer = output_block.buffer;
1972 buffer_infos[0].offset = 0;
1973 buffer_infos[0].range = VK_WHOLE_SIZE;
1974 if (cdi_state->count_buffer) {
1975 // Count buffer
1976 buffer_infos[1].buffer = cdi_state->count_buffer;
1977 } else {
1978 // Draw Buffer
1979 buffer_infos[1].buffer = cdi_state->buffer;
1980 }
1981 buffer_infos[1].offset = 0;
1982 buffer_infos[1].range = VK_WHOLE_SIZE;
1983
1984 VkWriteDescriptorSet desc_writes[2] = {};
1985 for (auto i = 0; i < 2; i++) {
1986 desc_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1987 desc_writes[i].dstBinding = i;
1988 desc_writes[i].descriptorCount = 1;
1989 desc_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1990 desc_writes[i].pBufferInfo = &buffer_infos[i];
1991 desc_writes[i].dstSet = resources.desc_set;
1992 }
1993 DispatchUpdateDescriptorSets(device, 2, desc_writes, 0, NULL);
1994 }
1995
AllocateValidationResources(const VkCommandBuffer cmd_buffer,const VkPipelineBindPoint bind_point,CMD_TYPE cmd_type,const GpuAssistedCmdDrawIndirectState * cdi_state)1996 void GpuAssisted::AllocateValidationResources(const VkCommandBuffer cmd_buffer, const VkPipelineBindPoint bind_point,
1997 CMD_TYPE cmd_type, const GpuAssistedCmdDrawIndirectState *cdi_state) {
1998 if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE &&
1999 bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) {
2000 return;
2001 }
2002 VkResult result;
2003
2004 if (aborted) return;
2005
2006 std::vector<VkDescriptorSet> desc_sets;
2007 VkDescriptorPool desc_pool = VK_NULL_HANDLE;
2008 result = desc_set_manager->GetDescriptorSets(1, &desc_pool, debug_desc_layout, &desc_sets);
2009 assert(result == VK_SUCCESS);
2010 if (result != VK_SUCCESS) {
2011 ReportSetupProblem(device, "Unable to allocate descriptor sets. Device could become unstable.");
2012 aborted = true;
2013 return;
2014 }
2015
2016 VkDescriptorBufferInfo output_desc_buffer_info = {};
2017 output_desc_buffer_info.range = output_buffer_size;
2018
2019 auto cb_node = GetCBState(cmd_buffer);
2020 if (!cb_node) {
2021 ReportSetupProblem(device, "Unrecognized command buffer");
2022 aborted = true;
2023 return;
2024 }
2025
2026 // Allocate memory for the output block that the gpu will use to return any error information
2027 GpuAssistedDeviceMemoryBlock output_block = {};
2028 VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
2029 buffer_info.size = output_buffer_size;
2030 buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
2031 VmaAllocationCreateInfo alloc_info = {};
2032 alloc_info.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
2033 result = vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &output_block.buffer, &output_block.allocation, nullptr);
2034 if (result != VK_SUCCESS) {
2035 ReportSetupProblem(device, "Unable to allocate device memory. Device could become unstable.");
2036 aborted = true;
2037 return;
2038 }
2039
2040 // Clear the output block to zeros so that only error information from the gpu will be present
2041 uint32_t *data_ptr;
2042 result = vmaMapMemory(vmaAllocator, output_block.allocation, reinterpret_cast<void **>(&data_ptr));
2043 if (result == VK_SUCCESS) {
2044 memset(data_ptr, 0, output_buffer_size);
2045 vmaUnmapMemory(vmaAllocator, output_block.allocation);
2046 }
2047
2048 GpuAssistedDeviceMemoryBlock di_input_block = {}, bda_input_block = {};
2049 VkDescriptorBufferInfo di_input_desc_buffer_info = {};
2050 VkDescriptorBufferInfo bda_input_desc_buffer_info = {};
2051 VkWriteDescriptorSet desc_writes[3] = {};
2052 GpuAssistedPreDrawResources pre_draw_resources = {};
2053 uint32_t desc_count = 1;
2054 const auto lv_bind_point = ConvertToLvlBindPoint(bind_point);
2055 auto const &state = cb_node->lastBound[lv_bind_point];
2056 uint32_t number_of_sets = static_cast<uint32_t>(state.per_set.size());
2057
2058 if (validate_draw_indirect && ((cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR ||
2059 cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR) ||
2060 ((cmd_type == CMD_DRAWINDIRECT || cmd_type == CMD_DRAWINDEXEDINDIRECT) &&
2061 !(enabled_features.core.drawIndirectFirstInstance)))) {
2062 // Insert a draw that can examine some device memory right before the draw we're validating (Pre Draw Validation)
2063 //
2064 // NOTE that this validation does not attempt to abort invalid api calls as most other validation does. A crash
2065 // or DEVICE_LOST resulting from the invalid call will prevent preceeding validation errors from being reported.
2066
2067 assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
2068 assert(cdi_state != NULL);
2069 VkPipeline validation_pipeline;
2070 AllocatePreDrawValidationResources(output_block, pre_draw_resources, state, &validation_pipeline, cdi_state);
2071 if (aborted) return;
2072
2073 // Save current graphics pipeline state
2074 GPUAV_RESTORABLE_PIPELINE_STATE restorable_state;
2075 restorable_state.Create(cb_node.get(), VK_PIPELINE_BIND_POINT_GRAPHICS);
2076
2077 // Save parameters for error message
2078 pre_draw_resources.buffer = cdi_state->buffer;
2079 pre_draw_resources.offset = cdi_state->offset;
2080 pre_draw_resources.stride = cdi_state->stride;
2081
2082 uint32_t pushConstants[4] = {};
2083 if (cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT ||
2084 cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR) {
2085 if (cdi_state->count_buffer_offset > std::numeric_limits<uint32_t>::max()) {
2086 ReportSetupProblem(device,
2087 "Count buffer offset is larger than can be contained in an unsigned int. Aborting GPU-AV");
2088 aborted = true;
2089 return;
2090 }
2091
2092 // Buffer size must be >= (stride * (drawCount - 1) + offset + sizeof(VkDrawIndirectCommand))
2093 uint32_t struct_size;
2094 if (cmd_type == CMD_DRAWINDIRECTCOUNT || cmd_type == CMD_DRAWINDIRECTCOUNTKHR) {
2095 struct_size = sizeof(VkDrawIndirectCommand);
2096 } else {
2097 assert(cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNT || cmd_type == CMD_DRAWINDEXEDINDIRECTCOUNTKHR);
2098 struct_size = sizeof(VkDrawIndexedIndirectCommand);
2099 }
2100 auto buffer_state = Get<BUFFER_STATE>(cdi_state->buffer);
2101 uint32_t max_count;
2102 uint64_t bufsize = buffer_state->createInfo.size;
2103 uint64_t first_command_bytes = struct_size + cdi_state->offset;
2104 if (first_command_bytes > bufsize) {
2105 max_count = 0;
2106 } else {
2107 max_count = 1 + static_cast<uint32_t>(std::floor(((bufsize - first_command_bytes) / cdi_state->stride)));
2108 }
2109 pre_draw_resources.buf_size = buffer_state->createInfo.size;
2110
2111 assert(phys_dev_props.limits.maxDrawIndirectCount > 0);
2112 pushConstants[0] = phys_dev_props.limits.maxDrawIndirectCount;
2113 pushConstants[1] = max_count;
2114 pushConstants[2] = static_cast<uint32_t>((cdi_state->count_buffer_offset / sizeof(uint32_t)));
2115 } else {
2116 pushConstants[0] = 0; // firstInstance check instead of count buffer check
2117 pushConstants[1] = cdi_state->drawCount;
2118 if (cmd_type == CMD_DRAWINDIRECT) {
2119 pushConstants[2] = static_cast<uint32_t>(
2120 ((cdi_state->offset + offsetof(struct VkDrawIndirectCommand, firstInstance)) / sizeof(uint32_t)));
2121 } else {
2122 assert(cmd_type == CMD_DRAWINDEXEDINDIRECT);
2123 pushConstants[2] = static_cast<uint32_t>(
2124 ((cdi_state->offset + offsetof(struct VkDrawIndexedIndirectCommand, firstInstance)) / sizeof(uint32_t)));
2125 }
2126 pushConstants[3] = (cdi_state->stride / sizeof(uint32_t));
2127 }
2128
2129 // Insert diagnostic draw
2130 DispatchCmdBindPipeline(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, validation_pipeline);
2131 DispatchCmdPushConstants(cmd_buffer, pre_draw_validation_state.validation_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
2132 sizeof(pushConstants), pushConstants);
2133 DispatchCmdBindDescriptorSets(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
2134 pre_draw_validation_state.validation_pipeline_layout, 0, 1, &pre_draw_resources.desc_set, 0,
2135 nullptr);
2136 DispatchCmdDraw(cmd_buffer, 3, 1, 0, 0);
2137
2138 // Restore the previous graphics pipeline state.
2139 restorable_state.Restore(cmd_buffer);
2140 }
2141
2142 bool has_buffers = false;
2143 // Figure out how much memory we need for the input block based on how many sets and bindings there are
2144 // and how big each of the bindings is
2145 if (number_of_sets > 0 && (descriptor_indexing || buffer_oob_enabled)) {
2146 uint32_t descriptor_count = 0; // Number of descriptors, including all array elements
2147 uint32_t binding_count = 0; // Number of bindings based on the max binding number used
2148 for (const auto &s : state.per_set) {
2149 auto desc = s.bound_descriptor_set;
2150 if (desc && (desc->GetBindingCount() > 0)) {
2151 auto bindings = desc->GetLayout()->GetSortedBindingSet();
2152 binding_count += desc->GetLayout()->GetMaxBinding() + 1;
2153 for (auto binding : bindings) {
                    // Shader instrumentation is tracking inline uniform blocks as scalars. Don't try to validate inline uniform
                    // blocks
2156 auto descriptor_type = desc->GetLayout()->GetTypeFromBinding(binding);
2157 if (descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
2158 descriptor_count++;
2159 LogWarning(device, "UNASSIGNED-GPU-Assisted Validation Warning",
2160 "VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT descriptors will not be validated by GPU assisted "
2161 "validation");
2162 } else if (binding == desc->GetLayout()->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) {
2163 descriptor_count += desc->GetVariableDescriptorCount();
2164 } else {
2165 descriptor_count += desc->GetDescriptorCountFromBinding(binding);
2166 }
2167 if (!has_buffers && (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
2168 descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
2169 descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
2170 descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2171 descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ||
2172 descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)) {
2173 has_buffers = true;
2174 }
2175 }
2176 }
2177 }
2178
2179 if (descriptor_indexing || has_buffers) {
2180 // Note that the size of the input buffer is dependent on the maximum binding number, which
2181 // can be very large. This is because for (set = s, binding = b, index = i), the validation
2182 // code is going to dereference Input[ i + Input[ b + Input[ s + Input[ Input[0] ] ] ] ] to
2183 // see if descriptors have been written. In gpu_validation.md, we note this and advise
2184 // using densely packed bindings as a best practice when using gpu-av with descriptor indexing
2185 uint32_t words_needed;
2186 if (descriptor_indexing) {
2187 words_needed = 1 + (number_of_sets * 2) + (binding_count * 2) + descriptor_count;
2188 } else {
2189 words_needed = 1 + number_of_sets + binding_count + descriptor_count;
2190 }
2191 alloc_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
2192 buffer_info.size = words_needed * 4;
2193 result = vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &di_input_block.buffer, &di_input_block.allocation,
2194 nullptr);
2195 if (result != VK_SUCCESS) {
2196 ReportSetupProblem(device, "Unable to allocate device memory. Device could become unstable.");
2197 aborted = true;
2198 return;
2199 }
2200
2201 // Populate input buffer first with the sizes of every descriptor in every set, then with whether
            // each element of each descriptor has been written or not. See gpu_validation.md for a more thorough
2203 // outline of the input buffer format
2204 result = vmaMapMemory(vmaAllocator, di_input_block.allocation, reinterpret_cast<void **>(&data_ptr));
2205 memset(data_ptr, 0, static_cast<size_t>(buffer_info.size));
2206
2207 // Descriptor indexing needs the number of descriptors at each binding.
2208 if (descriptor_indexing) {
2209 // Pointer to a sets array that points into the sizes array
2210 uint32_t *sets_to_sizes = data_ptr + 1;
2211 // Pointer to the sizes array that contains the array size of the descriptor at each binding
2212 uint32_t *sizes = sets_to_sizes + number_of_sets;
2213 // Pointer to another sets array that points into the bindings array that points into the written array
2214 uint32_t *sets_to_bindings = sizes + binding_count;
2215 // Pointer to the bindings array that points at the start of the writes in the writes array for each binding
2216 uint32_t *bindings_to_written = sets_to_bindings + number_of_sets;
2217 // Index of the next entry in the written array to be updated
2218 uint32_t written_index = 1 + (number_of_sets * 2) + (binding_count * 2);
2219 uint32_t bind_counter = number_of_sets + 1;
2220 // Index of the start of the sets_to_bindings array
2221 data_ptr[0] = number_of_sets + binding_count + 1;
2222
2223 for (const auto &s : state.per_set) {
2224 auto desc = s.bound_descriptor_set;
2225 if (desc && (desc->GetBindingCount() > 0)) {
2226 auto layout = desc->GetLayout();
2227 auto bindings = layout->GetSortedBindingSet();
2228 // For each set, fill in index of its bindings sizes in the sizes array
2229 *sets_to_sizes++ = bind_counter;
2230 // For each set, fill in the index of its bindings in the bindings_to_written array
2231 *sets_to_bindings++ = bind_counter + number_of_sets + binding_count;
2232 for (auto binding : bindings) {
2233 // For each binding, fill in its size in the sizes array
2234 // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2235 // uniform blocks
2236 if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2237 sizes[binding] = 1;
2238 } else if (binding == layout->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) {
2239 sizes[binding] = desc->GetVariableDescriptorCount();
2240 } else {
2241 sizes[binding] = desc->GetDescriptorCountFromBinding(binding);
2242 }
2243 // Fill in the starting index for this binding in the written array in the bindings_to_written array
2244 bindings_to_written[binding] = written_index;
2245
2246 // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2247 // uniform blocks
2248 if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2249 data_ptr[written_index++] = UINT_MAX;
2250 continue;
2251 }
2252
2253 auto index_range = desc->GetGlobalIndexRangeFromBinding(binding, true);
2254 // For each array element in the binding, update the written array with whether it has been written
2255 for (uint32_t i = index_range.start; i < index_range.end; ++i) {
2256 auto *descriptor = desc->GetDescriptorFromGlobalIndex(i);
2257 if (descriptor->updated) {
2258 SetDescriptorInitialized(data_ptr, written_index, descriptor);
2259 } else if (desc->IsUpdateAfterBind(binding)) {
2260 // If it hasn't been written now and it's update after bind, put it in a list to check at
2261 // QueueSubmit
2262 di_input_block.update_at_submit[written_index] = descriptor;
2263 }
2264 written_index++;
2265 }
2266 }
2267 auto last = desc->GetLayout()->GetMaxBinding();
2268 bindings_to_written += last + 1;
2269 bind_counter += last + 1;
2270 sizes += last + 1;
2271 } else {
2272 *sets_to_sizes++ = 0;
2273 *sets_to_bindings++ = 0;
2274 }
2275 }
2276 } else {
2277 // If no descriptor indexing, we don't need number of descriptors at each binding, so
2278 // no sets_to_sizes or sizes arrays, just sets_to_bindings, bindings_to_written and written_index
2279
2280 // Pointer to sets array that points into the bindings array that points into the written array
2281 uint32_t *sets_to_bindings = data_ptr + 1;
2282 // Pointer to the bindings array that points at the start of the writes in the writes array for each binding
2283 uint32_t *bindings_to_written = sets_to_bindings + number_of_sets;
2284 // Index of the next entry in the written array to be updated
2285 uint32_t written_index = 1 + number_of_sets + binding_count;
2286 uint32_t bind_counter = number_of_sets + 1;
2287 data_ptr[0] = 1;
2288
2289 for (const auto &s : state.per_set) {
2290 auto desc = s.bound_descriptor_set;
2291 if (desc && (desc->GetBindingCount() > 0)) {
2292 auto layout = desc->GetLayout();
2293 auto bindings = layout->GetSortedBindingSet();
2294 *sets_to_bindings++ = bind_counter;
2295 for (auto binding : bindings) {
2296 // Fill in the starting index for this binding in the written array in the bindings_to_written array
2297 bindings_to_written[binding] = written_index;
2298
2299 // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline
2300 // uniform blocks
2301 if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) {
2302 data_ptr[written_index++] = UINT_MAX;
2303 continue;
2304 }
2305
2306 auto index_range = desc->GetGlobalIndexRangeFromBinding(binding, true);
2307
2308 // For each array element in the binding, update the written array with whether it has been written
2309 for (uint32_t i = index_range.start; i < index_range.end; ++i) {
2310 auto *descriptor = desc->GetDescriptorFromGlobalIndex(i);
2311 if (descriptor->updated) {
2312 SetDescriptorInitialized(data_ptr, written_index, descriptor);
2313 } else if (desc->IsUpdateAfterBind(binding)) {
2314 // If it hasn't been written now and it's update after bind, put it in a list to check at
2315 // QueueSubmit
2316 di_input_block.update_at_submit[written_index] = descriptor;
2317 }
2318 written_index++;
2319 }
2320 }
2321 auto last = desc->GetLayout()->GetMaxBinding();
2322 bindings_to_written += last + 1;
2323 bind_counter += last + 1;
2324 } else {
2325 *sets_to_bindings++ = 0;
2326 }
2327 }
2328 }
2329 vmaUnmapMemory(vmaAllocator, di_input_block.allocation);
2330
2331 di_input_desc_buffer_info.range = (words_needed * 4);
2332 di_input_desc_buffer_info.buffer = di_input_block.buffer;
2333 di_input_desc_buffer_info.offset = 0;
2334
2335 desc_writes[1] = LvlInitStruct<VkWriteDescriptorSet>();
2336 desc_writes[1].dstBinding = 1;
2337 desc_writes[1].descriptorCount = 1;
2338 desc_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2339 desc_writes[1].pBufferInfo = &di_input_desc_buffer_info;
2340 desc_writes[1].dstSet = desc_sets[0];
2341
2342 desc_count = 2;
2343 }
2344 }
2345
2346 if ((IsExtEnabled(device_extensions.vk_ext_buffer_device_address) ||
2347 IsExtEnabled(device_extensions.vk_khr_buffer_device_address)) &&
2348 buffer_map.size() && shaderInt64 && enabled_features.core12.bufferDeviceAddress) {
2349 // Example BDA input buffer assuming 2 buffers using BDA:
2350 // Word 0 | Index of start of buffer sizes (in this case 5)
2351 // Word 1 | 0x0000000000000000
2352 // Word 2 | Device Address of first buffer (Addresses sorted in ascending order)
2353 // Word 3 | Device Address of second buffer
2354 // Word 4 | 0xffffffffffffffff
2355 // Word 5 | 0 (size of pretend buffer at word 1)
2356 // Word 6 | Size in bytes of first buffer
2357 // Word 7 | Size in bytes of second buffer
2358 // Word 8 | 0 (size of pretend buffer in word 4)
2359
2360 uint32_t num_buffers = static_cast<uint32_t>(buffer_map.size());
2361 uint32_t words_needed = (num_buffers + 3) + (num_buffers + 2);
2362 alloc_info.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
2363 buffer_info.size = words_needed * 8; // 64 bit words
2364 result =
2365 vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &bda_input_block.buffer, &bda_input_block.allocation, nullptr);
2366 if (result != VK_SUCCESS) {
2367 ReportSetupProblem(device, "Unable to allocate device memory. Device could become unstable.");
2368 aborted = true;
2369 return;
2370 }
2371 uint64_t *bda_data;
2372 result = vmaMapMemory(vmaAllocator, bda_input_block.allocation, reinterpret_cast<void **>(&bda_data));
2373 uint32_t address_index = 1;
2374 uint32_t size_index = 3 + num_buffers;
2375 memset(bda_data, 0, static_cast<size_t>(buffer_info.size));
2376 bda_data[0] = size_index; // Start of buffer sizes
2377 bda_data[address_index++] = 0; // NULL address
2378 bda_data[size_index++] = 0;
2379
2380 for (const auto &value : buffer_map) {
2381 bda_data[address_index++] = value.first;
2382 bda_data[size_index++] = value.second;
2383 }
2384 bda_data[address_index] = UINTPTR_MAX;
2385 bda_data[size_index] = 0;
2386 vmaUnmapMemory(vmaAllocator, bda_input_block.allocation);
2387
2388 bda_input_desc_buffer_info.range = (words_needed * 8);
2389 bda_input_desc_buffer_info.buffer = bda_input_block.buffer;
2390 bda_input_desc_buffer_info.offset = 0;
2391
2392 desc_writes[desc_count] = LvlInitStruct<VkWriteDescriptorSet>();
2393 desc_writes[desc_count].dstBinding = 2;
2394 desc_writes[desc_count].descriptorCount = 1;
2395 desc_writes[desc_count].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2396 desc_writes[desc_count].pBufferInfo = &bda_input_desc_buffer_info;
2397 desc_writes[desc_count].dstSet = desc_sets[0];
2398 desc_count++;
2399 }
2400
2401 // Write the descriptor
2402 output_desc_buffer_info.buffer = output_block.buffer;
2403 output_desc_buffer_info.offset = 0;
2404
2405 desc_writes[0] = LvlInitStruct<VkWriteDescriptorSet>();
2406 desc_writes[0].descriptorCount = 1;
2407 desc_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2408 desc_writes[0].pBufferInfo = &output_desc_buffer_info;
2409 desc_writes[0].dstSet = desc_sets[0];
2410 DispatchUpdateDescriptorSets(device, desc_count, desc_writes, 0, NULL);
2411
2412 const auto *pipeline_state = state.pipeline_state;
2413 if (pipeline_state) {
2414 if ((pipeline_state->pipeline_layout->set_layouts.size() <= desc_set_bind_index) &&
2415 !pipeline_state->pipeline_layout->Destroyed()) {
2416 DispatchCmdBindDescriptorSets(cmd_buffer, bind_point, pipeline_state->pipeline_layout->layout(), desc_set_bind_index, 1,
2417 desc_sets.data(), 0, nullptr);
2418 }
2419 if (pipeline_state->pipeline_layout->Destroyed()) {
2420 ReportSetupProblem(device, "Pipeline layout has been destroyed, aborting GPU-AV");
2421 aborted = true;
2422 } else {
2423 // Record buffer and memory info in CB state tracking
2424 cb_node->gpuav_buffer_list.emplace_back(output_block, di_input_block, bda_input_block, pre_draw_resources, desc_sets[0],
2425 desc_pool, bind_point, cmd_type);
2426 }
2427 } else {
2428 ReportSetupProblem(device, "Unable to find pipeline state");
2429 aborted = true;
2430 }
2431 if (aborted) {
2432 vmaDestroyBuffer(vmaAllocator, di_input_block.buffer, di_input_block.allocation);
2433 vmaDestroyBuffer(vmaAllocator, bda_input_block.buffer, bda_input_block.allocation);
2434 vmaDestroyBuffer(vmaAllocator, output_block.buffer, output_block.allocation);
2435 return;
2436 }
2437 }
2438
CreateCmdBufferState(VkCommandBuffer cb,const VkCommandBufferAllocateInfo * pCreateInfo,const COMMAND_POOL_STATE * pool)2439 std::shared_ptr<CMD_BUFFER_STATE> GpuAssisted::CreateCmdBufferState(VkCommandBuffer cb,
2440 const VkCommandBufferAllocateInfo *pCreateInfo,
2441 const COMMAND_POOL_STATE *pool) {
2442 return std::static_pointer_cast<CMD_BUFFER_STATE>(std::make_shared<CMD_BUFFER_STATE_GPUAV>(this, cb, pCreateInfo, pool));
2443 }
2444
// GPU-AV command buffer state: delegates construction to the base CMD_BUFFER_STATE;
// the GPU-AV-specific buffer lists (gpuav_buffer_list, as_validation_buffers) start empty.
CMD_BUFFER_STATE_GPUAV::CMD_BUFFER_STATE_GPUAV(GpuAssisted *ga, VkCommandBuffer cb, const VkCommandBufferAllocateInfo *pCreateInfo,
                                               const COMMAND_POOL_STATE *pool)
    : CMD_BUFFER_STATE(ga, cb, pCreateInfo, pool) {}
2448
Reset()2449 void CMD_BUFFER_STATE_GPUAV::Reset() {
2450 CMD_BUFFER_STATE::Reset();
2451 auto gpuav = static_cast<GpuAssisted *>(dev_data);
2452 // Free the device memory and descriptor set(s) associated with a command buffer.
2453 if (gpuav->aborted) {
2454 return;
2455 }
2456 for (auto &buffer_info : gpuav_buffer_list) {
2457 gpuav->DestroyBuffer(buffer_info);
2458 }
2459 gpuav_buffer_list.clear();
2460
2461 for (auto &as_validation_buffer_info : as_validation_buffers) {
2462 gpuav->DestroyBuffer(as_validation_buffer_info);
2463 }
2464 as_validation_buffers.clear();
2465 }
2466