/* Copyright (c) 2017-2018 Hans-Kristian Arntzen
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "command_buffer.hpp"
#include "device.hpp"
#include "format.hpp"
#include <string.h>

using namespace std;
using namespace Util;

namespace Vulkan
{
CommandBuffer::CommandBuffer(Device *device, VkCommandBuffer cmd, VkPipelineCache cache, Type type)
    : device(device)
    , cmd(cmd)
    , cache(cache)
    , type(type)
{
	begin_compute();
	set_opaque_state();
	memset(&static_state, 0, sizeof(static_state));
	memset(&bindings, 0, sizeof(bindings));
}

CommandBuffer::~CommandBuffer()
{
	VK_ASSERT(vbo_block.mapped == nullptr);
	VK_ASSERT(ibo_block.mapped == nullptr);
	VK_ASSERT(ubo_block.mapped == nullptr);
	VK_ASSERT(staging_block.mapped == nullptr);
}

void CommandBuffer::fill_buffer(const Buffer &dst, uint32_t value)
{
	fill_buffer(dst, value, 0, VK_WHOLE_SIZE);
}

void CommandBuffer::fill_buffer(const Buffer &dst, uint32_t value, VkDeviceSize offset, VkDeviceSize size)
{
	vkCmdFillBuffer(cmd, dst.get_buffer(), offset, size, value);
}

void CommandBuffer::copy_buffer(const Buffer &dst, VkDeviceSize dst_offset, const Buffer &src, VkDeviceSize src_offset,
                                VkDeviceSize size)
{
	const VkBufferCopy region = {
		src_offset, dst_offset, size,
	};
	vkCmdCopyBuffer(cmd, src.get_buffer(), dst.get_buffer(), 1, &region);
}

void CommandBuffer::copy_buffer(const Buffer &dst, const Buffer &src)
{
	VK_ASSERT(dst.get_create_info().size == src.get_create_info().size);
	copy_buffer(dst, 0, src, 0, dst.get_create_info().size);
}

void CommandBuffer::copy_image(const Vulkan::Image &dst, const Vulkan::Image &src, const VkOffset3D &dst_offset,
                               const VkOffset3D &src_offset, const VkExtent3D &extent,
                               const VkImageSubresourceLayers &dst_subresource,
                               const VkImageSubresourceLayers &src_subresource)
{
	VkImageCopy region = {};
	region.dstOffset = dst_offset;
	region.srcOffset = src_offset;
	region.extent = extent;
	region.srcSubresource = src_subresource;
	region.dstSubresource = dst_subresource;

	vkCmdCopyImage(cmd, src.get_image(), src.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
	               dst.get_image(), dst.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
	               1, &region);
}

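// Copies all mip levels and array layers between two images with matching dimensions, format aspect and layer count.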
void CommandBuffer::copy_image(const Image &dst, const Image &src)
{
	uint32_t levels = src.get_create_info().levels;
	VK_ASSERT(src.get_create_info().levels == dst.get_create_info().levels);
	VK_ASSERT(src.get_create_info().width == dst.get_create_info().width);
	VK_ASSERT(src.get_create_info().height == dst.get_create_info().height);
	VK_ASSERT(src.get_create_info().depth == dst.get_create_info().depth);
	VK_ASSERT(src.get_create_info().type == dst.get_create_info().type);
	VK_ASSERT(src.get_create_info().layers == dst.get_create_info().layers);

	VkImageCopy regions[32] = {};

	for (uint32_t i = 0; i < levels; i++)
	{
		auto &region = regions[i];
		region.extent.width = src.get_create_info().width;
		region.extent.height = src.get_create_info().height;
		region.extent.depth = src.get_create_info().depth;
		region.srcSubresource.aspectMask = format_to_aspect_mask(src.get_format());
		region.srcSubresource.layerCount = src.get_create_info().layers;
		region.dstSubresource.aspectMask = format_to_aspect_mask(dst.get_format());
		region.dstSubresource.layerCount = dst.get_create_info().layers;
		region.srcSubresource.mipLevel = i;
		region.dstSubresource.mipLevel = i;
		VK_ASSERT(region.srcSubresource.aspectMask == region.dstSubresource.aspectMask);
	}

	vkCmdCopyImage(cmd, src.get_image(), src.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
	               dst.get_image(), dst.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
	               levels, regions);
}

void CommandBuffer::copy_buffer_to_image(const Image &image, const Buffer &buffer, unsigned num_blits,
                                         const VkBufferImageCopy *blits)
{
	vkCmdCopyBufferToImage(cmd, buffer.get_buffer(),
	                       image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL), num_blits, blits);
}

void CommandBuffer::copy_image_to_buffer(const Buffer &buffer, const Image &image, unsigned num_blits,
                                         const VkBufferImageCopy *blits)
{
	vkCmdCopyImageToBuffer(cmd, image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
	                       buffer.get_buffer(), num_blits, blits);
}

void CommandBuffer::copy_buffer_to_image(const Image &image, const Buffer &src, VkDeviceSize buffer_offset,
                                         const VkOffset3D &offset, const VkExtent3D &extent, unsigned row_length,
                                         unsigned slice_height, const VkImageSubresourceLayers &subresource)
{
	const VkBufferImageCopy region = {
		buffer_offset,
		row_length != extent.width ? row_length : 0, slice_height != extent.height ? slice_height : 0,
		subresource, offset, extent,
	};
	vkCmdCopyBufferToImage(cmd, src.get_buffer(), image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
	                       1, &region);
}

void CommandBuffer::copy_image_to_buffer(const Buffer &buffer, const Image &image, VkDeviceSize buffer_offset,
                                         const VkOffset3D &offset, const VkExtent3D &extent, unsigned row_length,
                                         unsigned slice_height, const VkImageSubresourceLayers &subresource)
{
	const VkBufferImageCopy region = {
		buffer_offset,
		row_length != extent.width ? row_length : 0, slice_height != extent.height ? slice_height : 0,
		subresource, offset, extent,
	};
	vkCmdCopyImageToBuffer(cmd, image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
	                       buffer.get_buffer(), 1, &region);
}

void CommandBuffer::clear_image(const Image &image, const VkClearValue &value)
{
	VK_ASSERT(!framebuffer);
	VK_ASSERT(!actual_render_pass);

	auto aspect = format_to_aspect_mask(image.get_format());
	VkImageSubresourceRange range = {};
	range.aspectMask = aspect;
	range.baseArrayLayer = 0;
	range.baseMipLevel = 0;
	range.levelCount = image.get_create_info().levels;
	range.layerCount = image.get_create_info().layers;
	if (aspect & VK_IMAGE_ASPECT_COLOR_BIT)
	{
		vkCmdClearColorImage(cmd, image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
		                     &value.color, 1, &range);
	}
	else
	{
		vkCmdClearDepthStencilImage(cmd, image.get_image(), image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
		                            &value.depthStencil, 1, &range);
	}
}

void CommandBuffer::clear_quad(unsigned attachment, const VkClearRect &rect, const VkClearValue &value,
                               VkImageAspectFlags aspect)
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(actual_render_pass);
	VkClearAttachment att = {};
	att.clearValue = value;
	att.colorAttachment = attachment;
	att.aspectMask = aspect;
	vkCmdClearAttachments(cmd, 1, &att, 1, &rect);
}

void CommandBuffer::clear_quad(const VkClearRect &rect, const VkClearAttachment *attachments, unsigned num_attachments)
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(actual_render_pass);
	vkCmdClearAttachments(cmd, num_attachments, attachments, 1, &rect);
}

void CommandBuffer::full_barrier()
{
	VK_ASSERT(!actual_render_pass);
	VK_ASSERT(!framebuffer);
	barrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
	        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT |
	            VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
	        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
	        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
	            VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
	            VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT);
}

void CommandBuffer::pixel_barrier()
{
	VK_ASSERT(actual_render_pass);
	VK_ASSERT(framebuffer);
	VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
	barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	barrier.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
	vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
	                     VK_DEPENDENCY_BY_REGION_BIT, 1, &barrier, 0, nullptr, 0, nullptr);
}

static inline void fixup_src_stage(VkPipelineStageFlags &src_stages, bool fixup)
{
	// ALL_GRAPHICS_BIT waits for the vertex stages as well, which causes performance issues on some drivers.
	// It shouldn't matter, but hey.
	//
	// We aren't using vertex with side-effects on relevant hardware, so dropping VERTEX_SHADER_BIT is fine.
	if ((src_stages & VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT) != 0 && fixup)
	{
		src_stages &= ~VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
		src_stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
		              VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
		              VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	}
}

void CommandBuffer::barrier(VkPipelineStageFlags src_stages, VkAccessFlags src_access, VkPipelineStageFlags dst_stages,
                            VkAccessFlags dst_access)
{
	VK_ASSERT(!actual_render_pass);
	VK_ASSERT(!framebuffer);
	VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
	barrier.srcAccessMask = src_access;
	barrier.dstAccessMask = dst_access;
	fixup_src_stage(src_stages, device->get_workarounds().optimize_all_graphics_barrier);
	vkCmdPipelineBarrier(cmd, src_stages, dst_stages, 0, 1, &barrier, 0, nullptr, 0, nullptr);
}

void CommandBuffer::barrier(VkPipelineStageFlags src_stages, VkPipelineStageFlags dst_stages, unsigned barriers,
                            const VkMemoryBarrier *globals, unsigned buffer_barriers,
                            const VkBufferMemoryBarrier *buffers, unsigned image_barriers,
                            const VkImageMemoryBarrier *images)
{
	VK_ASSERT(!actual_render_pass);
	VK_ASSERT(!framebuffer);
	fixup_src_stage(src_stages, device->get_workarounds().optimize_all_graphics_barrier);
	vkCmdPipelineBarrier(cmd, src_stages, dst_stages, 0, barriers, globals, buffer_barriers, buffers, image_barriers, images);
}

void CommandBuffer::buffer_barrier(const Buffer &buffer, VkPipelineStageFlags src_stages, VkAccessFlags src_access,
                                   VkPipelineStageFlags dst_stages, VkAccessFlags dst_access)
{
	VK_ASSERT(!actual_render_pass);
	VK_ASSERT(!framebuffer);
	VkBufferMemoryBarrier barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
	barrier.srcAccessMask = src_access;
	barrier.dstAccessMask = dst_access;
	barrier.buffer = buffer.get_buffer();
	barrier.offset = 0;
	barrier.size = buffer.get_create_info().size;

	fixup_src_stage(src_stages, device->get_workarounds().optimize_all_graphics_barrier);
	vkCmdPipelineBarrier(cmd, src_stages, dst_stages, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}

void CommandBuffer::image_barrier(const Image &image, VkImageLayout old_layout, VkImageLayout new_layout,
                                  VkPipelineStageFlags src_stages, VkAccessFlags src_access,
                                  VkPipelineStageFlags dst_stages, VkAccessFlags dst_access)
{
	VK_ASSERT(!actual_render_pass);
	VK_ASSERT(!framebuffer);
	VK_ASSERT(image.get_create_info().domain != ImageDomain::Transient);

	VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
	barrier.srcAccessMask = src_access;
	barrier.dstAccessMask = dst_access;
	barrier.oldLayout = old_layout;
	barrier.newLayout = new_layout;
	barrier.image = image.get_image();
	barrier.subresourceRange.aspectMask = format_to_aspect_mask(image.get_create_info().format);
	barrier.subresourceRange.levelCount = image.get_create_info().levels;
	barrier.subresourceRange.layerCount = image.get_create_info().layers;
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

	fixup_src_stage(src_stages, device->get_workarounds().optimize_all_graphics_barrier);
	vkCmdPipelineBarrier(cmd, src_stages, dst_stages, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}

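// Transitions the top mip level to TRANSFER_SRC_OPTIMAL and the remaining levels to
// TRANSFER_DST_OPTIMAL so generate_mipmap() can blit one level at a time.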
void CommandBuffer::barrier_prepare_generate_mipmap(const Image &image, VkImageLayout base_level_layout,
                                                    VkPipelineStageFlags src_stage, VkAccessFlags src_access,
                                                    bool need_top_level_barrier)
{
	auto &create_info = image.get_create_info();
	VkImageMemoryBarrier barriers[2] = {};
	VK_ASSERT(create_info.levels > 1);
	(void)create_info;

	for (unsigned i = 0; i < 2; i++)
	{
		barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		barriers[i].image = image.get_image();
		barriers[i].subresourceRange.aspectMask = format_to_aspect_mask(image.get_format());
		barriers[i].subresourceRange.layerCount = image.get_create_info().layers;
		barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

		if (i == 0)
		{
			barriers[i].oldLayout = base_level_layout;
			barriers[i].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
			barriers[i].srcAccessMask = src_access;
			barriers[i].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			barriers[i].subresourceRange.baseMipLevel = 0;
			barriers[i].subresourceRange.levelCount = 1;
		}
		else
		{
			barriers[i].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
			barriers[i].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
			barriers[i].srcAccessMask = 0;
			barriers[i].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			barriers[i].subresourceRange.baseMipLevel = 1;
			barriers[i].subresourceRange.levelCount = image.get_create_info().levels - 1;
		}
	}

	barrier(src_stage, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, nullptr, 0, nullptr,
	        need_top_level_barrier ? 2 : 1,
	        need_top_level_barrier ? barriers : barriers + 1);
}

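// Generates the mip chain by blitting each level from the previous one, inserting a
// transfer barrier after each blit so the level can be read by the next iteration.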
void CommandBuffer::generate_mipmap(const Image &image)
{
	auto &create_info = image.get_create_info();
	VkOffset3D size = { int(create_info.width), int(create_info.height), int(create_info.depth) };
	const VkOffset3D origin = { 0, 0, 0 };

	VK_ASSERT(image.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

	VkImageMemoryBarrier b = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
	b.image = image.get_image();
	b.subresourceRange.levelCount = 1;
	b.subresourceRange.layerCount = image.get_create_info().layers;
	b.subresourceRange.aspectMask = format_to_aspect_mask(image.get_format());
	b.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	b.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
	b.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	b.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
	b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

	for (unsigned i = 1; i < create_info.levels; i++)
	{
		VkOffset3D src_size = size;
		size.x = max(size.x >> 1, 1);
		size.y = max(size.y >> 1, 1);
		size.z = max(size.z >> 1, 1);

		blit_image(image, image,
		           origin, size, origin, src_size, i, i - 1, 0, 0, create_info.layers, VK_FILTER_LINEAR);

		b.subresourceRange.baseMipLevel = i;
		barrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
		        0, nullptr, 0, nullptr, 1, &b);
	}
}

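// Blits a region between two images, one layer at a time (see the RADV workaround below).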
void CommandBuffer::blit_image(const Image &dst, const Image &src,
                               const VkOffset3D &dst_offset,
                               const VkOffset3D &dst_extent, const VkOffset3D &src_offset, const VkOffset3D &src_extent,
                               unsigned dst_level, unsigned src_level, unsigned dst_base_layer, unsigned src_base_layer,
                               unsigned num_layers, VkFilter filter)
{
	const auto add_offset = [](const VkOffset3D &a, const VkOffset3D &b) -> VkOffset3D {
		return { a.x + b.x, a.y + b.y, a.z + b.z };
	};

#if 0
	const VkImageBlit blit = {
		{ format_to_aspect_mask(src.get_create_info().format), src_level, src_base_layer, num_layers },
		{ src_offset, add_offset(src_offset, src_extent) },
		{ format_to_aspect_mask(dst.get_create_info().format), dst_level, dst_base_layer, num_layers },
		{ dst_offset, add_offset(dst_offset, dst_extent) },
	};

	vkCmdBlitImage(cmd,
	               src.get_image(), src.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
	               dst.get_image(), dst.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
	               1, &blit, filter);
#else
	// RADV workaround.
	for (unsigned i = 0; i < num_layers; i++)
	{
		const VkImageBlit blit = {
			{ format_to_aspect_mask(src.get_create_info().format), src_level, src_base_layer + i, 1 },
			{ src_offset, add_offset(src_offset, src_extent) },
			{ format_to_aspect_mask(dst.get_create_info().format), dst_level, dst_base_layer + i, 1 },
			{ dst_offset, add_offset(dst_offset, dst_extent) },
		};

		vkCmdBlitImage(cmd,
		               src.get_image(), src.get_layout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
		               dst.get_image(), dst.get_layout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
		               1, &blit, filter);
	}
#endif
}

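// Resets all cached binding and pipeline state so the next flush rebinds everything.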
void CommandBuffer::begin_context()
{
	dirty = ~0u;
	dirty_sets = ~0u;
	dirty_vbos = ~0u;
	current_pipeline = VK_NULL_HANDLE;
	current_pipeline_layout = VK_NULL_HANDLE;
	current_layout = nullptr;
	current_program = nullptr;
	memset(bindings.cookies, 0, sizeof(bindings.cookies));
	memset(bindings.secondary_cookies, 0, sizeof(bindings.secondary_cookies));
	memset(&index, 0, sizeof(index));
	memset(vbo.buffers, 0, sizeof(vbo.buffers));
}

void CommandBuffer::begin_compute()
{
	is_compute = true;
	begin_context();
}

void CommandBuffer::begin_graphics()
{
	is_compute = false;
	begin_context();
}

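// Clamps the requested render area to the framebuffer dimensions and derives the default viewport and scissor.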
void CommandBuffer::init_viewport_scissor(const RenderPassInfo &info, const Framebuffer *framebuffer)
{
	VkRect2D rect = info.render_area;
	rect.offset.x = min(framebuffer->get_width(), uint32_t(rect.offset.x));
	rect.offset.y = min(framebuffer->get_height(), uint32_t(rect.offset.y));
	rect.extent.width = min(framebuffer->get_width() - rect.offset.x, rect.extent.width);
	rect.extent.height = min(framebuffer->get_height() - rect.offset.y, rect.extent.height);

	viewport = { 0.0f, 0.0f, float(framebuffer->get_width()), float(framebuffer->get_height()), 0.0f, 1.0f };
	scissor = rect;
}

CommandBufferHandle CommandBuffer::request_secondary_command_buffer(Device &device, const RenderPassInfo &info,
                                                                    unsigned thread_index, unsigned subpass)
{
	auto *fb = &device.request_framebuffer(info);
	auto cmd = device.request_secondary_command_buffer_for_thread(thread_index, fb, subpass);
	cmd->begin_graphics();

	cmd->framebuffer = fb;
	cmd->compatible_render_pass = &fb->get_compatible_render_pass();
	cmd->actual_render_pass = &device.request_render_pass(info, false);

	cmd->init_viewport_scissor(info, fb);
	cmd->current_subpass = subpass;
	cmd->current_contents = VK_SUBPASS_CONTENTS_INLINE;

	return cmd;
}

CommandBufferHandle CommandBuffer::request_secondary_command_buffer(unsigned thread_index, unsigned subpass)
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(!is_secondary);

	auto cmd = device->request_secondary_command_buffer_for_thread(thread_index, framebuffer, subpass);
	cmd->begin_graphics();

	cmd->framebuffer = framebuffer;
	cmd->compatible_render_pass = compatible_render_pass;
	cmd->actual_render_pass = actual_render_pass;

	cmd->current_subpass = subpass;
	cmd->viewport = viewport;
	cmd->scissor = scissor;
	cmd->current_contents = VK_SUBPASS_CONTENTS_INLINE;

	return cmd;
}

void CommandBuffer::submit_secondary(CommandBufferHandle secondary)
{
	VK_ASSERT(!is_secondary);
	VK_ASSERT(secondary->is_secondary);
	VK_ASSERT(current_subpass == secondary->current_subpass);
	VK_ASSERT(current_contents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);

	device->submit_secondary(*this, *secondary);
}

void CommandBuffer::next_subpass(VkSubpassContents contents)
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(compatible_render_pass);
	VK_ASSERT(actual_render_pass);
	current_subpass++;
	VK_ASSERT(current_subpass < actual_render_pass->get_num_subpasses());
	vkCmdNextSubpass(cmd, contents);
	current_contents = contents;
	begin_graphics();
}

void CommandBuffer::begin_render_pass(const RenderPassInfo &info, VkSubpassContents contents)
{
	VK_ASSERT(!framebuffer);
	VK_ASSERT(!compatible_render_pass);
	VK_ASSERT(!actual_render_pass);

	framebuffer = &device->request_framebuffer(info);
	compatible_render_pass = &framebuffer->get_compatible_render_pass();
	actual_render_pass = &device->request_render_pass(info, false);

	init_viewport_scissor(info, framebuffer);

	VkClearValue clear_values[VULKAN_NUM_ATTACHMENTS + 1];
	unsigned num_clear_values = 0;

	for (unsigned i = 0; i < info.num_color_attachments; i++)
	{
		VK_ASSERT(info.color_attachments[i]);
		if (info.clear_attachments & (1u << i))
		{
			clear_values[i].color = info.clear_color[i];
			num_clear_values = i + 1;
		}

		if (info.color_attachments[i]->get_image().is_swapchain_image())
			uses_swapchain = true;
	}

	if (info.depth_stencil && (info.op_flags & RENDER_PASS_OP_CLEAR_DEPTH_STENCIL_BIT) != 0)
	{
		clear_values[info.num_color_attachments].depthStencil = info.clear_depth_stencil;
		num_clear_values = info.num_color_attachments + 1;
	}

	VkRenderPassBeginInfo begin_info = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO };
	begin_info.renderPass = actual_render_pass->get_render_pass();
	begin_info.framebuffer = framebuffer->get_framebuffer();
	begin_info.renderArea = scissor;
	begin_info.clearValueCount = num_clear_values;
	begin_info.pClearValues = clear_values;

	vkCmdBeginRenderPass(cmd, &begin_info, contents);

	current_contents = contents;
	begin_graphics();
}

void CommandBuffer::end_render_pass()
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(actual_render_pass);
	VK_ASSERT(compatible_render_pass);

	vkCmdEndRenderPass(cmd);

	framebuffer = nullptr;
	actual_render_pass = nullptr;
	compatible_render_pass = nullptr;
	begin_compute();
}

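// Builds a compute pipeline for the current program and specialization constants,
// then caches it in the program under the given state hash.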
VkPipeline CommandBuffer::build_compute_pipeline(Hash hash)
{
	auto &shader = *current_program->get_shader(ShaderStage::Compute);
	VkComputePipelineCreateInfo info = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
	info.layout = current_program->get_pipeline_layout()->get_layout();
	info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	info.stage.module = shader.get_module();
	info.stage.pName = "main";
	info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;

#ifdef GRANITE_SPIRV_DUMP
	LOGI("Compiling SPIR-V file: (%s) %s\n",
	     Shader::stage_to_name(ShaderStage::Compute),
	     (to_string(shader.get_hash()) + ".spv").c_str());
#endif

	VkSpecializationInfo spec_info = {};
	VkSpecializationMapEntry spec_entries[VULKAN_NUM_SPEC_CONSTANTS];
	auto mask = current_layout->get_resource_layout().combined_spec_constant_mask &
	            static_state.state.spec_constant_mask;

	if (mask)
	{
		info.stage.pSpecializationInfo = &spec_info;
		spec_info.pData = potential_static_state.spec_constants;
		spec_info.dataSize = sizeof(potential_static_state.spec_constants);
		spec_info.pMapEntries = spec_entries;

		for_each_bit(mask, [&](uint32_t bit) {
			auto &entry = spec_entries[spec_info.mapEntryCount++];
			entry.offset = sizeof(uint32_t) * bit;
			entry.size = sizeof(uint32_t);
			entry.constantID = bit;
		});
	}

	VkPipeline compute_pipeline;
#ifdef GRANITE_VULKAN_FOSSILIZE
	device->register_compute_pipeline(hash, info);
#endif

	LOGI("Creating compute pipeline.\n");
	if (vkCreateComputePipelines(device->get_device(), cache, 1, &info, nullptr, &compute_pipeline) != VK_SUCCESS)
		LOGE("Failed to create compute pipeline!\n");

	return current_program->add_pipeline(hash, compute_pipeline);
}

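// Builds a graphics pipeline from the currently set static state, vertex attributes,
// render pass and program, then caches it in the program under the given hash.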
VkPipeline CommandBuffer::build_graphics_pipeline(Hash hash)
{
	// Viewport state
	VkPipelineViewportStateCreateInfo vp = { VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO };
	vp.viewportCount = 1;
	vp.scissorCount = 1;

	// Dynamic state
	VkPipelineDynamicStateCreateInfo dyn = { VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO };
	dyn.dynamicStateCount = 2;
	VkDynamicState states[7] = {
		VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_VIEWPORT,
	};
	dyn.pDynamicStates = states;

	if (static_state.state.depth_bias_enable)
		states[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BIAS;
	if (static_state.state.stencil_test)
	{
		states[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
		states[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
		states[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
	}

	// Blend state
	VkPipelineColorBlendAttachmentState blend_attachments[VULKAN_NUM_ATTACHMENTS];
	VkPipelineColorBlendStateCreateInfo blend = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO };
	blend.attachmentCount = compatible_render_pass->get_num_color_attachments(current_subpass);
	blend.pAttachments = blend_attachments;
	for (unsigned i = 0; i < blend.attachmentCount; i++)
	{
		auto &att = blend_attachments[i];
		att = {};

		if (compatible_render_pass->get_color_attachment(current_subpass, i).attachment != VK_ATTACHMENT_UNUSED &&
		    (current_layout->get_resource_layout().render_target_mask & (1u << i)))
		{
			att.colorWriteMask = (static_state.state.write_mask >> (4 * i)) & 0xf;
			att.blendEnable = static_state.state.blend_enable;
			if (att.blendEnable)
			{
				att.alphaBlendOp = static_cast<VkBlendOp>(static_state.state.alpha_blend_op);
				att.colorBlendOp = static_cast<VkBlendOp>(static_state.state.color_blend_op);
				att.dstAlphaBlendFactor = static_cast<VkBlendFactor>(static_state.state.dst_alpha_blend);
				att.srcAlphaBlendFactor = static_cast<VkBlendFactor>(static_state.state.src_alpha_blend);
				att.dstColorBlendFactor = static_cast<VkBlendFactor>(static_state.state.dst_color_blend);
				att.srcColorBlendFactor = static_cast<VkBlendFactor>(static_state.state.src_color_blend);
			}
		}
	}
	memcpy(blend.blendConstants, potential_static_state.blend_constants, sizeof(blend.blendConstants));

	// Depth state
	VkPipelineDepthStencilStateCreateInfo ds = { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO };
	ds.stencilTestEnable = compatible_render_pass->has_stencil(current_subpass) && static_state.state.stencil_test;
	ds.depthTestEnable = compatible_render_pass->has_depth(current_subpass) && static_state.state.depth_test;
	ds.depthWriteEnable = compatible_render_pass->has_depth(current_subpass) && static_state.state.depth_write;

	if (ds.depthTestEnable)
		ds.depthCompareOp = static_cast<VkCompareOp>(static_state.state.depth_compare);

	if (ds.stencilTestEnable)
	{
		ds.front.compareOp = static_cast<VkCompareOp>(static_state.state.stencil_front_compare_op);
		ds.front.passOp = static_cast<VkStencilOp>(static_state.state.stencil_front_pass);
		ds.front.failOp = static_cast<VkStencilOp>(static_state.state.stencil_front_fail);
		ds.front.depthFailOp = static_cast<VkStencilOp>(static_state.state.stencil_front_depth_fail);
		ds.back.compareOp = static_cast<VkCompareOp>(static_state.state.stencil_back_compare_op);
		ds.back.passOp = static_cast<VkStencilOp>(static_state.state.stencil_back_pass);
		ds.back.failOp = static_cast<VkStencilOp>(static_state.state.stencil_back_fail);
		ds.back.depthFailOp = static_cast<VkStencilOp>(static_state.state.stencil_back_depth_fail);
	}

	// Vertex input
	VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
	VkVertexInputAttributeDescription vi_attribs[VULKAN_NUM_VERTEX_ATTRIBS];
	vi.pVertexAttributeDescriptions = vi_attribs;
	uint32_t attr_mask = current_layout->get_resource_layout().attribute_mask;
	uint32_t binding_mask = 0;
	for_each_bit(attr_mask, [&](uint32_t bit) {
		auto &attr = vi_attribs[vi.vertexAttributeDescriptionCount++];
		attr.location = bit;
		attr.binding = attribs[bit].binding;
		attr.format = attribs[bit].format;
		attr.offset = attribs[bit].offset;
		binding_mask |= 1u << attr.binding;
	});

	VkVertexInputBindingDescription vi_bindings[VULKAN_NUM_VERTEX_BUFFERS];
	vi.pVertexBindingDescriptions = vi_bindings;
	for_each_bit(binding_mask, [&](uint32_t bit) {
		auto &bind = vi_bindings[vi.vertexBindingDescriptionCount++];
		bind.binding = bit;
		bind.inputRate = vbo.input_rates[bit];
		bind.stride = vbo.strides[bit];
	});

	// Input assembly
	VkPipelineInputAssemblyStateCreateInfo ia = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };
	ia.primitiveRestartEnable = static_state.state.primitive_restart;
	ia.topology = static_cast<VkPrimitiveTopology>(static_state.state.topology);

	// Multisample
	VkPipelineMultisampleStateCreateInfo ms = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };
	ms.rasterizationSamples = static_cast<VkSampleCountFlagBits>(compatible_render_pass->get_sample_count(current_subpass));

	if (compatible_render_pass->get_sample_count(current_subpass) > 1)
	{
		ms.alphaToCoverageEnable = static_state.state.alpha_to_coverage;
		ms.alphaToOneEnable = static_state.state.alpha_to_one;
		ms.sampleShadingEnable = static_state.state.sample_shading;
		ms.minSampleShading = 1.0f;
	}

	// Raster
	VkPipelineRasterizationStateCreateInfo raster = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO };
	raster.cullMode = static_cast<VkCullModeFlags>(static_state.state.cull_mode);
	raster.frontFace = static_cast<VkFrontFace>(static_state.state.front_face);
	raster.lineWidth = 1.0f;
	raster.polygonMode = static_state.state.wireframe ? VK_POLYGON_MODE_LINE : VK_POLYGON_MODE_FILL;
	raster.depthBiasEnable = static_state.state.depth_bias_enable != 0;

	// Stages
	VkPipelineShaderStageCreateInfo stages[static_cast<unsigned>(ShaderStage::Count)];
	unsigned num_stages = 0;

	VkSpecializationInfo spec_info[ecast(ShaderStage::Count)] = {};
	VkSpecializationMapEntry spec_entries[ecast(ShaderStage::Count)][VULKAN_NUM_SPEC_CONSTANTS];

	for (unsigned i = 0; i < static_cast<unsigned>(ShaderStage::Count); i++)
	{
		auto stage = static_cast<ShaderStage>(i);
		if (current_program->get_shader(stage))
		{
			auto &s = stages[num_stages++];
			s = { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO };
			s.module = current_program->get_shader(stage)->get_module();
#ifdef GRANITE_SPIRV_DUMP
			LOGI("Compiling SPIR-V file: (%s) %s\n",
			     Shader::stage_to_name(stage),
			     (to_string(current_program->get_shader(stage)->get_hash()) + ".spv").c_str());
#endif
			s.pName = "main";
			s.stage = static_cast<VkShaderStageFlagBits>(1u << i);

			auto mask = current_layout->get_resource_layout().spec_constant_mask[i] &
			            static_state.state.spec_constant_mask;

			if (mask)
			{
				s.pSpecializationInfo = &spec_info[i];
				spec_info[i].pData = potential_static_state.spec_constants;
				spec_info[i].dataSize = sizeof(potential_static_state.spec_constants);
				spec_info[i].pMapEntries = spec_entries[i];

				for_each_bit(mask, [&](uint32_t bit) {
					auto &entry = spec_entries[i][spec_info[i].mapEntryCount++];
					entry.offset = sizeof(uint32_t) * bit;
					entry.size = sizeof(uint32_t);
					entry.constantID = bit;
				});
			}
		}
	}

	VkGraphicsPipelineCreateInfo pipe = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };
	pipe.layout = current_pipeline_layout;
	pipe.renderPass = compatible_render_pass->get_render_pass();
	pipe.subpass = current_subpass;

	pipe.pViewportState = &vp;
	pipe.pDynamicState = &dyn;
	pipe.pColorBlendState = &blend;
	pipe.pDepthStencilState = &ds;
	pipe.pVertexInputState = &vi;
	pipe.pInputAssemblyState = &ia;
	pipe.pMultisampleState = &ms;
	pipe.pRasterizationState = &raster;
	pipe.pStages = stages;
	pipe.stageCount = num_stages;

	VkPipeline pipeline;
#ifdef GRANITE_VULKAN_FOSSILIZE
	device->register_graphics_pipeline(hash, pipe);
#endif

	LOGI("Creating graphics pipeline.\n");
	VkResult res = vkCreateGraphicsPipelines(device->get_device(), cache, 1, &pipe, nullptr, &pipeline);
	if (res != VK_SUCCESS)
		LOGE("Failed to create graphics pipeline!\n");

	return current_program->add_pipeline(hash, pipeline);
}

void CommandBuffer::flush_compute_pipeline()
{
	Hasher h;
	h.u64(current_program->get_hash());

	// Spec constants.
	auto &layout = current_layout->get_resource_layout();
	uint32_t combined_spec_constant = layout.combined_spec_constant_mask;
	combined_spec_constant &= static_state.state.spec_constant_mask;
	h.u32(combined_spec_constant);
	for_each_bit(combined_spec_constant, [&](uint32_t bit) {
		h.u32(potential_static_state.spec_constants[bit]);
	});

	auto hash = h.get();
	current_pipeline = current_program->get_pipeline(hash);
	if (current_pipeline == VK_NULL_HANDLE)
		current_pipeline = build_compute_pipeline(hash);
}

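// Hashes all state that affects graphics pipeline creation and looks up (or builds) the matching pipeline.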
void CommandBuffer::flush_graphics_pipeline()
{
	Hasher h;
	active_vbos = 0;
	auto &layout = current_layout->get_resource_layout();
	for_each_bit(layout.attribute_mask, [&](uint32_t bit) {
		h.u32(bit);
		active_vbos |= 1u << attribs[bit].binding;
		h.u32(attribs[bit].binding);
		h.u32(attribs[bit].format);
		h.u32(attribs[bit].offset);
	});

	for_each_bit(active_vbos, [&](uint32_t bit) {
		h.u32(vbo.input_rates[bit]);
		h.u32(vbo.strides[bit]);
	});

	h.u64(compatible_render_pass->get_hash());
	h.u32(current_subpass);
	h.u64(current_program->get_hash());
	h.data(static_state.words, sizeof(static_state.words));

	if (static_state.state.blend_enable)
	{
		const auto needs_blend_constant = [](VkBlendFactor factor) {
			return factor == VK_BLEND_FACTOR_CONSTANT_COLOR || factor == VK_BLEND_FACTOR_CONSTANT_ALPHA;
		};
		bool b0 = needs_blend_constant(static_cast<VkBlendFactor>(static_state.state.src_color_blend));
		bool b1 = needs_blend_constant(static_cast<VkBlendFactor>(static_state.state.src_alpha_blend));
		bool b2 = needs_blend_constant(static_cast<VkBlendFactor>(static_state.state.dst_color_blend));
		bool b3 = needs_blend_constant(static_cast<VkBlendFactor>(static_state.state.dst_alpha_blend));
		if (b0 || b1 || b2 || b3)
			h.data(reinterpret_cast<uint32_t *>(potential_static_state.blend_constants),
			       sizeof(potential_static_state.blend_constants));
	}

	// Spec constants.
	uint32_t combined_spec_constant = layout.combined_spec_constant_mask;
	combined_spec_constant &= static_state.state.spec_constant_mask;
	h.u32(combined_spec_constant);
	for_each_bit(combined_spec_constant, [&](uint32_t bit) {
		h.u32(potential_static_state.spec_constants[bit]);
	});

	auto hash = h.get();
	current_pipeline = current_program->get_pipeline(hash);
	if (current_pipeline == VK_NULL_HANDLE)
		current_pipeline = build_graphics_pipeline(hash);
}

void CommandBuffer::flush_compute_state()
{
	VK_ASSERT(current_layout);
	VK_ASSERT(current_program);

	if (get_and_clear(COMMAND_BUFFER_DIRTY_PIPELINE_BIT))
	{
		VkPipeline old_pipe = current_pipeline;
		flush_compute_pipeline();
		if (old_pipe != current_pipeline)
			vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, current_pipeline);
	}

	flush_descriptor_sets();

	if (get_and_clear(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT))
	{
		auto &range = current_layout->get_resource_layout().push_constant_range;
		if (range.stageFlags != 0)
		{
			VK_ASSERT(range.offset == 0);
			vkCmdPushConstants(cmd, current_pipeline_layout, range.stageFlags,
			                   0, range.size,
			                   bindings.push_constant_data);
		}
	}
}

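// Flushes all dirty graphics state before a draw: pipeline, descriptor sets, push constants,
// dynamic state (viewport, scissor, depth bias, stencil) and vertex buffer bindings.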
void CommandBuffer::flush_render_state()
{
	VK_ASSERT(current_layout);
	VK_ASSERT(current_program);

	// We've invalidated pipeline state, update the VkPipeline.
	if (get_and_clear(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT | COMMAND_BUFFER_DIRTY_PIPELINE_BIT |
	                  COMMAND_BUFFER_DIRTY_STATIC_VERTEX_BIT))
	{
		VkPipeline old_pipe = current_pipeline;
		flush_graphics_pipeline();
		if (old_pipe != current_pipeline)
		{
			vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, current_pipeline);
			set_dirty(COMMAND_BUFFER_DYNAMIC_BITS);
		}
	}

	flush_descriptor_sets();

	if (get_and_clear(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT))
	{
		auto &range = current_layout->get_resource_layout().push_constant_range;
		if (range.stageFlags != 0)
		{
			VK_ASSERT(range.offset == 0);
			vkCmdPushConstants(cmd, current_pipeline_layout, range.stageFlags,
			                   0, range.size,
			                   bindings.push_constant_data);
		}
	}

	if (get_and_clear(COMMAND_BUFFER_DIRTY_VIEWPORT_BIT))
		vkCmdSetViewport(cmd, 0, 1, &viewport);
	if (get_and_clear(COMMAND_BUFFER_DIRTY_SCISSOR_BIT))
		vkCmdSetScissor(cmd, 0, 1, &scissor);
	if (static_state.state.depth_bias_enable && get_and_clear(COMMAND_BUFFER_DIRTY_DEPTH_BIAS_BIT))
		vkCmdSetDepthBias(cmd, dynamic_state.depth_bias_constant, 0.0f, dynamic_state.depth_bias_slope);
	if (static_state.state.stencil_test && get_and_clear(COMMAND_BUFFER_DIRTY_STENCIL_REFERENCE_BIT))
	{
		vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FACE_FRONT_BIT, dynamic_state.front_compare_mask);
		vkCmdSetStencilReference(cmd, VK_STENCIL_FACE_FRONT_BIT, dynamic_state.front_reference);
		vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FACE_FRONT_BIT, dynamic_state.front_write_mask);
		vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FACE_BACK_BIT, dynamic_state.back_compare_mask);
		vkCmdSetStencilReference(cmd, VK_STENCIL_FACE_BACK_BIT, dynamic_state.back_reference);
		vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FACE_BACK_BIT, dynamic_state.back_write_mask);
	}

	uint32_t update_vbo_mask = dirty_vbos & active_vbos;
	for_each_bit_range(update_vbo_mask, [&](uint32_t binding, uint32_t binding_count) {
#ifdef VULKAN_DEBUG
		for (unsigned i = binding; i < binding + binding_count; i++)
			VK_ASSERT(vbo.buffers[i] != VK_NULL_HANDLE);
#endif
		vkCmdBindVertexBuffers(cmd, binding, binding_count, vbo.buffers + binding, vbo.offsets + binding);
	});
	dirty_vbos &= ~update_vbo_mask;
}

void CommandBuffer::wait_events(unsigned num_events, const VkEvent *events,
                                VkPipelineStageFlags src_stages, VkPipelineStageFlags dst_stages,
                                unsigned barriers,
                                const VkMemoryBarrier *globals, unsigned buffer_barriers,
                                const VkBufferMemoryBarrier *buffers, unsigned image_barriers,
                                const VkImageMemoryBarrier *images)
{
	VK_ASSERT(!framebuffer);
	VK_ASSERT(!actual_render_pass);

	if (device->get_workarounds().emulate_event_as_pipeline_barrier)
	{
		barrier(src_stages, dst_stages,
		        barriers, globals,
		        buffer_barriers, buffers,
		        image_barriers, images);
	}
	else
	{
		vkCmdWaitEvents(cmd, num_events, events, src_stages, dst_stages,
		                barriers, globals, buffer_barriers, buffers, image_barriers, images);
	}
}

PipelineEvent CommandBuffer::signal_event(VkPipelineStageFlags stages)
{
	VK_ASSERT(!framebuffer);
	VK_ASSERT(!actual_render_pass);
	auto event = device->request_pipeline_event();
	if (!device->get_workarounds().emulate_event_as_pipeline_barrier)
		vkCmdSetEvent(cmd, event->get_event(), stages);
	event->set_stages(stages);
	return event;
}

void CommandBuffer::set_vertex_attrib(uint32_t attrib, uint32_t binding, VkFormat format, VkDeviceSize offset)
{
	VK_ASSERT(attrib < VULKAN_NUM_VERTEX_ATTRIBS);
	VK_ASSERT(framebuffer);

	auto &attr = attribs[attrib];

	if (attr.binding != binding || attr.format != format || attr.offset != offset)
		set_dirty(COMMAND_BUFFER_DIRTY_STATIC_VERTEX_BIT);

	VK_ASSERT(binding < VULKAN_NUM_VERTEX_BUFFERS);

	attr.binding = binding;
	attr.format = format;
	attr.offset = offset;
}

void CommandBuffer::set_index_buffer(const Buffer &buffer, VkDeviceSize offset, VkIndexType index_type)
{
	if (index.buffer == buffer.get_buffer() && index.offset == offset && index.index_type == index_type)
		return;

	index.buffer = buffer.get_buffer();
	index.offset = offset;
	index.index_type = index_type;
	vkCmdBindIndexBuffer(cmd, buffer.get_buffer(), offset, index_type);
}

void CommandBuffer::set_vertex_binding(uint32_t binding, const Buffer &buffer, VkDeviceSize offset, VkDeviceSize stride,
                                       VkVertexInputRate step_rate)
{
	VK_ASSERT(binding < VULKAN_NUM_VERTEX_BUFFERS);
	VK_ASSERT(framebuffer);

	VkBuffer vkbuffer = buffer.get_buffer();
	if (vbo.buffers[binding] != vkbuffer || vbo.offsets[binding] != offset)
		dirty_vbos |= 1u << binding;
	if (vbo.strides[binding] != stride || vbo.input_rates[binding] != step_rate)
		set_dirty(COMMAND_BUFFER_DIRTY_STATIC_VERTEX_BIT);

	vbo.buffers[binding] = vkbuffer;
	vbo.offsets[binding] = offset;
	vbo.strides[binding] = stride;
	vbo.input_rates[binding] = step_rate;
}

void CommandBuffer::set_viewport(const VkViewport &viewport)
{
	VK_ASSERT(framebuffer);
	this->viewport = viewport;
	set_dirty(COMMAND_BUFFER_DIRTY_VIEWPORT_BIT);
}

const VkViewport &CommandBuffer::get_viewport() const
{
	return this->viewport;
}

void CommandBuffer::set_scissor(const VkRect2D &rect)
{
	VK_ASSERT(framebuffer);
	VK_ASSERT(rect.offset.x >= 0);
	VK_ASSERT(rect.offset.y >= 0);
	scissor = rect;
	set_dirty(COMMAND_BUFFER_DIRTY_SCISSOR_BIT);
}

void CommandBuffer::push_constants(const void *data, VkDeviceSize offset, VkDeviceSize range)
{
	VK_ASSERT(offset + range <= VULKAN_PUSH_CONSTANT_SIZE);
	memcpy(bindings.push_constant_data + offset, data, range);
	set_dirty(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT);
}

#ifdef GRANITE_VULKAN_FILESYSTEM
void CommandBuffer::set_program(const std::string &compute, const std::vector<std::pair<std::string, int>> &defines)
{
	auto *p = device->get_shader_manager().register_compute(compute);
	unsigned variant = p->register_variant(defines);
	set_program(*p->get_program(variant));
}

void CommandBuffer::set_program(const std::string &vertex, const std::string &fragment,
                                const std::vector<std::pair<std::string, int>> &defines)
{
	auto *p = device->get_shader_manager().register_graphics(vertex, fragment);
	unsigned variant = p->register_variant(defines);
	set_program(*p->get_program(variant));
}
#endif

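// Switches the bound program and invalidates only the descriptor sets whose layout actually changed.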
void CommandBuffer::set_program(Program &program)
{
	if (current_program == &program)
		return;

	current_program = &program;
	current_pipeline = VK_NULL_HANDLE;

	VK_ASSERT((framebuffer && current_program->get_shader(ShaderStage::Vertex)) ||
	          (!framebuffer && current_program->get_shader(ShaderStage::Compute)));

	set_dirty(COMMAND_BUFFER_DIRTY_PIPELINE_BIT | COMMAND_BUFFER_DYNAMIC_BITS);

	if (!current_layout)
	{
		dirty_sets = ~0u;
		set_dirty(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT);

		current_layout = program.get_pipeline_layout();
		current_pipeline_layout = current_layout->get_layout();
	}
	else if (program.get_pipeline_layout()->get_hash() != current_layout->get_hash())
	{
		auto &new_layout = program.get_pipeline_layout()->get_resource_layout();
		auto &old_layout = current_layout->get_resource_layout();

		// If the push constant layout changes, all descriptor sets
		// are invalidated.
		if (new_layout.push_constant_layout_hash != old_layout.push_constant_layout_hash)
		{
			dirty_sets = ~0u;
			set_dirty(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT);
		}
		else
		{
			// Find the first set whose descriptor set layout differs.
			auto *new_pipe_layout = program.get_pipeline_layout();
			for (unsigned set = 0; set < VULKAN_NUM_DESCRIPTOR_SETS; set++)
			{
				if (new_pipe_layout->get_allocator(set) != current_layout->get_allocator(set))
				{
					dirty_sets |= ~((1u << set) - 1);
					break;
				}
			}
		}
		current_layout = program.get_pipeline_layout();
		current_pipeline_layout = current_layout->get_layout();
	}
}

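// The allocate_* / update_* helpers below suballocate transient data from per-command-buffer
// blocks (UBO, IBO, staging, VBO), requesting a new block from the device when the current one is full.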
void *CommandBuffer::allocate_constant_data(unsigned set, unsigned binding, VkDeviceSize size)
{
	auto data = ubo_block.allocate(size);
	if (!data.host)
	{
		device->request_uniform_block(ubo_block, size);
		data = ubo_block.allocate(size);
	}
	set_uniform_buffer(set, binding, *ubo_block.gpu, data.offset, size);
	return data.host;
}

void *CommandBuffer::allocate_index_data(VkDeviceSize size, VkIndexType index_type)
{
	auto data = ibo_block.allocate(size);
	if (!data.host)
	{
		device->request_index_block(ibo_block, size);
		data = ibo_block.allocate(size);
	}
	set_index_buffer(*ibo_block.gpu, data.offset, index_type);
	return data.host;
}

void *CommandBuffer::update_buffer(const Buffer &buffer, VkDeviceSize offset, VkDeviceSize size)
{
	auto data = staging_block.allocate(size);
	if (!data.host)
	{
		device->request_staging_block(staging_block, size);
		data = staging_block.allocate(size);
	}
	copy_buffer(buffer, offset, *staging_block.cpu, data.offset, size);
	return data.host;
}

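// Stages an image update: allocates space in the staging block sized from the format's
// block layout and records a buffer-to-image copy from it.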
update_image(const Image & image,const VkOffset3D & offset,const VkExtent3D & extent,uint32_t row_length,uint32_t image_height,const VkImageSubresourceLayers & subresource)1206 void *CommandBuffer::update_image(const Image &image, const VkOffset3D &offset, const VkExtent3D &extent,
1207                                   uint32_t row_length, uint32_t image_height,
1208                                   const VkImageSubresourceLayers &subresource)
1209 {
1210 	auto &create_info = image.get_create_info();
1211 	uint32_t width = max(image.get_width() >> subresource.mipLevel, 1u);
1212 	uint32_t height = max(image.get_height() >> subresource.mipLevel, 1u);
1213 	uint32_t depth = max(image.get_depth() >> subresource.mipLevel, 1u);
1214 
1215 	if (!row_length)
1216 		row_length = width;
1217 	if (!image_height)
1218 		image_height = height;
1219 
1220 	uint32_t blocks_x = row_length;
1221 	uint32_t blocks_y = image_height;
1222 	format_num_blocks(create_info.format, blocks_x, blocks_y);
1223 
1224 	VkDeviceSize size =
1225 	    TextureFormatLayout::format_block_size(create_info.format) * subresource.layerCount * depth * blocks_x * blocks_y;
1226 
1227 	auto data = staging_block.allocate(size);
1228 	if (!data.host)
1229 	{
1230 		device->request_staging_block(staging_block, size);
1231 		data = staging_block.allocate(size);
1232 	}
1233 
1234 	copy_buffer_to_image(image, *staging_block.cpu, data.offset, offset, extent, row_length, image_height, subresource);
1235 	return data.host;
1236 }
1237 
1238 void *CommandBuffer::update_image(const Image &image, uint32_t row_length, uint32_t image_height)
1239 {
1240 	const VkImageSubresourceLayers subresource = {
1241 		format_to_aspect_mask(image.get_format()), 0, 0, 1,
1242 	};
1243 	return update_image(image, { 0, 0, 0 }, { image.get_width(), image.get_height(), image.get_depth() }, row_length,
1244 	                    image_height, subresource);
1245 }
1246 
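// allocate_vertex_data() is the vertex-stream variant of the same scheme: grab a
// transient chunk from the per-frame vertex block, bind it at the requested binding
// with the given stride and input rate, and hand the host pointer back. A hedged
// sketch for streaming a small dynamic vertex buffer (the layout is hypothetical,
// and the default step_rate argument is assumed from the call in set_quad_vertex_state):
//
//   auto *pos = static_cast<float *>(cmd.allocate_vertex_data(0, 4 * 2 * sizeof(float),
//                                                             2 * sizeof(float)));
//   // ... write 4 vec2 positions through pos ...
//   cmd.set_vertex_attrib(0, 0, VK_FORMAT_R32G32_SFLOAT, 0);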
1247 void *CommandBuffer::allocate_vertex_data(unsigned binding, VkDeviceSize size, VkDeviceSize stride,
1248                                           VkVertexInputRate step_rate)
1249 {
1250 	auto data = vbo_block.allocate(size);
1251 	if (!data.host)
1252 	{
1253 		device->request_vertex_block(vbo_block, size);
1254 		data = vbo_block.allocate(size);
1255 	}
1256 
1257 	set_vertex_binding(binding, *vbo_block.gpu, data.offset, stride, step_rate);
1258 	return data.host;
1259 }
1260 
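// The set_* binding functions below are redundancy-filtered: each binding slot keeps
// the cookie (unique ID) of the resource bound to it, and rebinding the same resource
// with the same parameters is a no-op. Only a real change marks the set dirty, which
// keeps flush_descriptor_set() work proportional to actual binding churn.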
1261 void CommandBuffer::set_uniform_buffer(unsigned set, unsigned binding, const Buffer &buffer, VkDeviceSize offset,
1262                                        VkDeviceSize range)
1263 {
1264 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1265 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1266 	VK_ASSERT(buffer.get_create_info().usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
1267 	auto &b = bindings.bindings[set][binding];
1268 
1269 	if (buffer.get_cookie() == bindings.cookies[set][binding] && b.buffer.offset == offset && b.buffer.range == range)
1270 		return;
1271 
1272 	b.buffer = { buffer.get_buffer(), offset, range };
1273 	bindings.cookies[set][binding] = buffer.get_cookie();
1274 	bindings.secondary_cookies[set][binding] = 0;
1275 	dirty_sets |= 1u << set;
1276 }
1277 
1278 void CommandBuffer::set_storage_buffer(unsigned set, unsigned binding, const Buffer &buffer, VkDeviceSize offset,
1279                                        VkDeviceSize range)
1280 {
1281 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1282 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1283 	VK_ASSERT(buffer.get_create_info().usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1284 	auto &b = bindings.bindings[set][binding];
1285 
1286 	if (buffer.get_cookie() == bindings.cookies[set][binding] && b.buffer.offset == offset && b.buffer.range == range)
1287 		return;
1288 
1289 	b.buffer = { buffer.get_buffer(), offset, range };
1290 	bindings.cookies[set][binding] = buffer.get_cookie();
1291 	bindings.secondary_cookies[set][binding] = 0;
1292 	dirty_sets |= 1u << set;
1293 }
1294 
1295 void CommandBuffer::set_uniform_buffer(unsigned set, unsigned binding, const Buffer &buffer)
1296 {
1297 	set_uniform_buffer(set, binding, buffer, 0, buffer.get_create_info().size);
1298 }
1299 
1300 void CommandBuffer::set_storage_buffer(unsigned set, unsigned binding, const Buffer &buffer)
1301 {
1302 	set_storage_buffer(set, binding, buffer, 0, buffer.get_create_info().size);
1303 }
1304 
1305 void CommandBuffer::set_sampler(unsigned set, unsigned binding, const Sampler &sampler)
1306 {
1307 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1308 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1309 	if (sampler.get_cookie() == bindings.secondary_cookies[set][binding])
1310 		return;
1311 
1312 	auto &b = bindings.bindings[set][binding];
1313 	b.image.fp.sampler = sampler.get_sampler();
1314 	b.image.integer.sampler = sampler.get_sampler();
1315 	dirty_sets |= 1u << set;
1316 	bindings.secondary_cookies[set][binding] = sampler.get_cookie();
1317 }
1318 
1319 void CommandBuffer::set_buffer_view(unsigned set, unsigned binding, const BufferView &view)
1320 {
1321 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1322 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1323 	VK_ASSERT(view.get_buffer().get_create_info().usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT);
1324 	if (view.get_cookie() == bindings.cookies[set][binding])
1325 		return;
1326 	auto &b = bindings.bindings[set][binding];
1327 	b.buffer_view = view.get_view();
1328 	bindings.cookies[set][binding] = view.get_cookie();
1329 	bindings.secondary_cookies[set][binding] = 0;
1330 	dirty_sets |= 1u << set;
1331 }
1332 
1333 void CommandBuffer::set_input_attachments(unsigned set, unsigned start_binding)
1334 {
1335 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1336 	VK_ASSERT(start_binding + actual_render_pass->get_num_input_attachments(current_subpass) <= VULKAN_NUM_BINDINGS);
1337 	unsigned num_input_attachments = actual_render_pass->get_num_input_attachments(current_subpass);
1338 	for (unsigned i = 0; i < num_input_attachments; i++)
1339 	{
1340 		auto &ref = actual_render_pass->get_input_attachment(current_subpass, i);
1341 		if (ref.attachment == VK_ATTACHMENT_UNUSED)
1342 			continue;
1343 
1344 		ImageView *view = framebuffer->get_attachment(ref.attachment);
1345 		VK_ASSERT(view);
1346 		VK_ASSERT(view->get_image().get_create_info().usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
1347 
1348 		if (view->get_cookie() == bindings.cookies[set][start_binding + i] &&
1349 		    bindings.bindings[set][start_binding + i].image.fp.imageLayout == ref.layout)
1350 		{
1351 			continue;
1352 		}
1353 
1354 		auto &b = bindings.bindings[set][start_binding + i];
1355 		b.image.fp.imageLayout = ref.layout;
1356 		b.image.integer.imageLayout = ref.layout;
1357 		b.image.fp.imageView = view->get_float_view();
1358 		b.image.integer.imageView = view->get_integer_view();
1359 		bindings.cookies[set][start_binding + i] = view->get_cookie();
1360 		dirty_sets |= 1u << set;
1361 	}
1362 }
1363 
1364 void CommandBuffer::set_texture(unsigned set, unsigned binding,
1365                                 VkImageView float_view, VkImageView integer_view,
1366                                 VkImageLayout layout,
1367                                 uint64_t cookie)
1368 {
1369 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1370 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1371 
1372 	if (cookie == bindings.cookies[set][binding] && bindings.bindings[set][binding].image.fp.imageLayout == layout)
1373 		return;
1374 
1375 	auto &b = bindings.bindings[set][binding];
1376 	b.image.fp.imageLayout = layout;
1377 	b.image.fp.imageView = float_view;
1378 	b.image.integer.imageLayout = layout;
1379 	b.image.integer.imageView = integer_view;
1380 	bindings.cookies[set][binding] = cookie;
1381 	dirty_sets |= 1u << set;
1382 }
1383 
1384 void CommandBuffer::set_texture(unsigned set, unsigned binding, const ImageView &view)
1385 {
1386 	VK_ASSERT(view.get_image().get_create_info().usage & VK_IMAGE_USAGE_SAMPLED_BIT);
1387 	set_texture(set, binding, view.get_float_view(), view.get_integer_view(),
1388 	            view.get_image().get_layout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), view.get_cookie());
1389 }
1390 
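// The UNORM/sRGB variants below reuse the image's cookie but tag it with a low bit,
// so binding the sRGB view of an image is not mistaken for a redundant rebind of its
// default view. This presumes the low bits of base cookie values are otherwise unused.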
1391 enum CookieBits
1392 {
1393 	COOKIE_BIT_UNORM = 1 << 0,
1394 	COOKIE_BIT_SRGB = 1 << 1
1395 };
1396 
1397 void CommandBuffer::set_unorm_texture(unsigned set, unsigned binding, const ImageView &view)
1398 {
1399 	VK_ASSERT(view.get_image().get_create_info().usage & VK_IMAGE_USAGE_SAMPLED_BIT);
1400 	auto unorm_view = view.get_unorm_view();
1401 	VK_ASSERT(unorm_view != VK_NULL_HANDLE);
1402 	set_texture(set, binding, unorm_view, unorm_view,
1403 	            view.get_image().get_layout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), view.get_cookie() | COOKIE_BIT_UNORM);
1404 }
1405 
1406 void CommandBuffer::set_srgb_texture(unsigned set, unsigned binding, const ImageView &view)
1407 {
1408 	VK_ASSERT(view.get_image().get_create_info().usage & VK_IMAGE_USAGE_SAMPLED_BIT);
1409 	auto srgb_view = view.get_srgb_view();
1410 	VK_ASSERT(srgb_view != VK_NULL_HANDLE);
1411 	set_texture(set, binding, srgb_view, srgb_view,
1412 	            view.get_image().get_layout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), view.get_cookie() | COOKIE_BIT_SRGB);
1413 }
1414 
1415 void CommandBuffer::set_texture(unsigned set, unsigned binding, const ImageView &view, const Sampler &sampler)
1416 {
1417 	set_sampler(set, binding, sampler);
1418 	set_texture(set, binding, view);
1419 }
1420 
1421 void CommandBuffer::set_texture(unsigned set, unsigned binding, const ImageView &view, StockSampler stock)
1422 {
1423 	VK_ASSERT(set < VULKAN_NUM_DESCRIPTOR_SETS);
1424 	VK_ASSERT(binding < VULKAN_NUM_BINDINGS);
1425 	VK_ASSERT(view.get_image().get_create_info().usage & VK_IMAGE_USAGE_SAMPLED_BIT);
1426 	const auto &sampler = device->get_stock_sampler(stock);
1427 	set_texture(set, binding, view, sampler);
1428 }
1429 
1430 void CommandBuffer::set_sampler(unsigned set, unsigned binding, StockSampler stock)
1431 {
1432 	const auto &sampler = device->get_stock_sampler(stock);
1433 	set_sampler(set, binding, sampler);
1434 }
1435 
1436 void CommandBuffer::set_storage_texture(unsigned set, unsigned binding, const ImageView &view)
1437 {
1438 	VK_ASSERT(view.get_image().get_create_info().usage & VK_IMAGE_USAGE_STORAGE_BIT);
1439 	set_texture(set, binding, view.get_float_view(), view.get_integer_view(),
1440 	            view.get_image().get_layout(VK_IMAGE_LAYOUT_GENERAL), view.get_cookie());
1441 }
1442 
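// flush_descriptor_set() hashes the bindings that the current pipeline layout actually
// uses (per resource class), looks the hash up in the per-thread descriptor allocator,
// and only builds VkWriteDescriptorSet entries on a cache miss. Uniform buffers are
// bound as UNIFORM_BUFFER_DYNAMIC, so their offsets are passed to
// vkCmdBindDescriptorSets as dynamic offsets rather than baked into the set, which
// lets the transient UBO allocations above reuse cached descriptor sets.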
1443 void CommandBuffer::flush_descriptor_set(uint32_t set)
1444 {
1445 	auto &layout = current_layout->get_resource_layout();
1446 	auto &set_layout = layout.sets[set];
1447 	uint32_t num_dynamic_offsets = 0;
1448 	uint32_t dynamic_offsets[VULKAN_NUM_BINDINGS];
1449 	Hasher h;
1450 
1451 	h.u32(set_layout.fp_mask);
1452 
1453 	// UBOs
1454 	for_each_bit(set_layout.uniform_buffer_mask, [&](uint32_t binding) {
1455 		h.u64(bindings.cookies[set][binding]);
1456 		h.u32(bindings.bindings[set][binding].buffer.range);
1457 		VK_ASSERT(bindings.bindings[set][binding].buffer.buffer != VK_NULL_HANDLE);
1458 
1459 		dynamic_offsets[num_dynamic_offsets++] = bindings.bindings[set][binding].buffer.offset;
1460 	});
1461 
1462 	// SSBOs
1463 	for_each_bit(set_layout.storage_buffer_mask, [&](uint32_t binding) {
1464 		h.u64(bindings.cookies[set][binding]);
1465 		h.u32(bindings.bindings[set][binding].buffer.offset);
1466 		h.u32(bindings.bindings[set][binding].buffer.range);
1467 		VK_ASSERT(bindings.bindings[set][binding].buffer.buffer != VK_NULL_HANDLE);
1468 	});
1469 
1470 	// Sampled buffers
1471 	for_each_bit(set_layout.sampled_buffer_mask, [&](uint32_t binding) {
1472 		h.u64(bindings.cookies[set][binding]);
1473 		VK_ASSERT(bindings.bindings[set][binding].buffer_view != VK_NULL_HANDLE);
1474 	});
1475 
1476 	// Sampled images
1477 	for_each_bit(set_layout.sampled_image_mask, [&](uint32_t binding) {
1478 		h.u64(bindings.cookies[set][binding]);
1479 		if (!has_immutable_sampler(set_layout, binding))
1480 		{
1481 			h.u64(bindings.secondary_cookies[set][binding]);
1482 			VK_ASSERT(bindings.bindings[set][binding].image.fp.sampler != VK_NULL_HANDLE);
1483 		}
1484 		h.u32(bindings.bindings[set][binding].image.fp.imageLayout);
1485 		VK_ASSERT(bindings.bindings[set][binding].image.fp.imageView != VK_NULL_HANDLE);
1486 	});
1487 
1488 	// Separate images
1489 	for_each_bit(set_layout.separate_image_mask, [&](uint32_t binding) {
1490 		h.u64(bindings.cookies[set][binding]);
1491 		h.u32(bindings.bindings[set][binding].image.fp.imageLayout);
1492 		VK_ASSERT(bindings.bindings[set][binding].image.fp.imageView != VK_NULL_HANDLE);
1493 	});
1494 
1495 	// Separate samplers
1496 	for_each_bit(set_layout.sampler_mask & ~set_layout.immutable_sampler_mask, [&](uint32_t binding) {
1497 		h.u64(bindings.secondary_cookies[set][binding]);
1498 		VK_ASSERT(bindings.bindings[set][binding].image.fp.sampler != VK_NULL_HANDLE);
1499 	});
1500 
1501 	// Storage images
1502 	for_each_bit(set_layout.storage_image_mask, [&](uint32_t binding) {
1503 		h.u64(bindings.cookies[set][binding]);
1504 		h.u32(bindings.bindings[set][binding].image.fp.imageLayout);
1505 		VK_ASSERT(bindings.bindings[set][binding].image.fp.imageView != VK_NULL_HANDLE);
1506 	});
1507 
1508 	// Input attachments
1509 	for_each_bit(set_layout.input_attachment_mask, [&](uint32_t binding) {
1510 		h.u64(bindings.cookies[set][binding]);
1511 		h.u32(bindings.bindings[set][binding].image.fp.imageLayout);
1512 		VK_ASSERT(bindings.bindings[set][binding].image.fp.imageView != VK_NULL_HANDLE);
1513 	});
1514 
1515 	Hash hash = h.get();
1516 	auto allocated = current_layout->get_allocator(set)->find(thread_index, hash);
1517 
1518 	// The descriptor set was not found in the cache; build and write it now.
1519 	if (!allocated.second)
1520 	{
1521 		uint32_t write_count = 0;
1522 		uint32_t buffer_info_count = 0;
1523 		VkWriteDescriptorSet writes[VULKAN_NUM_BINDINGS];
1524 		VkDescriptorBufferInfo buffer_info[VULKAN_NUM_BINDINGS];
1525 
1526 		for_each_bit(set_layout.uniform_buffer_mask, [&](uint32_t binding) {
1527 			auto &write = writes[write_count++];
1528 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1529 			write.pNext = nullptr;
1530 			write.descriptorCount = 1;
1531 			write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
1532 			write.dstArrayElement = 0;
1533 			write.dstBinding = binding;
1534 			write.dstSet = allocated.first;
1535 
1536 			// Offsets are applied dynamically.
1537 			auto &buffer = buffer_info[buffer_info_count++];
1538 			buffer = bindings.bindings[set][binding].buffer;
1539 			buffer.offset = 0;
1540 			write.pBufferInfo = &buffer;
1541 		});
1542 
1543 		for_each_bit(set_layout.storage_buffer_mask, [&](uint32_t binding) {
1544 			auto &write = writes[write_count++];
1545 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1546 			write.pNext = nullptr;
1547 			write.descriptorCount = 1;
1548 			write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1549 			write.dstArrayElement = 0;
1550 			write.dstBinding = binding;
1551 			write.dstSet = allocated.first;
1552 			write.pBufferInfo = &bindings.bindings[set][binding].buffer;
1553 		});
1554 
1555 		for_each_bit(set_layout.sampled_buffer_mask, [&](uint32_t binding) {
1556 			auto &write = writes[write_count++];
1557 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1558 			write.pNext = nullptr;
1559 			write.descriptorCount = 1;
1560 			write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
1561 			write.dstArrayElement = 0;
1562 			write.dstBinding = binding;
1563 			write.dstSet = allocated.first;
1564 			write.pTexelBufferView = &bindings.bindings[set][binding].buffer_view;
1565 		});
1566 
1567 		for_each_bit(set_layout.sampled_image_mask, [&](uint32_t binding) {
1568 			auto &write = writes[write_count++];
1569 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1570 			write.pNext = nullptr;
1571 			write.descriptorCount = 1;
1572 			write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
1573 			write.dstArrayElement = 0;
1574 			write.dstBinding = binding;
1575 			write.dstSet = allocated.first;
1576 
1577 			if (set_layout.fp_mask & (1u << binding))
1578 				write.pImageInfo = &bindings.bindings[set][binding].image.fp;
1579 			else
1580 				write.pImageInfo = &bindings.bindings[set][binding].image.integer;
1581 		});
1582 
1583 		for_each_bit(set_layout.separate_image_mask, [&](uint32_t binding) {
1584 			auto &write = writes[write_count++];
1585 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1586 			write.pNext = nullptr;
1587 			write.descriptorCount = 1;
1588 			write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
1589 			write.dstArrayElement = 0;
1590 			write.dstBinding = binding;
1591 			write.dstSet = allocated.first;
1592 
1593 			if (set_layout.fp_mask & (1u << binding))
1594 				write.pImageInfo = &bindings.bindings[set][binding].image.fp;
1595 			else
1596 				write.pImageInfo = &bindings.bindings[set][binding].image.integer;
1597 		});
1598 
1599 		for_each_bit(set_layout.sampler_mask & ~set_layout.immutable_sampler_mask, [&](uint32_t binding) {
1600 			auto &write = writes[write_count++];
1601 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1602 			write.pNext = nullptr;
1603 			write.descriptorCount = 1;
1604 			write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
1605 			write.dstArrayElement = 0;
1606 			write.dstBinding = binding;
1607 			write.dstSet = allocated.first;
1608 			write.pImageInfo = &bindings.bindings[set][binding].image.fp;
1609 		});
1610 
1611 		for_each_bit(set_layout.storage_image_mask, [&](uint32_t binding) {
1612 			auto &write = writes[write_count++];
1613 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1614 			write.pNext = nullptr;
1615 			write.descriptorCount = 1;
1616 			write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1617 			write.dstArrayElement = 0;
1618 			write.dstBinding = binding;
1619 			write.dstSet = allocated.first;
1620 
1621 			if (set_layout.fp_mask & (1u << binding))
1622 				write.pImageInfo = &bindings.bindings[set][binding].image.fp;
1623 			else
1624 				write.pImageInfo = &bindings.bindings[set][binding].image.integer;
1625 		});
1626 
1627 		for_each_bit(set_layout.input_attachment_mask, [&](uint32_t binding) {
1628 			auto &write = writes[write_count++];
1629 			write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1630 			write.pNext = nullptr;
1631 			write.descriptorCount = 1;
1632 			write.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
1633 			write.dstArrayElement = 0;
1634 			write.dstBinding = binding;
1635 			write.dstSet = allocated.first;
1636 			if (set_layout.fp_mask & (1u << binding))
1637 				write.pImageInfo = &bindings.bindings[set][binding].image.fp;
1638 			else
1639 				write.pImageInfo = &bindings.bindings[set][binding].image.integer;
1640 		});
1641 
1642 		vkUpdateDescriptorSets(device->get_device(), write_count, writes, 0, nullptr);
1643 	}
1644 
1645 	vkCmdBindDescriptorSets(cmd, actual_render_pass ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE,
1646 	                        current_pipeline_layout, set, 1, &allocated.first, num_dynamic_offsets, dynamic_offsets);
1647 }
1648 
1649 void CommandBuffer::flush_descriptor_sets()
1650 {
1651 	auto &layout = current_layout->get_resource_layout();
1652 	uint32_t set_update = layout.descriptor_set_mask & dirty_sets;
1653 	for_each_bit(set_update, [&](uint32_t set) { flush_descriptor_set(set); });
1654 	dirty_sets &= ~set_update;
1655 }
1656 
1657 void CommandBuffer::draw(uint32_t vertex_count, uint32_t instance_count, uint32_t first_vertex, uint32_t first_instance)
1658 {
1659 	VK_ASSERT(current_program);
1660 	VK_ASSERT(!is_compute);
1661 	flush_render_state();
1662 	vkCmdDraw(cmd, vertex_count, instance_count, first_vertex, first_instance);
1663 }
1664 
1665 void CommandBuffer::draw_indexed(uint32_t index_count, uint32_t instance_count, uint32_t first_index,
1666                                  int32_t vertex_offset, uint32_t first_instance)
1667 {
1668 	VK_ASSERT(current_program);
1669 	VK_ASSERT(!is_compute);
1670 	VK_ASSERT(index.buffer != VK_NULL_HANDLE);
1671 	flush_render_state();
1672 	vkCmdDrawIndexed(cmd, index_count, instance_count, first_index, vertex_offset, first_instance);
1673 }
1674 
1675 void CommandBuffer::draw_indirect(const Vulkan::Buffer &buffer,
1676                                   uint32_t offset, uint32_t draw_count, uint32_t stride)
1677 {
1678 	VK_ASSERT(current_program);
1679 	VK_ASSERT(!is_compute);
1680 	flush_render_state();
1681 	vkCmdDrawIndirect(cmd, buffer.get_buffer(), offset, draw_count, stride);
1682 }
1683 
1684 void CommandBuffer::draw_indexed_indirect(const Vulkan::Buffer &buffer,
1685                                           uint32_t offset, uint32_t draw_count, uint32_t stride)
1686 {
1687 	VK_ASSERT(current_program);
1688 	VK_ASSERT(!is_compute);
1689 	flush_render_state();
1690 	vkCmdDrawIndexedIndirect(cmd, buffer.get_buffer(), offset, draw_count, stride);
1691 }
1692 
1693 void CommandBuffer::dispatch_indirect(const Buffer &buffer, uint32_t offset)
1694 {
1695 	VK_ASSERT(current_program);
1696 	VK_ASSERT(is_compute);
1697 	flush_compute_state();
1698 	vkCmdDispatchIndirect(cmd, buffer.get_buffer(), offset);
1699 }
1700 
1701 void CommandBuffer::dispatch(uint32_t groups_x, uint32_t groups_y, uint32_t groups_z)
1702 {
1703 	VK_ASSERT(current_program);
1704 	VK_ASSERT(is_compute);
1705 	flush_compute_state();
1706 	vkCmdDispatch(cmd, groups_x, groups_y, groups_z);
1707 }
1708 
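// The set_*_state() helpers below reset the full static pipeline state to a handful
// of common presets (opaque geometry, screen-space quads, sprites) so call sites do
// not have to remember every field they may have left in an unexpected state.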
1709 void CommandBuffer::set_opaque_state()
1710 {
1711 	auto &state = static_state.state;
1712 	memset(&state, 0, sizeof(state));
1713 	state.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1714 	state.cull_mode = VK_CULL_MODE_BACK_BIT;
1715 	state.blend_enable = false;
1716 	state.depth_test = true;
1717 	state.depth_compare = VK_COMPARE_OP_LESS_OR_EQUAL;
1718 	state.depth_write = true;
1719 	state.depth_bias_enable = false;
1720 	state.primitive_restart = false;
1721 	state.stencil_test = false;
1722 	state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1723 	state.write_mask = ~0u;
1724 
1725 	set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1726 }
1727 
1728 void CommandBuffer::set_quad_state()
1729 {
1730 	auto &state = static_state.state;
1731 	memset(&state, 0, sizeof(state));
1732 	state.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1733 	state.cull_mode = VK_CULL_MODE_NONE;
1734 	state.blend_enable = false;
1735 	state.depth_test = false;
1736 	state.depth_write = false;
1737 	state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
1738 	state.write_mask = ~0u;
1739 	set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1740 }
1741 
1742 void CommandBuffer::set_opaque_sprite_state()
1743 {
1744 	auto &state = static_state.state;
1745 	memset(&state, 0, sizeof(state));
1746 	state.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1747 	state.cull_mode = VK_CULL_MODE_NONE;
1748 	state.blend_enable = false;
1749 	state.depth_compare = VK_COMPARE_OP_LESS;
1750 	state.depth_test = true;
1751 	state.depth_write = true;
1752 	state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
1753 	state.write_mask = ~0u;
1754 	set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1755 }
1756 
1757 void CommandBuffer::set_transparent_sprite_state()
1758 {
1759 	auto &state = static_state.state;
1760 	memset(&state, 0, sizeof(state));
1761 	state.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1762 	state.cull_mode = VK_CULL_MODE_NONE;
1763 	state.blend_enable = true;
1764 	state.depth_test = true;
1765 	state.depth_compare = VK_COMPARE_OP_LESS;
1766 	state.depth_write = false;
1767 	state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
1768 	state.write_mask = ~0u;
1769 
1770 	// The render target's alpha starts at 1 (fully transparent).
1771 	// Each blended layer multiplies it by (1 - src alpha), so the destination alpha accumulates the remaining transparency.
1772 	set_blend_factors(VK_BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_ZERO,
1773 	                  VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA);
1774 	set_blend_op(VK_BLEND_OP_ADD);
1775 
1776 	set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1777 }
1778 
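// save_state()/restore_state() let a caller snapshot part of the binding and render
// state, record some unrelated work, and then put things back while only dirtying
// what actually changed. A hedged usage sketch (the flag combination is just an example):
//
//   CommandBufferSavedState saved;
//   cmd.save_state(COMMAND_BUFFER_SAVED_RENDER_STATE_BIT | COMMAND_BUFFER_SAVED_VIEWPORT_BIT, saved);
//   // ... record helper passes that clobber state ...
//   cmd.restore_state(saved);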
1779 void CommandBuffer::restore_state(const CommandBufferSavedState &state)
1780 {
1781 	for (unsigned i = 0; i < VULKAN_NUM_DESCRIPTOR_SETS; i++)
1782 	{
1783 		if (state.flags & (COMMAND_BUFFER_SAVED_BINDINGS_0_BIT << i))
1784 		{
1785 			if (memcmp(state.bindings.bindings[i], bindings.bindings[i], sizeof(bindings.bindings[i])))
1786 			{
1787 				memcpy(bindings.bindings[i], state.bindings.bindings[i], sizeof(bindings.bindings[i]));
1788 				memcpy(bindings.cookies[i], state.bindings.cookies[i], sizeof(bindings.cookies[i]));
1789 				memcpy(bindings.secondary_cookies[i], state.bindings.secondary_cookies[i], sizeof(bindings.secondary_cookies[i]));
1790 				dirty_sets |= 1u << i;
1791 			}
1792 		}
1793 	}
1794 
1795 	if (state.flags & COMMAND_BUFFER_SAVED_PUSH_CONSTANT_BIT)
1796 	{
1797 		if (memcmp(state.bindings.push_constant_data, bindings.push_constant_data, sizeof(bindings.push_constant_data)))
1798 		{
1799 			memcpy(bindings.push_constant_data, state.bindings.push_constant_data, sizeof(bindings.push_constant_data));
1800 			set_dirty(COMMAND_BUFFER_DIRTY_PUSH_CONSTANTS_BIT);
1801 		}
1802 	}
1803 
1804 	if ((state.flags & COMMAND_BUFFER_SAVED_VIEWPORT_BIT) && memcmp(&state.viewport, &viewport, sizeof(viewport)))
1805 	{
1806 		viewport = state.viewport;
1807 		set_dirty(COMMAND_BUFFER_DIRTY_VIEWPORT_BIT);
1808 	}
1809 
1810 	if ((state.flags & COMMAND_BUFFER_SAVED_SCISSOR_BIT) && memcmp(&state.scissor, &scissor, sizeof(scissor)))
1811 	{
1812 		scissor = state.scissor;
1813 		set_dirty(COMMAND_BUFFER_DIRTY_SCISSOR_BIT);
1814 	}
1815 
1816 	if (state.flags & COMMAND_BUFFER_SAVED_RENDER_STATE_BIT)
1817 	{
1818 		if (memcmp(&state.static_state, &static_state, sizeof(static_state)))
1819 		{
1820 			memcpy(&static_state, &state.static_state, sizeof(static_state));
1821 			set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1822 		}
1823 
1824 		if (memcmp(&state.potential_static_state, &potential_static_state, sizeof(potential_static_state)))
1825 		{
1826 			memcpy(&potential_static_state, &state.potential_static_state, sizeof(potential_static_state));
1827 			set_dirty(COMMAND_BUFFER_DIRTY_STATIC_STATE_BIT);
1828 		}
1829 
1830 		if (memcmp(&state.dynamic_state, &dynamic_state, sizeof(dynamic_state)))
1831 		{
1832 			memcpy(&dynamic_state, &state.dynamic_state, sizeof(dynamic_state));
1833 			set_dirty(COMMAND_BUFFER_DIRTY_STENCIL_REFERENCE_BIT | COMMAND_BUFFER_DIRTY_DEPTH_BIAS_BIT);
1834 		}
1835 	}
1836 }
1837 
1838 void CommandBuffer::save_state(CommandBufferSaveStateFlags flags, CommandBufferSavedState &state)
1839 {
1840 	for (unsigned i = 0; i < VULKAN_NUM_DESCRIPTOR_SETS; i++)
1841 	{
1842 		if (flags & (COMMAND_BUFFER_SAVED_BINDINGS_0_BIT << i))
1843 		{
1844 			memcpy(state.bindings.bindings[i], bindings.bindings[i], sizeof(bindings.bindings[i]));
1845 			memcpy(state.bindings.cookies[i], bindings.cookies[i], sizeof(bindings.cookies[i]));
1846 			memcpy(state.bindings.secondary_cookies[i], bindings.secondary_cookies[i],
1847 			       sizeof(bindings.secondary_cookies[i]));
1848 		}
1849 	}
1850 
1851 	if (flags & COMMAND_BUFFER_SAVED_VIEWPORT_BIT)
1852 		state.viewport = viewport;
1853 	if (flags & COMMAND_BUFFER_SAVED_SCISSOR_BIT)
1854 		state.scissor = scissor;
1855 	if (flags & COMMAND_BUFFER_SAVED_RENDER_STATE_BIT)
1856 	{
1857 		memcpy(&state.static_state, &static_state, sizeof(static_state));
1858 		state.potential_static_state = potential_static_state;
1859 		state.dynamic_state = dynamic_state;
1860 	}
1861 
1862 	if (flags & COMMAND_BUFFER_SAVED_PUSH_CONSTANT_BIT)
1863 		memcpy(state.bindings.push_constant_data, bindings.push_constant_data, sizeof(bindings.push_constant_data));
1864 
1865 	state.flags = flags;
1866 }
1867 
1868 QueryPoolHandle CommandBuffer::write_timestamp(VkPipelineStageFlagBits stage)
1869 {
1870 	return device->write_timestamp(cmd, stage);
1871 }
1872 
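// end() finalizes the Vulkan command buffer and returns any still-mapped transient
// blocks (vertex/index/uniform/staging) to the device so they can be recycled once
// the submission retires; the destructor asserts that this has actually happened.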
1873 void CommandBuffer::end()
1874 {
1875 	if (vkEndCommandBuffer(cmd) != VK_SUCCESS)
1876 		LOGE("Failed to end command buffer.\n");
1877 
1878 	if (vbo_block.mapped)
1879 		device->request_vertex_block_nolock(vbo_block, 0);
1880 	if (ibo_block.mapped)
1881 		device->request_index_block_nolock(ibo_block, 0);
1882 	if (ubo_block.mapped)
1883 		device->request_uniform_block_nolock(ubo_block, 0);
1884 	if (staging_block.mapped)
1885 		device->request_staging_block_nolock(staging_block, 0);
1886 }
1887 
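// begin_region()/end_region() emit debug labels for tools such as RenderDoc,
// preferring VK_EXT_debug_utils and falling back to VK_EXT_debug_marker.
// A hedged usage sketch (the label name is arbitrary):
//
//   cmd.begin_region("ShadowMap", nullptr);
//   // ... record shadow pass ...
//   cmd.end_region();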
1888 void CommandBuffer::begin_region(const char *name, const float *color)
1889 {
1890 	if (device->ext.supports_debug_utils)
1891 	{
1892 		VkDebugUtilsLabelEXT info = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
1893 		if (color)
1894 		{
1895 			for (unsigned i = 0; i < 4; i++)
1896 				info.color[i] = color[i];
1897 		}
1898 		else
1899 		{
1900 			for (unsigned i = 0; i < 4; i++)
1901 				info.color[i] = 1.0f;
1902 		}
1903 
1904 		info.pLabelName = name;
1905 		if (vkCmdBeginDebugUtilsLabelEXT)
1906 			vkCmdBeginDebugUtilsLabelEXT(cmd, &info);
1907 	}
1908 	else if (device->ext.supports_debug_marker)
1909 	{
1910 		VkDebugMarkerMarkerInfoEXT info = { VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT };
1911 		if (color)
1912 		{
1913 			for (unsigned i = 0; i < 4; i++)
1914 				info.color[i] = color[i];
1915 		}
1916 		else
1917 		{
1918 			for (unsigned i = 0; i < 4; i++)
1919 				info.color[i] = 1.0f;
1920 		}
1921 
1922 		info.pMarkerName = name;
1923 		vkCmdDebugMarkerBeginEXT(cmd, &info);
1924 	}
1925 }
1926 
1927 void CommandBuffer::end_region()
1928 {
1929 	if (device->ext.supports_debug_utils)
1930 	{
1931 		if (vkCmdEndDebugUtilsLabelEXT)
1932 			vkCmdEndDebugUtilsLabelEXT(cmd);
1933 	}
1934 	else if (device->ext.supports_debug_marker)
1935 		vkCmdDebugMarkerEndEXT(cmd);
1936 }
1937 
1938 #ifdef GRANITE_VULKAN_FILESYSTEM
1939 void CommandBufferUtil::set_quad_vertex_state(CommandBuffer &cmd)
1940 {
1941 	auto *data = static_cast<int8_t *>(cmd.allocate_vertex_data(0, 8, 2));
1942 	*data++ = -128;
1943 	*data++ = +127;
1944 	*data++ = +127;
1945 	*data++ = +127;
1946 	*data++ = -128;
1947 	*data++ = -128;
1948 	*data++ = +127;
1949 	*data++ = -128;
1950 
1951 	cmd.set_vertex_attrib(0, 0, VK_FORMAT_R8G8_SNORM, 0);
1952 }
1953 
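// The "fullscreen quad" here is really a single oversized triangle: the three
// positions below, (-1, -3), (-1, +1) and (+3, +1), cover the whole clip-space
// viewport, which is why draw_fullscreen_quad() issues a 3-vertex TRIANGLE_LIST draw.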
1954 void CommandBufferUtil::set_fullscreen_quad_vertex_state(CommandBuffer &cmd)
1955 {
1956 	auto *data = static_cast<float *>(cmd.allocate_vertex_data(0, 6 * sizeof(float), 2 * sizeof(float)));
1957 	*data++ = -1.0f;
1958 	*data++ = -3.0f;
1959 	*data++ = -1.0f;
1960 	*data++ = +1.0f;
1961 	*data++ = +3.0f;
1962 	*data++ = +1.0f;
1963 
1964 	cmd.set_vertex_attrib(0, 0, VK_FORMAT_R32G32_SFLOAT, 0);
1965 }
1966 
1967 void CommandBufferUtil::draw_fullscreen_quad(CommandBuffer &cmd, unsigned instances)
1968 {
1969 	cmd.set_primitive_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
1970 	cmd.draw(3, instances);
1971 }
1972 
1973 void CommandBufferUtil::draw_quad(CommandBuffer &cmd, unsigned instances)
1974 {
1975 	cmd.set_primitive_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP);
1976 	cmd.draw(4, instances);
1977 }
1978 
1979 void CommandBufferUtil::draw_fullscreen_quad(CommandBuffer &cmd, const std::string &vertex, const std::string &fragment,
1980                                              const std::vector<std::pair<std::string, int>> &defines)
1981 {
1982 	draw_fullscreen_quad_depth(cmd, vertex, fragment, false, false, VK_COMPARE_OP_ALWAYS, defines);
1983 }
1984 
1985 void CommandBufferUtil::draw_fullscreen_quad_depth(CommandBuffer &cmd, const std::string &vertex,
1986                                                    const std::string &fragment,
1987                                                    bool depth_test, bool depth_write, VkCompareOp depth_compare,
1988                                                    const std::vector<std::pair<std::string, int>> &defines)
1989 {
1990 	setup_fullscreen_quad(cmd, vertex, fragment, defines, depth_test, depth_write, depth_compare);
1991 	draw_fullscreen_quad(cmd);
1992 }
1993 
1994 void CommandBufferUtil::setup_fullscreen_quad(Vulkan::CommandBuffer &cmd, const std::string &vertex,
1995                                               const std::string &fragment,
1996                                               const std::vector<std::pair<std::string, int>> &defines, bool depth_test,
1997                                               bool depth_write, VkCompareOp depth_compare)
1998 {
1999 	cmd.set_program(vertex, fragment, defines);
2000 	cmd.set_quad_state();
2001 	set_fullscreen_quad_vertex_state(cmd);
2002 	cmd.set_depth_test(depth_test, depth_write);
2003 	cmd.set_depth_compare(depth_compare);
2004 }
2005 #endif
2006 
2007 void CommandBufferDeleter::operator()(Vulkan::CommandBuffer *cmd)
2008 {
2009 	cmd->device->handle_pool.command_buffers.free(cmd);
2010 }
2011 }
2012