#include <algorithm>
#include <cstdint>
#include <vector>

#include <sstream>

#include "Common/Log.h"
#include "Common/StringUtils.h"

#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/Thread/ThreadUtil.h"
12 
13 #if 0 // def _DEBUG
14 #define VLOG(...) INFO_LOG(G3D, __VA_ARGS__)
15 #else
16 #define VLOG(...)
17 #endif
18 
19 #ifndef UINT64_MAX
20 #define UINT64_MAX 0xFFFFFFFFFFFFFFFFULL
21 #endif
22 
23 using namespace PPSSPP_VK;
24 
VKRFramebuffer(VulkanContext * vk,VkCommandBuffer initCmd,VkRenderPass renderPass,int _width,int _height,const char * tag)25 VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VkRenderPass renderPass, int _width, int _height, const char *tag) : vulkan_(vk) {
26 	width = _width;
27 	height = _height;
28 
29 	CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
30 	CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
31 
32 	VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
33 	VkImageView views[2]{};
34 
35 	fbci.renderPass = renderPass;
36 	fbci.attachmentCount = 2;
37 	fbci.pAttachments = views;
38 	views[0] = color.imageView;
39 	views[1] = depth.imageView;
40 	fbci.width = width;
41 	fbci.height = height;
42 	fbci.layers = 1;
43 
44 	VkResult res = vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf);
45 	_assert_(res == VK_SUCCESS);
46 
47 	if (tag && vk->Extensions().EXT_debug_utils) {
48 		vk->SetDebugName(color.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_color_%s", tag).c_str());
49 		vk->SetDebugName(depth.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_depth_%s", tag).c_str());
50 		vk->SetDebugName(framebuf, VK_OBJECT_TYPE_FRAMEBUFFER, StringFromFormat("fb_%s", tag).c_str());
51 		this->tag = tag;
52 	}
53 }
54 
// Queues all owned Vulkan objects for deferred deletion; the delete queue
// destroys them once the GPU can no longer be using them. Handles may be
// VK_NULL_HANDLE if construction partially failed, hence the null checks.
VKRFramebuffer::~VKRFramebuffer() {
	if (color.image)
		vulkan_->Delete().QueueDeleteImage(color.image);
	if (depth.image)
		vulkan_->Delete().QueueDeleteImage(depth.image);
	if (color.imageView)
		vulkan_->Delete().QueueDeleteImageView(color.imageView);
	if (depth.imageView)
		vulkan_->Delete().QueueDeleteImageView(depth.imageView);
	// depthSampleView is the extra depth-only view created for sampling.
	if (depth.depthSampleView)
		vulkan_->Delete().QueueDeleteImageView(depth.depthSampleView);
	if (color.memory)
		vulkan_->Delete().QueueDeleteDeviceMemory(color.memory);
	if (depth.memory)
		vulkan_->Delete().QueueDeleteDeviceMemory(depth.memory);
	if (framebuf)
		vulkan_->Delete().QueueDeleteFramebuffer(framebuf);
}
73 
// Creates a 2D single-mip render-target image plus its view(s), allocates and
// binds device-local memory, and records a layout transition from UNDEFINED to
// initialLayout on 'cmd'. 'color' selects color vs depth/stencil usage and
// aspects. Only the three layouts handled in the switch below are supported.
void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
	VkImageCreateInfo ici{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
	ici.arrayLayers = 1;
	ici.mipLevels = 1;
	ici.extent.width = width;
	ici.extent.height = height;
	ici.extent.depth = 1;
	ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	ici.imageType = VK_IMAGE_TYPE_2D;
	ici.samples = VK_SAMPLE_COUNT_1_BIT;
	ici.tiling = VK_IMAGE_TILING_OPTIMAL;
	ici.format = format;
	// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
	ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	if (color) {
		ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
	} else {
		ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
	}

	VkResult res = vkCreateImage(vulkan->GetDevice(), &ici, nullptr, &img.image);
	_dbg_assert_(res == VK_SUCCESS);

	VkMemoryRequirements memreq;
	bool dedicatedAllocation = false;
	vulkan->GetImageMemoryRequirements(img.image, &memreq, &dedicatedAllocation);

	VkMemoryAllocateInfo alloc{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
	alloc.allocationSize = memreq.size;
	// Chain in VK_KHR_dedicated_allocation info when the driver reported that
	// this image prefers/requires its own allocation.
	VkMemoryDedicatedAllocateInfoKHR dedicatedAllocateInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
	if (dedicatedAllocation) {
		dedicatedAllocateInfo.image = img.image;
		alloc.pNext = &dedicatedAllocateInfo;
	}

	vulkan->MemoryTypeFromProperties(memreq.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &alloc.memoryTypeIndex);

	res = vkAllocateMemory(vulkan->GetDevice(), &alloc, nullptr, &img.memory);
	_dbg_assert_(res == VK_SUCCESS);

	res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0);
	_dbg_assert_(res == VK_SUCCESS);

	VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);

	VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	ivci.components = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY };
	ivci.format = ici.format;
	ivci.image = img.image;
	ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
	ivci.subresourceRange.aspectMask = aspects;
	ivci.subresourceRange.layerCount = 1;
	ivci.subresourceRange.levelCount = 1;
	res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView);
	_dbg_assert_(res == VK_SUCCESS);

	// Separate view for texture sampling that only exposes depth.
	if (!color) {
		ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
		res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.depthSampleView);
		_dbg_assert_(res == VK_SUCCESS);
	} else {
		img.depthSampleView = VK_NULL_HANDLE;
	}

	// Pick the destination stage/access for the initial layout transition.
	VkPipelineStageFlags dstStage;
	VkAccessFlagBits dstAccessMask;
	switch (initialLayout) {
	case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
		dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		dstStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		break;
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
		dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		dstStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
		break;
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
		dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
		dstStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		break;
	default:
		// Unsupported initial layout - programmer error.
		Crash();
		return;
	}

	TransitionImageLayout2(cmd, img.image, 0, 1, aspects,
		VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
		VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, dstStage,
		0, dstAccessMask);
	img.layout = initialLayout;

	img.format = format;
	img.tag = tag ? tag : "N/A";
}
168 
VulkanRenderManager(VulkanContext * vulkan)169 VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
170 	VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
171 	semaphoreCreateInfo.flags = 0;
172 	VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_);
173 	_dbg_assert_(res == VK_SUCCESS);
174 	res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_);
175 	_dbg_assert_(res == VK_SUCCESS);
176 
177 	inflightFramesAtStart_ = vulkan_->GetInflightFrames();
178 	for (int i = 0; i < inflightFramesAtStart_; i++) {
179 		VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
180 		cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex();
181 		cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
182 		VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit);
183 		_dbg_assert_(res == VK_SUCCESS);
184 		res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain);
185 		_dbg_assert_(res == VK_SUCCESS);
186 
187 		VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
188 		cmd_alloc.commandPool = frameData_[i].cmdPoolInit;
189 		cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
190 		cmd_alloc.commandBufferCount = 1;
191 
192 		res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd);
193 		_dbg_assert_(res == VK_SUCCESS);
194 		cmd_alloc.commandPool = frameData_[i].cmdPoolMain;
195 		res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd);
196 		_dbg_assert_(res == VK_SUCCESS);
197 
198 		// Creating the frame fence with true so they can be instantly waited on the first frame
199 		frameData_[i].fence = vulkan_->CreateFence(true);
200 
201 		// This fence one is used for synchronizing readbacks. Does not need preinitialization.
202 		frameData_[i].readbackFence = vulkan_->CreateFence(false);
203 
204 		VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
205 		query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
206 		query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
207 		res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
208 	}
209 
210 	queueRunner_.CreateDeviceObjects();
211 
212 	// AMD hack for issue #10097 (older drivers only.)
213 	const auto &props = vulkan_->GetPhysicalDeviceProperties().properties;
214 	if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) {
215 		useThread_ = false;
216 	}
217 }
218 
CreateBackbuffers()219 bool VulkanRenderManager::CreateBackbuffers() {
220 	if (!vulkan_->GetSwapchain()) {
221 		ERROR_LOG(G3D, "No swapchain - can't create backbuffers");
222 		return false;
223 	}
224 	VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
225 	_dbg_assert_(res == VK_SUCCESS);
226 
227 	VkImage *swapchainImages = new VkImage[swapchainImageCount_];
228 	res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
229 	if (res != VK_SUCCESS) {
230 		ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
231 		delete[] swapchainImages;
232 		return false;
233 	}
234 
235 	VkCommandBuffer cmdInit = GetInitCmd();
236 
237 	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
238 		SwapchainImageData sc_buffer{};
239 		sc_buffer.image = swapchainImages[i];
240 
241 		VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
242 		color_image_view.format = vulkan_->GetSwapchainFormat();
243 		color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
244 		color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
245 		color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
246 		color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
247 		color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
248 		color_image_view.subresourceRange.baseMipLevel = 0;
249 		color_image_view.subresourceRange.levelCount = 1;
250 		color_image_view.subresourceRange.baseArrayLayer = 0;
251 		color_image_view.subresourceRange.layerCount = 1;
252 		color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
253 		color_image_view.flags = 0;
254 		color_image_view.image = sc_buffer.image;
255 
256 		// We leave the images as UNDEFINED, there's no need to pre-transition them as
257 		// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
258 		// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
259 
260 		res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
261 		swapchainImages_.push_back(sc_buffer);
262 		_dbg_assert_(res == VK_SUCCESS);
263 	}
264 	delete[] swapchainImages;
265 
266 	// Must be before InitBackbufferRenderPass.
267 	if (InitDepthStencilBuffer(cmdInit)) {
268 		InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
269 	}
270 	curWidthRaw_ = -1;
271 	curHeightRaw_ = -1;
272 
273 	if (HasBackbuffers()) {
274 		VLOG("Backbuffers Created");
275 	}
276 
277 	if (newInflightFrames_ != -1) {
278 		INFO_LOG(G3D, "Updating inflight frames to %d", newInflightFrames_);
279 		vulkan_->UpdateInflightFrames(newInflightFrames_);
280 		newInflightFrames_ = -1;
281 	}
282 
283 	outOfDateFrames_ = 0;
284 
285 	// Start the thread.
286 	if (useThread_ && HasBackbuffers()) {
287 		run_ = true;
288 		// Won't necessarily be 0.
289 		threadInitFrame_ = vulkan_->GetCurFrame();
290 		INFO_LOG(G3D, "Starting Vulkan submission thread (threadInitFrame_ = %d)", vulkan_->GetCurFrame());
291 		thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
292 	}
293 	return true;
294 }
295 
// Stops the submission thread: clears run_, wakes both sides of every frame's
// condvar handshake, joins the thread, then drains per-frame state so a later
// restart begins from a clean slate. Safe to call when already stopped.
void VulkanRenderManager::StopThread() {
	if (useThread_ && run_) {
		run_ = false;
		// Stop the thread.
		for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
			auto &frameData = frameData_[i];
			{
				std::unique_lock<std::mutex> lock(frameData.push_mutex);
				frameData.push_condVar.notify_all();
			}
			{
				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
				frameData.pull_condVar.notify_all();
			}
			// Zero the queries so we don't try to pull them later.
			frameData.profile.timestampDescriptions.clear();
		}
		thread_.join();
		INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());

		// Eat whatever has been queued up for this frame if anything.
		Wipe();

		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
		// Also clean out any queued data, which might refer to things that might not be valid
		// when we restart...
		for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
			auto &frameData = frameData_[i];
			_assert_(!frameData.readyForRun);
			_assert_(frameData.steps.empty());
			if (frameData.hasInitCommands) {
				// Clear 'em out.  This can happen on restart sometimes.
				vkEndCommandBuffer(frameData.initCmd);
				frameData.hasInitCommands = false;
			}
			frameData.readyForRun = false;
			// NOTE(review): this delete loop is dead code given the
			// steps.empty() assert above - defensive only in release builds.
			for (size_t i = 0; i < frameData.steps.size(); i++) {
				delete frameData.steps[i];
			}
			frameData.steps.clear();

			// Block until the thread's final Run() has signaled readyForFence
			// for this frame, so fences are back in a known state.
			std::unique_lock<std::mutex> lock(frameData.push_mutex);
			while (!frameData.readyForFence) {
				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
				frameData.push_condVar.wait(lock);
			}
		}
	} else {
		INFO_LOG(G3D, "Vulkan submission thread was already stopped.");
	}
}
347 
DestroyBackbuffers()348 void VulkanRenderManager::DestroyBackbuffers() {
349 	StopThread();
350 	vulkan_->WaitUntilQueueIdle();
351 
352 	for (auto &image : swapchainImages_) {
353 		vulkan_->Delete().QueueDeleteImageView(image.view);
354 	}
355 	swapchainImages_.clear();
356 
357 	if (depth_.view) {
358 		vulkan_->Delete().QueueDeleteImageView(depth_.view);
359 	}
360 	if (depth_.image) {
361 		vulkan_->Delete().QueueDeleteImage(depth_.image);
362 	}
363 	if (depth_.mem) {
364 		vulkan_->Delete().QueueDeleteDeviceMemory(depth_.mem);
365 	}
366 	depth_ = {};
367 	for (uint32_t i = 0; i < framebuffers_.size(); i++) {
368 		_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
369 		vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
370 	}
371 	framebuffers_.clear();
372 
373 	INFO_LOG(G3D, "Backbuffers destroyed");
374 }
375 
~VulkanRenderManager()376 VulkanRenderManager::~VulkanRenderManager() {
377 	INFO_LOG(G3D, "VulkanRenderManager destructor");
378 	StopThread();
379 	vulkan_->WaitUntilQueueIdle();
380 
381 	VkDevice device = vulkan_->GetDevice();
382 	vkDestroySemaphore(device, acquireSemaphore_, nullptr);
383 	vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr);
384 	for (int i = 0; i < inflightFramesAtStart_; i++) {
385 		vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd);
386 		vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd);
387 		vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
388 		vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
389 		vkDestroyFence(device, frameData_[i].fence, nullptr);
390 		vkDestroyFence(device, frameData_[i].readbackFence, nullptr);
391 		vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
392 	}
393 	queueRunner_.DestroyDeviceObjects();
394 }
395 
// Submission thread main loop. Pulls frames in order starting at
// threadInitFrame_, waits on each frame's pull condvar until the main thread
// marks it readyForRun, then executes it with Run(). Exits when run_ is false
// and no more frames are pending.
void VulkanRenderManager::ThreadFunc() {
	SetCurrentThreadName("RenderMan");
	int threadFrame = threadInitFrame_;
	bool nextFrame = false;
	bool firstFrame = true;
	while (true) {
		{
			// Advance to the next inflight frame only after a completed END run.
			if (nextFrame) {
				threadFrame++;
				if (threadFrame >= vulkan_->GetInflightFrames())
					threadFrame = 0;
			}
			FrameData &frameData = frameData_[threadFrame];
			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
			while (!frameData.readyForRun && run_) {
				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
				frameData.pull_condVar.wait(lock);
			}
			if (!frameData.readyForRun && !run_) {
				// This means we're out of frames to render and run_ is false, so bail.
				break;
			}
			VLOG("PULL: frame[%d].readyForRun = false", threadFrame);
			frameData.readyForRun = false;
			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
			// but that created a race condition where frames could end up not finished properly on resize etc.

			// Only increment next time if we're done.
			nextFrame = frameData.type == VKRRunType::END;
			_dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
		}
		// Run() is called outside the lock - the frame is now owned by this thread.
		VLOG("PULL: Running frame %d", threadFrame);
		if (firstFrame) {
			INFO_LOG(G3D, "Running first frame (%d)", threadFrame);
			firstFrame = false;
		}
		Run(threadFrame);
		VLOG("PULL: Finished frame %d", threadFrame);
	}

	// Wait for the device to be done with everything, before tearing stuff down.
	vkDeviceWaitIdle(vulkan_->GetDevice());

	VLOG("PULL: Quitting");
}
441 
// Begins a CPU frame: waits until this frame slot's previous GPU work is fully
// done (thread handshake + fence), optionally collects last frame's GPU
// timestamp profile, then runs per-frame deletes and resets the query pool.
void VulkanRenderManager::BeginFrame(bool enableProfiling) {
	VLOG("BeginFrame");
	VkDevice device = vulkan_->GetDevice();

	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];

	// Make sure the very last command buffer from the frame before the previous has been fully executed.
	if (useThread_) {
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
	}

	VLOG("PUSH: Fencing %d", curFrame);

	vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX);
	vkResetFences(device, 1, &frameData.fence);

	// Can't set this until after the fence.
	frameData.profilingEnabled_ = enableProfiling;
	frameData.readbackFenceUsed = false;

	uint64_t queryResults[MAX_TIMESTAMP_QUERIES];

	if (frameData.profilingEnabled_) {
		// Pull the profiling results from last time and produce a summary!
		if (!frameData.profile.timestampDescriptions.empty()) {
			int numQueries = (int)frameData.profile.timestampDescriptions.size();
			VkResult res = vkGetQueryPoolResults(
				vulkan_->GetDevice(),
				frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),
				VK_QUERY_RESULT_64_BIT);
			if (res == VK_SUCCESS) {
				// Timestamps are in 'timestampPeriod' nanosecond ticks; convert to ms.
				double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);
				// Mask differences to the valid bit width so wrap-around subtracts correctly.
				int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
				uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);
				std::stringstream str;

				char line[256];
				snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
				str << line;
				snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
				str << line;
				// Each interval i..i+1 is labeled by the description recorded for query i+1.
				for (int i = 0; i < numQueries - 1; i++) {
					uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
					double milliseconds = (double)diff * timestampConversionFactor;
					snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
					str << line;
				}
				frameData.profile.profileSummary = str.str();
			} else {
				frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";
			}
		} else {
			frameData.profile.profileSummary = "(no GPU profile data collected)";
		}
	}

	// Must be after the fence - this performs deletes.
	VLOG("PUSH: BeginFrame %d", curFrame);
	if (!run_) {
		WARN_LOG(G3D, "BeginFrame while !run_!");
	}
	vulkan_->BeginFrame();

	insideFrame_ = true;
	renderStepOffset_ = 0;

	frameData.profile.timestampDescriptions.clear();
	if (frameData.profilingEnabled_) {
		// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,
		// unless we want to limit ourselves to only measure the main cmd buffer.
		// Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.
		// Reserve the first two queries for initCmd.
		frameData.profile.timestampDescriptions.push_back("initCmd Begin");
		frameData.profile.timestampDescriptions.push_back("initCmd");
		VkCommandBuffer initCmd = GetInitCmd();
		vkCmdResetQueryPool(initCmd, frameData.profile.queryPool, 0, MAX_TIMESTAMP_QUERIES);
		vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.profile.queryPool, 0);
	}
}
527 
GetInitCmd()528 VkCommandBuffer VulkanRenderManager::GetInitCmd() {
529 	int curFrame = vulkan_->GetCurFrame();
530 	FrameData &frameData = frameData_[curFrame];
531 	if (!frameData.hasInitCommands) {
532 		VkCommandBufferBeginInfo begin = {
533 			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
534 			nullptr,
535 			VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
536 		};
537 		VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
538 		if (res != VK_SUCCESS) {
539 			return VK_NULL_HANDLE;
540 		}
541 		frameData.hasInitCommands = true;
542 	}
543 	return frameData_[curFrame].initCmd;
544 }
545 
EndCurRenderStep()546 void VulkanRenderManager::EndCurRenderStep() {
547 	// Save the accumulated pipeline flags so we can use that to configure the render pass.
548 	// We'll often be able to avoid loading/saving the depth/stencil buffer.
549 	if (curRenderStep_) {
550 		curRenderStep_->render.pipelineFlags = curPipelineFlags_;
551 		// We don't do this optimization for very small targets, probably not worth it.
552 		if (!curRenderArea_.Empty() && (curWidth_ > 32 && curHeight_ > 32)) {
553 			curRenderStep_->render.renderArea = curRenderArea_.ToVkRect2D();
554 		} else {
555 			curRenderStep_->render.renderArea.offset = {};
556 			curRenderStep_->render.renderArea.extent = { (uint32_t)curWidth_, (uint32_t)curHeight_ };
557 		}
558 		curRenderArea_.Reset();
559 
560 		// We no longer have a current render step.
561 		curRenderStep_ = nullptr;
562 		curPipelineFlags_ = 0;
563 	}
564 }
565 
// Starts (or merges into) a RENDER step targeting 'fb' (null = backbuffer).
// Performs two dedup optimizations: converting a re-bind of the same target
// into an in-pass CLEAR command, and deleting a trailing empty step. Also
// applies an old-Mali workaround forcing depth/stencil actions to match.
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassAction color, VKRRenderPassAction depth, VKRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_dbg_assert_(insideFrame_);
	// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.
	if (!steps_.empty() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		u32 clearMask = 0;
		if (color == VKRRenderPassAction::CLEAR) {
			clearMask |= VK_IMAGE_ASPECT_COLOR_BIT;
		}
		if (depth == VKRRenderPassAction::CLEAR) {
			clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
		}
		if (stencil == VKRRenderPassAction::CLEAR) {
			clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
		}

		// If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.
		// If there's no clear needed, let's also do that.
		//
		// However, if we do need a clear and there are no commands in the previous pass,
		// we want the queuerunner to have the opportunity to merge, so we'll go ahead and make a new renderpass.
		if (clearMask == 0 || !steps_.back()->commands.empty()) {
			curRenderStep_ = steps_.back();
			curStepHasViewport_ = false;
			curStepHasScissor_ = false;
			// Re-derive viewport/scissor presence from the resumed step's commands.
			for (const auto &c : steps_.back()->commands) {
				if (c.cmd == VKRRenderCommand::VIEWPORT) {
					curStepHasViewport_ = true;
				} else if (c.cmd == VKRRenderCommand::SCISSOR) {
					curStepHasScissor_ = true;
				}
			}
			if (clearMask != 0) {
				VkRenderData data{ VKRRenderCommand::CLEAR };
				data.clear.clearColor = clearColor;
				data.clear.clearZ = clearDepth;
				data.clear.clearStencil = clearStencil;
				data.clear.clearMask = clearMask;
				curRenderStep_->commands.push_back(data);
				// A full clear touches the whole target.
				curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
			}
			return;
		}
	}

	// More redundant bind elimination.
	if (curRenderStep_) {
		if (curRenderStep_->commands.empty()) {
			if (curRenderStep_->render.color != VKRRenderPassAction::CLEAR && curRenderStep_->render.depth != VKRRenderPassAction::CLEAR && curRenderStep_->render.stencil != VKRRenderPassAction::CLEAR) {
				// Can trivially kill the last empty render step.
				_dbg_assert_(steps_.back() == curRenderStep_);
				delete steps_.back();
				steps_.pop_back();
				curRenderStep_ = nullptr;
			}
			VLOG("Empty render step. Usually happens after uploading pixels..");
		}

		EndCurRenderStep();
	}

	// Older Mali drivers have issues with depth and stencil don't match load/clear/etc.
	// TODO: Determine which versions and do this only where necessary.
	u32 lateClearMask = 0;
	if (depth != stencil && vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_ARM) {
		if (stencil == VKRRenderPassAction::DONT_CARE) {
			stencil = depth;
		} else if (depth == VKRRenderPassAction::DONT_CARE) {
			depth = stencil;
		} else if (stencil == VKRRenderPassAction::CLEAR) {
			// Can't express a stencil-only clear in the pass actions here;
			// downgrade to depth's action and clear stencil with a late command.
			depth = stencil;
			lateClearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
		} else if (depth == VKRRenderPassAction::CLEAR) {
			stencil = depth;
			lateClearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
		}
	}

	VKRStep *step = new VKRStep{ VKRStepType::RENDER };
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->render.clearColor = clearColor;
	step->render.clearDepth = clearDepth;
	step->render.clearStencil = clearStencil;
	step->render.numDraws = 0;
	step->render.numReads = 0;
	// Backbuffer targets have fixed final layouts; offscreen targets are decided later.
	step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
	step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
	step->tag = tag;
	steps_.push_back(step);

	if (fb) {
		// If there's a KEEP, we naturally read from the framebuffer.
		if (color == VKRRenderPassAction::KEEP || depth == VKRRenderPassAction::KEEP || stencil == VKRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	curRenderStep_ = step;
	curStepHasViewport_ = false;
	curStepHasScissor_ = false;
	if (fb) {
		curWidthRaw_ = fb->width;
		curHeightRaw_ = fb->height;
		curWidth_ = fb->width;
		curHeight_ = fb->height;
	} else {
		// Backbuffer: logical width/height swap under 90/270-degree display rotation.
		curWidthRaw_ = vulkan_->GetBackbufferWidth();
		curHeightRaw_ = vulkan_->GetBackbufferHeight();
		if (g_display_rotation == DisplayRotation::ROTATE_90 || g_display_rotation == DisplayRotation::ROTATE_270) {
			curWidth_ = curHeightRaw_;
			curHeight_ = curWidthRaw_;
		} else {
			curWidth_ = curWidthRaw_;
			curHeight_ = curHeightRaw_;
		}
	}

	if (color == VKRRenderPassAction::CLEAR || depth == VKRRenderPassAction::CLEAR || stencil == VKRRenderPassAction::CLEAR) {
		curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
	}

	// See above - we add a clear afterward if only one side for depth/stencil CLEAR/KEEP.
	if (lateClearMask != 0) {
		VkRenderData data{ VKRRenderCommand::CLEAR };
		data.clear.clearColor = clearColor;
		data.clear.clearZ = clearDepth;
		data.clear.clearStencil = clearStencil;
		data.clear.clearMask = lateClearMask;
		curRenderStep_->commands.push_back(data);
	}
}
699 
// Synchronously reads back a rectangle of 'src' (null = backbuffer) into
// 'pixels'. Queues a READBACK step, flushes and waits for the GPU (FlushSync),
// then converts from the source format to destFormat. Returns false if the
// backbuffer can't be read or its format is unsupported.
bool VulkanRenderManager::CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_dbg_assert_(insideFrame_);
	// Mark the most recent render step targeting 'src' as read from, so the
	// queue runner knows not to discard its results.
	for (int i = (int)steps_.size() - 1; i >= 0; i--) {
		if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
			steps_[i]->render.numReads++;
			break;
		}
	}

	EndCurRenderStep();

	VKRStep *step = new VKRStep{ VKRStepType::READBACK };
	step->readback.aspectMask = aspectBits;
	step->readback.src = src;
	step->readback.srcRect.offset = { x, y };
	step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	// Submit everything and wait for completion - the readback buffer is valid after this.
	FlushSync();

	Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED;
	if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) {
		if (src) {
			switch (src->color.format) {
			case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;
			default: _assert_(false);
			}
		} else {
			// Backbuffer.
			if (!(vulkan_->GetSurfaceCapabilities().supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) {
				ERROR_LOG(G3D, "Copying from backbuffer not supported, can't take screenshots");
				return false;
			}
			switch (vulkan_->GetSwapchainFormat()) {
			case VK_FORMAT_B8G8R8A8_UNORM: srcFormat = Draw::DataFormat::B8G8R8A8_UNORM; break;
			case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;
			// NOTE: If you add supported formats here, make sure to also support them in VulkanQueueRunner::CopyReadbackBuffer.
			default:
				ERROR_LOG(G3D, "Unsupported backbuffer format for screenshots");
				return false;
			}
		}
	} else if (aspectBits & VK_IMAGE_ASPECT_STENCIL_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & VK_IMAGE_ASPECT_DEPTH_BIT) {
		// NOTE(review): unlike the color path, this dereferences 'src' without a
		// null check - presumably depth readback is never requested for the
		// backbuffer. Confirm against callers.
		switch (src->depth.format) {
		case VK_FORMAT_D24_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D24_S8; break;
		case VK_FORMAT_D32_SFLOAT_S8_UINT: srcFormat = Draw::DataFormat::D32F; break;
		case VK_FORMAT_D16_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D16; break;
		default: _assert_(false);
		}
	} else {
		_assert_(false);
	}
	// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.
	queueRunner_.CopyReadbackBuffer(w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}
761 
CopyImageToMemorySync(VkImage image,int mipLevel,int x,int y,int w,int h,Draw::DataFormat destFormat,uint8_t * pixels,int pixelStride,const char * tag)762 void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
763 	_dbg_assert_(insideFrame_);
764 
765 	EndCurRenderStep();
766 
767 	VKRStep *step = new VKRStep{ VKRStepType::READBACK_IMAGE };
768 	step->readback_image.image = image;
769 	step->readback_image.srcRect.offset = { x, y };
770 	step->readback_image.srcRect.extent = { (uint32_t)w, (uint32_t)h };
771 	step->readback_image.mipLevel = mipLevel;
772 	step->tag = tag;
773 	steps_.push_back(step);
774 
775 	FlushSync();
776 
777 	// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.
778 	queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels);
779 }
780 
InitBackbufferFramebuffers(int width,int height)781 bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
782 	VkResult res;
783 	// We share the same depth buffer but have multiple color buffers, see the loop below.
784 	VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
785 
786 	VLOG("InitFramebuffers: %dx%d", width, height);
787 	VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
788 	fb_info.renderPass = queueRunner_.GetBackbufferRenderPass();
789 	fb_info.attachmentCount = 2;
790 	fb_info.pAttachments = attachments;
791 	fb_info.width = width;
792 	fb_info.height = height;
793 	fb_info.layers = 1;
794 
795 	framebuffers_.resize(swapchainImageCount_);
796 
797 	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
798 		attachments[0] = swapchainImages_[i].view;
799 		res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
800 		_dbg_assert_(res == VK_SUCCESS);
801 		if (res != VK_SUCCESS) {
802 			framebuffers_.clear();
803 			return false;
804 		}
805 	}
806 
807 	return true;
808 }
809 
// Creates the depth/stencil buffer shared by all backbuffer framebuffers:
// image, memory allocation + bind, layout transition, and image view, all
// stored in depth_. The transition is recorded into cmd (an init command
// buffer), so it executes before the first render pass uses the attachment.
// Returns false on any Vulkan failure.
// NOTE(review): failure paths return without destroying the partially-created
// image/memory - presumably freed with the rest of depth_ elsewhere; verify.
bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) {
	VkResult res;
	bool pass;

	// Device-preferred packed depth/stencil format (e.g. D24S8 or D32F_S8).
	const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
	int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
	image_info.imageType = VK_IMAGE_TYPE_2D;
	image_info.format = depth_format;
	image_info.extent.width = vulkan_->GetBackbufferWidth();
	image_info.extent.height = vulkan_->GetBackbufferHeight();
	image_info.extent.depth = 1;
	image_info.mipLevels = 1;
	image_info.arrayLayers = 1;
	image_info.samples = VK_SAMPLE_COUNT_1_BIT;
	image_info.queueFamilyIndexCount = 0;
	image_info.pQueueFamilyIndices = nullptr;
	image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
	image_info.flags = 0;

	depth_.format = depth_format;

	VkDevice device = vulkan_->GetDevice();
	res = vkCreateImage(device, &image_info, nullptr, &depth_.image);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");

	// Some drivers want a dedicated allocation for images like this
	// (VK_KHR_dedicated_allocation); GetImageMemoryRequirements reports it.
	bool dedicatedAllocation = false;
	VkMemoryRequirements mem_reqs;
	vulkan_->GetImageMemoryRequirements(depth_.image, &mem_reqs, &dedicatedAllocation);

	VkMemoryAllocateInfo mem_alloc = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
	mem_alloc.allocationSize = mem_reqs.size;
	mem_alloc.memoryTypeIndex = 0;

	VkMemoryDedicatedAllocateInfoKHR dedicatedAllocateInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
	if (dedicatedAllocation) {
		dedicatedAllocateInfo.image = depth_.image;
		mem_alloc.pNext = &dedicatedAllocateInfo;
	}

	// Use the memory properties to determine the type of memory required
	pass = vulkan_->MemoryTypeFromProperties(mem_reqs.memoryTypeBits,
		0, /* No requirements */
		&mem_alloc.memoryTypeIndex);
	_dbg_assert_(pass);
	if (!pass)
		return false;

	res = vkAllocateMemory(device, &mem_alloc, NULL, &depth_.mem);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	res = vkBindImageMemory(device, depth_.image, depth_.mem, 0);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	// Record the UNDEFINED -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL transition so the
	// attachment is in the right layout before first use.
	TransitionImageLayout2(cmd, depth_.image, 0, 1,
		aspectMask,
		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
		VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
		0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);

	VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	depth_view_info.image = depth_.image;
	depth_view_info.format = depth_format;
	depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.subresourceRange.aspectMask = aspectMask;
	depth_view_info.subresourceRange.baseMipLevel = 0;
	depth_view_info.subresourceRange.levelCount = 1;
	depth_view_info.subresourceRange.baseArrayLayer = 0;
	depth_view_info.subresourceRange.layerCount = 1;
	depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	depth_view_info.flags = 0;

	res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	return true;
}
901 
RemoveDrawCommands(std::vector<VkRenderData> * cmds)902 static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
903 	// Here we remove any DRAW type commands when we hit a CLEAR.
904 	for (auto &c : *cmds) {
905 		if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {
906 			c.cmd = VKRRenderCommand::REMOVED;
907 		}
908 	}
909 }
910 
CleanupRenderCommands(std::vector<VkRenderData> * cmds)911 static void CleanupRenderCommands(std::vector<VkRenderData> *cmds) {
912 	size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];
913 	memset(lastCommand, -1, sizeof(lastCommand));
914 
915 	// Find any duplicate state commands (likely from RemoveDrawCommands.)
916 	for (size_t i = 0; i < cmds->size(); ++i) {
917 		auto &c = cmds->at(i);
918 		auto &lastOfCmd = lastCommand[(uint8_t)c.cmd];
919 
920 		switch (c.cmd) {
921 		case VKRRenderCommand::REMOVED:
922 			continue;
923 
924 		case VKRRenderCommand::BIND_PIPELINE:
925 		case VKRRenderCommand::VIEWPORT:
926 		case VKRRenderCommand::SCISSOR:
927 		case VKRRenderCommand::BLEND:
928 		case VKRRenderCommand::STENCIL:
929 			if (lastOfCmd != -1) {
930 				cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;
931 			}
932 			break;
933 
934 		case VKRRenderCommand::PUSH_CONSTANTS:
935 			// TODO: For now, we have to keep this one (it has an offset.)  Still update lastCommand.
936 			break;
937 
938 		case VKRRenderCommand::CLEAR:
939 			// Ignore, doesn't participate in state.
940 			continue;
941 
942 		case VKRRenderCommand::DRAW_INDEXED:
943 		case VKRRenderCommand::DRAW:
944 		default:
945 			// Boundary - must keep state before this.
946 			memset(lastCommand, -1, sizeof(lastCommand));
947 			continue;
948 		}
949 
950 		lastOfCmd = i;
951 	}
952 
953 	// At this point, anything in lastCommand can be cleaned up too.
954 	// Note that it's safe to remove the last unused PUSH_CONSTANTS here.
955 	for (size_t i = 0; i < ARRAY_SIZE(lastCommand); ++i) {
956 		auto &lastOfCmd = lastCommand[i];
957 		if (lastOfCmd != -1) {
958 			cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;
959 		}
960 	}
961 }
962 
Clear(uint32_t clearColor,float clearZ,int clearStencil,int clearMask)963 void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {
964 	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
965 	if (!clearMask)
966 		return;
967 	// If this is the first drawing command or clears everything, merge it into the pass.
968 	int allAspects = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
969 	if (curRenderStep_->render.numDraws == 0 || clearMask == allAspects) {
970 		curRenderStep_->render.clearColor = clearColor;
971 		curRenderStep_->render.clearDepth = clearZ;
972 		curRenderStep_->render.clearStencil = clearStencil;
973 		curRenderStep_->render.color = (clearMask & VK_IMAGE_ASPECT_COLOR_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
974 		curRenderStep_->render.depth = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
975 		curRenderStep_->render.stencil = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
976 
977 		// In case there were commands already.
978 		curRenderStep_->render.numDraws = 0;
979 		RemoveDrawCommands(&curRenderStep_->commands);
980 	} else {
981 		VkRenderData data{ VKRRenderCommand::CLEAR };
982 		data.clear.clearColor = clearColor;
983 		data.clear.clearZ = clearZ;
984 		data.clear.clearStencil = clearStencil;
985 		data.clear.clearMask = clearMask;
986 		curRenderStep_->commands.push_back(data);
987 	}
988 
989 	curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
990 }
991 
// Queues a COPY step that copies a rectangle from src to dst at dstPos for the
// given aspects. Also pre-transitions both framebuffers (via the final*Layout
// mechanism on their last render step) so the queue runner can start the copy
// without extra barriers, and records read/write dependencies.
void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, VkImageAspectFlags aspectMask, const char *tag) {
	_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);
	_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);
	_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);
	_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);

	_dbg_assert_msg_(srcRect.extent.width > 0, "copy srcwidth == 0");
	_dbg_assert_msg_(srcRect.extent.height > 0, "copy srcheight == 0");

	_dbg_assert_msg_(dstPos.x >= 0, "dstPos offset x (%d) < 0", dstPos.x);
	_dbg_assert_msg_(dstPos.y >= 0, "dstPos offset y (%d) < 0", dstPos.y);
	_dbg_assert_msg_(dstPos.x + srcRect.extent.width <= (uint32_t)dst->width, "dstPos + extent x > width");
	_dbg_assert_msg_(dstPos.y + srcRect.extent.height <= (uint32_t)dst->height, "dstPos + extent y > height");

	// If src was rendered to this frame, ask that render pass to leave its images
	// in TRANSFER_SRC layout (only if no other final layout was already requested),
	// and count the read so the pass isn't dropped.
	for (int i = (int)steps_.size() - 1; i >= 0; i--) {
		if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
			if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
				if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
					steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
				}
			}
			if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
				if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
					steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
				}
			}
			steps_[i]->render.numReads++;
			break;
		}
	}
	// Same for dst, but with TRANSFER_DST layout. No numReads here - dst is written,
	// not read.
	for (int i = (int)steps_.size() - 1; i >= 0; i--) {
		if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == dst) {
			if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
				if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
					steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
				}
			}
			if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
				if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
					steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
				}
			}
			break;
		}
	}

	EndCurRenderStep();

	VKRStep *step = new VKRStep{ VKRStepType::COPY };

	step->copy.aspectMask = aspectMask;
	step->copy.src = src;
	step->copy.srcRect = srcRect;
	step->copy.dst = dst;
	step->copy.dstPos = dstPos;
	step->dependencies.insert(src);
	step->tag = tag;
	// If the copy completely overwrites dst, the copy doesn't depend on dst's
	// previous contents, so don't record a dependency on it.
	bool fillsDst = dst && srcRect.offset.x == 0 && srcRect.offset.y == 0 && srcRect.extent.width == dst->width && srcRect.extent.height == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
		step->dependencies.insert(dst);

	std::unique_lock<std::mutex> lock(mutex_);
	steps_.push_back(step);
}
1056 
BlitFramebuffer(VKRFramebuffer * src,VkRect2D srcRect,VKRFramebuffer * dst,VkRect2D dstRect,VkImageAspectFlags aspectMask,VkFilter filter,const char * tag)1057 void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, VkImageAspectFlags aspectMask, VkFilter filter, const char *tag) {
1058 	_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);
1059 	_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);
1060 	_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);
1061 	_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);
1062 
1063 	_dbg_assert_msg_(srcRect.extent.width > 0, "blit srcwidth == 0");
1064 	_dbg_assert_msg_(srcRect.extent.height > 0, "blit srcheight == 0");
1065 
1066 	_dbg_assert_msg_(dstRect.offset.x >= 0, "dstrect offset x < 0");
1067 	_dbg_assert_msg_(dstRect.offset.y >= 0, "dstrect offset y < 0");
1068 	_dbg_assert_msg_(dstRect.offset.x + dstRect.extent.width <= (uint32_t)dst->width, "dstrect offset x + extent > width");
1069 	_dbg_assert_msg_(dstRect.offset.y + dstRect.extent.height <= (uint32_t)dst->height, "dstrect offset y + extent > height");
1070 
1071 	_dbg_assert_msg_(dstRect.extent.width > 0, "blit dstwidth == 0");
1072 	_dbg_assert_msg_(dstRect.extent.height > 0, "blit dstheight == 0");
1073 
1074 	// TODO: Seem to be missing final layouts here like in Copy...
1075 
1076 	for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1077 		if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
1078 			steps_[i]->render.numReads++;
1079 			break;
1080 		}
1081 	}
1082 
1083 	EndCurRenderStep();
1084 
1085 	VKRStep *step = new VKRStep{ VKRStepType::BLIT };
1086 
1087 	step->blit.aspectMask = aspectMask;
1088 	step->blit.src = src;
1089 	step->blit.srcRect = srcRect;
1090 	step->blit.dst = dst;
1091 	step->blit.dstRect = dstRect;
1092 	step->blit.filter = filter;
1093 	step->dependencies.insert(src);
1094 	step->tag = tag;
1095 	bool fillsDst = dst && dstRect.offset.x == 0 && dstRect.offset.y == 0 && dstRect.extent.width == dst->width && dstRect.extent.height == dst->height;
1096 	if (!fillsDst)
1097 		step->dependencies.insert(dst);
1098 
1099 	std::unique_lock<std::mutex> lock(mutex_);
1100 	steps_.push_back(step);
1101 }
1102 
BindFramebufferAsTexture(VKRFramebuffer * fb,int binding,VkImageAspectFlags aspectBit,int attachment)1103 VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int attachment) {
1104 	_dbg_assert_(curRenderStep_ != nullptr);
1105 	// Mark the dependency, check for required transitions, and return the image.
1106 
1107 	// Optimization: If possible, use final*Layout to put the texture into the correct layout "early".
1108 	for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1109 		if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {
1110 			if (aspectBit == VK_IMAGE_ASPECT_COLOR_BIT) {
1111 				// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.
1112 				if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1113 					steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1114 				}
1115 				// If we find some other layout, a copy after this is likely involved. It's fine though,
1116 				// we'll just transition it right as we need it and lose a tiny optimization.
1117 			} else if (aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT) {
1118 				// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.
1119 				if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1120 					steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1121 				}
1122 			}  // We don't (yet?) support texturing from stencil images.
1123 			steps_[i]->render.numReads++;
1124 			break;
1125 		}
1126 	}
1127 
1128 	// Track dependencies fully.
1129 	curRenderStep_->dependencies.insert(fb);
1130 
1131 	if (!curRenderStep_->preTransitions.empty() &&
1132 		curRenderStep_->preTransitions.back().fb == fb &&
1133 		curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
1134 		// We're done.
1135 		return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.depthSampleView;
1136 	} else {
1137 		curRenderStep_->preTransitions.push_back({ aspectBit, fb, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL });
1138 		return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.depthSampleView;
1139 	}
1140 }
1141 
Finish()1142 void VulkanRenderManager::Finish() {
1143 	EndCurRenderStep();
1144 
1145 	// Let's do just a bit of cleanup on render commands now.
1146 	for (auto &step : steps_) {
1147 		if (step->stepType == VKRStepType::RENDER) {
1148 			CleanupRenderCommands(&step->commands);
1149 		}
1150 	}
1151 
1152 	int curFrame = vulkan_->GetCurFrame();
1153 	FrameData &frameData = frameData_[curFrame];
1154 	if (!useThread_) {
1155 		frameData.steps = std::move(steps_);
1156 		steps_.clear();
1157 		frameData.type = VKRRunType::END;
1158 		Run(curFrame);
1159 	} else {
1160 		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
1161 		VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
1162 		frameData.steps = std::move(steps_);
1163 		steps_.clear();
1164 		frameData.readyForRun = true;
1165 		frameData.type = VKRRunType::END;
1166 		frameData.pull_condVar.notify_all();
1167 	}
1168 	vulkan_->EndFrame();
1169 
1170 	insideFrame_ = false;
1171 }
1172 
Wipe()1173 void VulkanRenderManager::Wipe() {
1174 	for (auto step : steps_) {
1175 		delete step;
1176 	}
1177 	steps_.clear();
1178 }
1179 
1180 // Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
1181 // or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating
1182 // the backbuffer image acquisition.
// Acquires the next swapchain image (once per frame - guarded by hasBegun),
// begins the frame's main command buffer, and points the queue runner at the
// corresponding backbuffer framebuffer.
void VulkanRenderManager::BeginSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	if (!frameData.hasBegun) {
		// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
		// Now, I wonder if we should do this early in the frame or late? Right now we do it early, which should be fine.
		VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage);
		if (res == VK_SUBOPTIMAL_KHR) {
			// Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
			WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
		} else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
			// Render the frame anyway, but skip the present (see EndSubmitFrame).
			// NOTE(review): curSwapchainImage may not have been written on this
			// result - presumably benign since the present is skipped; verify.
			WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
			frameData.skipSwap = true;
		} else {
			_assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
		}

		VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
		begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
		res = vkBeginCommandBuffer(frameData.mainCmd, &begin);

		_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));

		// Point the queue runner at the framebuffer/image for the acquired index.
		queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);

		frameData.hasBegun = true;
	}
}
1210 
// Ends and submits this frame's command buffers (optional init cmdbuf + main
// cmdbuf) to the graphics queue.
// triggerFrameFence == true: normal end-of-frame submit - waits on the swapchain
// acquire semaphore, signals renderingCompleteSemaphore_ and frameData.fence.
// triggerFrameFence == false: sync/readback submit - signals readbackFence only,
// with no swapchain semaphores.
void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) {
	FrameData &frameData = frameData_[frame];
	if (frameData.hasInitCommands) {
		if (frameData.profilingEnabled_ && triggerFrameFence) {
			// Pre-allocated query ID 1.
			vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
		}
		VkResult res = vkEndCommandBuffer(frameData.initCmd);
		_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
	}

	VkResult res = vkEndCommandBuffer(frameData.mainCmd);
	_assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));

	VkCommandBuffer cmdBufs[2];
	int numCmdBufs = 0;
	if (frameData.hasInitCommands) {
		cmdBufs[numCmdBufs++] = frameData.initCmd;
		if (splitSubmit_) {
			// Send the init commands off separately. Used this once to confirm that the cause of a device loss was in the init cmdbuf.
			VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
			submit_info.commandBufferCount = (uint32_t)numCmdBufs;
			submit_info.pCommandBuffers = cmdBufs;
			res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE);
			if (res == VK_ERROR_DEVICE_LOST) {
				_assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan");
			} else {
				_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res));
			}
			// Init buffer already submitted - only the main buffer goes below.
			numCmdBufs = 0;
		}
	}
	cmdBufs[numCmdBufs++] = frameData.mainCmd;

	VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
	// The swapchain image only needs to be available by the color-output stage.
	VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
	if (triggerFrameFence && !frameData.skipSwap) {
		submit_info.waitSemaphoreCount = 1;
		submit_info.pWaitSemaphores = &acquireSemaphore_;
		submit_info.pWaitDstStageMask = waitStage;
	}
	submit_info.commandBufferCount = (uint32_t)numCmdBufs;
	submit_info.pCommandBuffers = cmdBufs;
	if (triggerFrameFence && !frameData.skipSwap) {
		submit_info.signalSemaphoreCount = 1;
		submit_info.pSignalSemaphores = &renderingCompleteSemaphore_;
	}
	// Frame submits signal the frame fence; sync readback submits signal readbackFence.
	res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence);
	if (res == VK_ERROR_DEVICE_LOST) {
		_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
	} else {
		_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! result=%s", (int)splitSubmit_, VulkanResultToString(res));
	}

	// When !triggerFence, we notify after syncing with Vulkan.
	if (useThread_ && triggerFrameFence) {
		VLOG("PULL: Frame %d.readyForFence = true", frame);
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		frameData.readyForFence = true;
		frameData.push_condVar.notify_all();
	}

	frameData.hasInitCommands = false;
}
1275 
EndSubmitFrame(int frame)1276 void VulkanRenderManager::EndSubmitFrame(int frame) {
1277 	FrameData &frameData = frameData_[frame];
1278 	frameData.hasBegun = false;
1279 
1280 	Submit(frame, true);
1281 
1282 	if (!frameData.skipSwap) {
1283 		VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
1284 		VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
1285 		present.swapchainCount = 1;
1286 		present.pSwapchains = &swapchain;
1287 		present.pImageIndices = &frameData.curSwapchainImage;
1288 		present.pWaitSemaphores = &renderingCompleteSemaphore_;
1289 		present.waitSemaphoreCount = 1;
1290 
1291 		VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
1292 		if (res == VK_ERROR_OUT_OF_DATE_KHR) {
1293 			// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
1294 			// Do the increment.
1295 			outOfDateFrames_++;
1296 		} else if (res == VK_SUBOPTIMAL_KHR) {
1297 			outOfDateFrames_++;
1298 		} else if (res != VK_SUCCESS) {
1299 			_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));
1300 		} else {
1301 			// Success
1302 			outOfDateFrames_ = 0;
1303 		}
1304 	} else {
1305 		// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.
1306 		outOfDateFrames_++;
1307 		frameData.skipSwap = false;
1308 	}
1309 }
1310 
Run(int frame)1311 void VulkanRenderManager::Run(int frame) {
1312 	BeginSubmitFrame(frame);
1313 
1314 	FrameData &frameData = frameData_[frame];
1315 	auto &stepsOnThread = frameData_[frame].steps;
1316 	VkCommandBuffer cmd = frameData.mainCmd;
1317 	queueRunner_.PreprocessSteps(stepsOnThread);
1318 	//queueRunner_.LogSteps(stepsOnThread, false);
1319 	queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
1320 	stepsOnThread.clear();
1321 
1322 	switch (frameData.type) {
1323 	case VKRRunType::END:
1324 		EndSubmitFrame(frame);
1325 		break;
1326 
1327 	case VKRRunType::SYNC:
1328 		EndSyncFrame(frame);
1329 		break;
1330 
1331 	default:
1332 		_dbg_assert_(false);
1333 	}
1334 
1335 	VLOG("PULL: Finished running frame %d", frame);
1336 }
1337 
EndSyncFrame(int frame)1338 void VulkanRenderManager::EndSyncFrame(int frame) {
1339 	FrameData &frameData = frameData_[frame];
1340 
1341 	frameData.readbackFenceUsed = true;
1342 
1343 	// The submit will trigger the readbackFence.
1344 	Submit(frame, false);
1345 
1346 	// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
1347 	vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
1348 	vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);
1349 
1350 	// At this point we can resume filling the command buffers for the current frame since
1351 	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
1352 	// No need to switch to the next frame number.
1353 	VkCommandBufferBeginInfo begin{
1354 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1355 		nullptr,
1356 		VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
1357 	};
1358 	VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
1359 	_assert_(res == VK_SUCCESS);
1360 
1361 	if (useThread_) {
1362 		std::unique_lock<std::mutex> lock(frameData.push_mutex);
1363 		frameData.readyForFence = true;
1364 		frameData.push_condVar.notify_all();
1365 	}
1366 }
1367 
// Hands the queued steps off as a SYNC run (to the render thread, or runs them
// inline when threading is off), then blocks until the GPU work has completed
// so readback results are CPU-visible. Called by the *ToMemorySync functions.
void VulkanRenderManager::FlushSync() {
	renderStepOffset_ += (int)steps_.size();

	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	if (!useThread_) {
		// Single-threaded: run the steps synchronously right here.
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.type = VKRRunType::SYNC;
		Run(curFrame);
	} else {
		// Publish the steps under the pull mutex and wake the render thread.
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.readyForRun = true;
		_dbg_assert_(!frameData.readyForFence);
		frameData.type = VKRRunType::SYNC;
		frameData.pull_condVar.notify_all();
	}

	// Separate scope: pull_mutex must be released before push_mutex is taken,
	// since EndSyncFrame() on the render thread signals under push_mutex.
	if (useThread_) {
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		// Wait for the flush to be hit, since we're syncing.
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
	}
}
1399