#include <algorithm>
#include <cstdint>
#include <sstream>
#include <vector>

#include "Common/Log.h"
#include "Common/StringUtils.h"

#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/Thread/ThreadUtil.h"
12
// Verbose logging used throughout this file. With the condition below hard-wired to 0,
// VLOG(...) expands to nothing and its arguments are never evaluated; switch the condition
// to have it forward to INFO_LOG on the G3D channel.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Fallback definition in case the included headers did not provide UINT64_MAX.
#ifndef UINT64_MAX
#define UINT64_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
22
23 using namespace PPSSPP_VK;
24
VKRFramebuffer(VulkanContext * vk,VkCommandBuffer initCmd,VkRenderPass renderPass,int _width,int _height,const char * tag)25 VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VkRenderPass renderPass, int _width, int _height, const char *tag) : vulkan_(vk) {
26 width = _width;
27 height = _height;
28
29 CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
30 CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
31
32 VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
33 VkImageView views[2]{};
34
35 fbci.renderPass = renderPass;
36 fbci.attachmentCount = 2;
37 fbci.pAttachments = views;
38 views[0] = color.imageView;
39 views[1] = depth.imageView;
40 fbci.width = width;
41 fbci.height = height;
42 fbci.layers = 1;
43
44 VkResult res = vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf);
45 _assert_(res == VK_SUCCESS);
46
47 if (tag && vk->Extensions().EXT_debug_utils) {
48 vk->SetDebugName(color.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_color_%s", tag).c_str());
49 vk->SetDebugName(depth.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_depth_%s", tag).c_str());
50 vk->SetDebugName(framebuf, VK_OBJECT_TYPE_FRAMEBUFFER, StringFromFormat("fb_%s", tag).c_str());
51 this->tag = tag;
52 }
53 }
54
~VKRFramebuffer()55 VKRFramebuffer::~VKRFramebuffer() {
56 if (color.image)
57 vulkan_->Delete().QueueDeleteImage(color.image);
58 if (depth.image)
59 vulkan_->Delete().QueueDeleteImage(depth.image);
60 if (color.imageView)
61 vulkan_->Delete().QueueDeleteImageView(color.imageView);
62 if (depth.imageView)
63 vulkan_->Delete().QueueDeleteImageView(depth.imageView);
64 if (depth.depthSampleView)
65 vulkan_->Delete().QueueDeleteImageView(depth.depthSampleView);
66 if (color.memory)
67 vulkan_->Delete().QueueDeleteDeviceMemory(color.memory);
68 if (depth.memory)
69 vulkan_->Delete().QueueDeleteDeviceMemory(depth.memory);
70 if (framebuf)
71 vulkan_->Delete().QueueDeleteFramebuffer(framebuf);
72 }
73
CreateImage(VulkanContext * vulkan,VkCommandBuffer cmd,VKRImage & img,int width,int height,VkFormat format,VkImageLayout initialLayout,bool color,const char * tag)74 void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
75 VkImageCreateInfo ici{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
76 ici.arrayLayers = 1;
77 ici.mipLevels = 1;
78 ici.extent.width = width;
79 ici.extent.height = height;
80 ici.extent.depth = 1;
81 ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
82 ici.imageType = VK_IMAGE_TYPE_2D;
83 ici.samples = VK_SAMPLE_COUNT_1_BIT;
84 ici.tiling = VK_IMAGE_TILING_OPTIMAL;
85 ici.format = format;
86 // Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
87 ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
88 if (color) {
89 ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
90 } else {
91 ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
92 }
93
94 VkResult res = vkCreateImage(vulkan->GetDevice(), &ici, nullptr, &img.image);
95 _dbg_assert_(res == VK_SUCCESS);
96
97 VkMemoryRequirements memreq;
98 bool dedicatedAllocation = false;
99 vulkan->GetImageMemoryRequirements(img.image, &memreq, &dedicatedAllocation);
100
101 VkMemoryAllocateInfo alloc{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
102 alloc.allocationSize = memreq.size;
103 VkMemoryDedicatedAllocateInfoKHR dedicatedAllocateInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
104 if (dedicatedAllocation) {
105 dedicatedAllocateInfo.image = img.image;
106 alloc.pNext = &dedicatedAllocateInfo;
107 }
108
109 vulkan->MemoryTypeFromProperties(memreq.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &alloc.memoryTypeIndex);
110
111 res = vkAllocateMemory(vulkan->GetDevice(), &alloc, nullptr, &img.memory);
112 _dbg_assert_(res == VK_SUCCESS);
113
114 res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0);
115 _dbg_assert_(res == VK_SUCCESS);
116
117 VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
118
119 VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
120 ivci.components = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY };
121 ivci.format = ici.format;
122 ivci.image = img.image;
123 ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
124 ivci.subresourceRange.aspectMask = aspects;
125 ivci.subresourceRange.layerCount = 1;
126 ivci.subresourceRange.levelCount = 1;
127 res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView);
128 _dbg_assert_(res == VK_SUCCESS);
129
130 // Separate view for texture sampling that only exposes depth.
131 if (!color) {
132 ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
133 res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.depthSampleView);
134 _dbg_assert_(res == VK_SUCCESS);
135 } else {
136 img.depthSampleView = VK_NULL_HANDLE;
137 }
138
139 VkPipelineStageFlags dstStage;
140 VkAccessFlagBits dstAccessMask;
141 switch (initialLayout) {
142 case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
143 dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
144 dstStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
145 break;
146 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
147 dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
148 dstStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
149 break;
150 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
151 dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
152 dstStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
153 break;
154 default:
155 Crash();
156 return;
157 }
158
159 TransitionImageLayout2(cmd, img.image, 0, 1, aspects,
160 VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
161 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, dstStage,
162 0, dstAccessMask);
163 img.layout = initialLayout;
164
165 img.format = format;
166 img.tag = tag ? tag : "N/A";
167 }
168
VulkanRenderManager(VulkanContext * vulkan)169 VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
170 VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
171 semaphoreCreateInfo.flags = 0;
172 VkResult res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore_);
173 _dbg_assert_(res == VK_SUCCESS);
174 res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &renderingCompleteSemaphore_);
175 _dbg_assert_(res == VK_SUCCESS);
176
177 inflightFramesAtStart_ = vulkan_->GetInflightFrames();
178 for (int i = 0; i < inflightFramesAtStart_; i++) {
179 VkCommandPoolCreateInfo cmd_pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
180 cmd_pool_info.queueFamilyIndex = vulkan_->GetGraphicsQueueFamilyIndex();
181 cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
182 VkResult res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolInit);
183 _dbg_assert_(res == VK_SUCCESS);
184 res = vkCreateCommandPool(vulkan_->GetDevice(), &cmd_pool_info, nullptr, &frameData_[i].cmdPoolMain);
185 _dbg_assert_(res == VK_SUCCESS);
186
187 VkCommandBufferAllocateInfo cmd_alloc = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
188 cmd_alloc.commandPool = frameData_[i].cmdPoolInit;
189 cmd_alloc.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
190 cmd_alloc.commandBufferCount = 1;
191
192 res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].initCmd);
193 _dbg_assert_(res == VK_SUCCESS);
194 cmd_alloc.commandPool = frameData_[i].cmdPoolMain;
195 res = vkAllocateCommandBuffers(vulkan_->GetDevice(), &cmd_alloc, &frameData_[i].mainCmd);
196 _dbg_assert_(res == VK_SUCCESS);
197
198 // Creating the frame fence with true so they can be instantly waited on the first frame
199 frameData_[i].fence = vulkan_->CreateFence(true);
200
201 // This fence one is used for synchronizing readbacks. Does not need preinitialization.
202 frameData_[i].readbackFence = vulkan_->CreateFence(false);
203
204 VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
205 query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
206 query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
207 res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
208 }
209
210 queueRunner_.CreateDeviceObjects();
211
212 // AMD hack for issue #10097 (older drivers only.)
213 const auto &props = vulkan_->GetPhysicalDeviceProperties().properties;
214 if (props.vendorID == VULKAN_VENDOR_AMD && props.apiVersion < VK_API_VERSION_1_1) {
215 useThread_ = false;
216 }
217 }
218
CreateBackbuffers()219 bool VulkanRenderManager::CreateBackbuffers() {
220 if (!vulkan_->GetSwapchain()) {
221 ERROR_LOG(G3D, "No swapchain - can't create backbuffers");
222 return false;
223 }
224 VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
225 _dbg_assert_(res == VK_SUCCESS);
226
227 VkImage *swapchainImages = new VkImage[swapchainImageCount_];
228 res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
229 if (res != VK_SUCCESS) {
230 ERROR_LOG(G3D, "vkGetSwapchainImagesKHR failed");
231 delete[] swapchainImages;
232 return false;
233 }
234
235 VkCommandBuffer cmdInit = GetInitCmd();
236
237 for (uint32_t i = 0; i < swapchainImageCount_; i++) {
238 SwapchainImageData sc_buffer{};
239 sc_buffer.image = swapchainImages[i];
240
241 VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
242 color_image_view.format = vulkan_->GetSwapchainFormat();
243 color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
244 color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
245 color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
246 color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
247 color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
248 color_image_view.subresourceRange.baseMipLevel = 0;
249 color_image_view.subresourceRange.levelCount = 1;
250 color_image_view.subresourceRange.baseArrayLayer = 0;
251 color_image_view.subresourceRange.layerCount = 1;
252 color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
253 color_image_view.flags = 0;
254 color_image_view.image = sc_buffer.image;
255
256 // We leave the images as UNDEFINED, there's no need to pre-transition them as
257 // the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
258 // Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.
259
260 res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
261 swapchainImages_.push_back(sc_buffer);
262 _dbg_assert_(res == VK_SUCCESS);
263 }
264 delete[] swapchainImages;
265
266 // Must be before InitBackbufferRenderPass.
267 if (InitDepthStencilBuffer(cmdInit)) {
268 InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
269 }
270 curWidthRaw_ = -1;
271 curHeightRaw_ = -1;
272
273 if (HasBackbuffers()) {
274 VLOG("Backbuffers Created");
275 }
276
277 if (newInflightFrames_ != -1) {
278 INFO_LOG(G3D, "Updating inflight frames to %d", newInflightFrames_);
279 vulkan_->UpdateInflightFrames(newInflightFrames_);
280 newInflightFrames_ = -1;
281 }
282
283 outOfDateFrames_ = 0;
284
285 // Start the thread.
286 if (useThread_ && HasBackbuffers()) {
287 run_ = true;
288 // Won't necessarily be 0.
289 threadInitFrame_ = vulkan_->GetCurFrame();
290 INFO_LOG(G3D, "Starting Vulkan submission thread (threadInitFrame_ = %d)", vulkan_->GetCurFrame());
291 thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
292 }
293 return true;
294 }
295
StopThread()296 void VulkanRenderManager::StopThread() {
297 if (useThread_ && run_) {
298 run_ = false;
299 // Stop the thread.
300 for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
301 auto &frameData = frameData_[i];
302 {
303 std::unique_lock<std::mutex> lock(frameData.push_mutex);
304 frameData.push_condVar.notify_all();
305 }
306 {
307 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
308 frameData.pull_condVar.notify_all();
309 }
310 // Zero the queries so we don't try to pull them later.
311 frameData.profile.timestampDescriptions.clear();
312 }
313 thread_.join();
314 INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
315
316 // Eat whatever has been queued up for this frame if anything.
317 Wipe();
318
319 // Wait for any fences to finish and be resignaled, so we don't have sync issues.
320 // Also clean out any queued data, which might refer to things that might not be valid
321 // when we restart...
322 for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
323 auto &frameData = frameData_[i];
324 _assert_(!frameData.readyForRun);
325 _assert_(frameData.steps.empty());
326 if (frameData.hasInitCommands) {
327 // Clear 'em out. This can happen on restart sometimes.
328 vkEndCommandBuffer(frameData.initCmd);
329 frameData.hasInitCommands = false;
330 }
331 frameData.readyForRun = false;
332 for (size_t i = 0; i < frameData.steps.size(); i++) {
333 delete frameData.steps[i];
334 }
335 frameData.steps.clear();
336
337 std::unique_lock<std::mutex> lock(frameData.push_mutex);
338 while (!frameData.readyForFence) {
339 VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
340 frameData.push_condVar.wait(lock);
341 }
342 }
343 } else {
344 INFO_LOG(G3D, "Vulkan submission thread was already stopped.");
345 }
346 }
347
DestroyBackbuffers()348 void VulkanRenderManager::DestroyBackbuffers() {
349 StopThread();
350 vulkan_->WaitUntilQueueIdle();
351
352 for (auto &image : swapchainImages_) {
353 vulkan_->Delete().QueueDeleteImageView(image.view);
354 }
355 swapchainImages_.clear();
356
357 if (depth_.view) {
358 vulkan_->Delete().QueueDeleteImageView(depth_.view);
359 }
360 if (depth_.image) {
361 vulkan_->Delete().QueueDeleteImage(depth_.image);
362 }
363 if (depth_.mem) {
364 vulkan_->Delete().QueueDeleteDeviceMemory(depth_.mem);
365 }
366 depth_ = {};
367 for (uint32_t i = 0; i < framebuffers_.size(); i++) {
368 _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
369 vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
370 }
371 framebuffers_.clear();
372
373 INFO_LOG(G3D, "Backbuffers destroyed");
374 }
375
~VulkanRenderManager()376 VulkanRenderManager::~VulkanRenderManager() {
377 INFO_LOG(G3D, "VulkanRenderManager destructor");
378 StopThread();
379 vulkan_->WaitUntilQueueIdle();
380
381 VkDevice device = vulkan_->GetDevice();
382 vkDestroySemaphore(device, acquireSemaphore_, nullptr);
383 vkDestroySemaphore(device, renderingCompleteSemaphore_, nullptr);
384 for (int i = 0; i < inflightFramesAtStart_; i++) {
385 vkFreeCommandBuffers(device, frameData_[i].cmdPoolInit, 1, &frameData_[i].initCmd);
386 vkFreeCommandBuffers(device, frameData_[i].cmdPoolMain, 1, &frameData_[i].mainCmd);
387 vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
388 vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
389 vkDestroyFence(device, frameData_[i].fence, nullptr);
390 vkDestroyFence(device, frameData_[i].readbackFence, nullptr);
391 vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
392 }
393 queueRunner_.DestroyDeviceObjects();
394 }
395
ThreadFunc()396 void VulkanRenderManager::ThreadFunc() {
397 SetCurrentThreadName("RenderMan");
398 int threadFrame = threadInitFrame_;
399 bool nextFrame = false;
400 bool firstFrame = true;
401 while (true) {
402 {
403 if (nextFrame) {
404 threadFrame++;
405 if (threadFrame >= vulkan_->GetInflightFrames())
406 threadFrame = 0;
407 }
408 FrameData &frameData = frameData_[threadFrame];
409 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
410 while (!frameData.readyForRun && run_) {
411 VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame);
412 frameData.pull_condVar.wait(lock);
413 }
414 if (!frameData.readyForRun && !run_) {
415 // This means we're out of frames to render and run_ is false, so bail.
416 break;
417 }
418 VLOG("PULL: frame[%d].readyForRun = false", threadFrame);
419 frameData.readyForRun = false;
420 // Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
421 // but that created a race condition where frames could end up not finished properly on resize etc.
422
423 // Only increment next time if we're done.
424 nextFrame = frameData.type == VKRRunType::END;
425 _dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
426 }
427 VLOG("PULL: Running frame %d", threadFrame);
428 if (firstFrame) {
429 INFO_LOG(G3D, "Running first frame (%d)", threadFrame);
430 firstFrame = false;
431 }
432 Run(threadFrame);
433 VLOG("PULL: Finished frame %d", threadFrame);
434 }
435
436 // Wait for the device to be done with everything, before tearing stuff down.
437 vkDeviceWaitIdle(vulkan_->GetDevice());
438
439 VLOG("PULL: Quitting");
440 }
441
BeginFrame(bool enableProfiling)442 void VulkanRenderManager::BeginFrame(bool enableProfiling) {
443 VLOG("BeginFrame");
444 VkDevice device = vulkan_->GetDevice();
445
446 int curFrame = vulkan_->GetCurFrame();
447 FrameData &frameData = frameData_[curFrame];
448
449 // Make sure the very last command buffer from the frame before the previous has been fully executed.
450 if (useThread_) {
451 std::unique_lock<std::mutex> lock(frameData.push_mutex);
452 while (!frameData.readyForFence) {
453 VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
454 frameData.push_condVar.wait(lock);
455 }
456 frameData.readyForFence = false;
457 }
458
459 VLOG("PUSH: Fencing %d", curFrame);
460
461 vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX);
462 vkResetFences(device, 1, &frameData.fence);
463
464 // Can't set this until after the fence.
465 frameData.profilingEnabled_ = enableProfiling;
466 frameData.readbackFenceUsed = false;
467
468 uint64_t queryResults[MAX_TIMESTAMP_QUERIES];
469
470 if (frameData.profilingEnabled_) {
471 // Pull the profiling results from last time and produce a summary!
472 if (!frameData.profile.timestampDescriptions.empty()) {
473 int numQueries = (int)frameData.profile.timestampDescriptions.size();
474 VkResult res = vkGetQueryPoolResults(
475 vulkan_->GetDevice(),
476 frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),
477 VK_QUERY_RESULT_64_BIT);
478 if (res == VK_SUCCESS) {
479 double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);
480 int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
481 uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);
482 std::stringstream str;
483
484 char line[256];
485 snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
486 str << line;
487 snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
488 str << line;
489 for (int i = 0; i < numQueries - 1; i++) {
490 uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
491 double milliseconds = (double)diff * timestampConversionFactor;
492 snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
493 str << line;
494 }
495 frameData.profile.profileSummary = str.str();
496 } else {
497 frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";
498 }
499 } else {
500 frameData.profile.profileSummary = "(no GPU profile data collected)";
501 }
502 }
503
504 // Must be after the fence - this performs deletes.
505 VLOG("PUSH: BeginFrame %d", curFrame);
506 if (!run_) {
507 WARN_LOG(G3D, "BeginFrame while !run_!");
508 }
509 vulkan_->BeginFrame();
510
511 insideFrame_ = true;
512 renderStepOffset_ = 0;
513
514 frameData.profile.timestampDescriptions.clear();
515 if (frameData.profilingEnabled_) {
516 // For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,
517 // unless we want to limit ourselves to only measure the main cmd buffer.
518 // Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.
519 // Reserve the first two queries for initCmd.
520 frameData.profile.timestampDescriptions.push_back("initCmd Begin");
521 frameData.profile.timestampDescriptions.push_back("initCmd");
522 VkCommandBuffer initCmd = GetInitCmd();
523 vkCmdResetQueryPool(initCmd, frameData.profile.queryPool, 0, MAX_TIMESTAMP_QUERIES);
524 vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.profile.queryPool, 0);
525 }
526 }
527
GetInitCmd()528 VkCommandBuffer VulkanRenderManager::GetInitCmd() {
529 int curFrame = vulkan_->GetCurFrame();
530 FrameData &frameData = frameData_[curFrame];
531 if (!frameData.hasInitCommands) {
532 VkCommandBufferBeginInfo begin = {
533 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
534 nullptr,
535 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
536 };
537 VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
538 if (res != VK_SUCCESS) {
539 return VK_NULL_HANDLE;
540 }
541 frameData.hasInitCommands = true;
542 }
543 return frameData_[curFrame].initCmd;
544 }
545
EndCurRenderStep()546 void VulkanRenderManager::EndCurRenderStep() {
547 // Save the accumulated pipeline flags so we can use that to configure the render pass.
548 // We'll often be able to avoid loading/saving the depth/stencil buffer.
549 if (curRenderStep_) {
550 curRenderStep_->render.pipelineFlags = curPipelineFlags_;
551 // We don't do this optimization for very small targets, probably not worth it.
552 if (!curRenderArea_.Empty() && (curWidth_ > 32 && curHeight_ > 32)) {
553 curRenderStep_->render.renderArea = curRenderArea_.ToVkRect2D();
554 } else {
555 curRenderStep_->render.renderArea.offset = {};
556 curRenderStep_->render.renderArea.extent = { (uint32_t)curWidth_, (uint32_t)curHeight_ };
557 }
558 curRenderArea_.Reset();
559
560 // We no longer have a current render step.
561 curRenderStep_ = nullptr;
562 curPipelineFlags_ = 0;
563 }
564 }
565
BindFramebufferAsRenderTarget(VKRFramebuffer * fb,VKRRenderPassAction color,VKRRenderPassAction depth,VKRRenderPassAction stencil,uint32_t clearColor,float clearDepth,uint8_t clearStencil,const char * tag)566 void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassAction color, VKRRenderPassAction depth, VKRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
567 _dbg_assert_(insideFrame_);
568 // Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.
569 if (!steps_.empty() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
570 u32 clearMask = 0;
571 if (color == VKRRenderPassAction::CLEAR) {
572 clearMask |= VK_IMAGE_ASPECT_COLOR_BIT;
573 }
574 if (depth == VKRRenderPassAction::CLEAR) {
575 clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
576 }
577 if (stencil == VKRRenderPassAction::CLEAR) {
578 clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
579 }
580
581 // If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.
582 // If there's no clear needed, let's also do that.
583 //
584 // However, if we do need a clear and there are no commands in the previous pass,
585 // we want the queuerunner to have the opportunity to merge, so we'll go ahead and make a new renderpass.
586 if (clearMask == 0 || !steps_.back()->commands.empty()) {
587 curRenderStep_ = steps_.back();
588 curStepHasViewport_ = false;
589 curStepHasScissor_ = false;
590 for (const auto &c : steps_.back()->commands) {
591 if (c.cmd == VKRRenderCommand::VIEWPORT) {
592 curStepHasViewport_ = true;
593 } else if (c.cmd == VKRRenderCommand::SCISSOR) {
594 curStepHasScissor_ = true;
595 }
596 }
597 if (clearMask != 0) {
598 VkRenderData data{ VKRRenderCommand::CLEAR };
599 data.clear.clearColor = clearColor;
600 data.clear.clearZ = clearDepth;
601 data.clear.clearStencil = clearStencil;
602 data.clear.clearMask = clearMask;
603 curRenderStep_->commands.push_back(data);
604 curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
605 }
606 return;
607 }
608 }
609
610 // More redundant bind elimination.
611 if (curRenderStep_) {
612 if (curRenderStep_->commands.empty()) {
613 if (curRenderStep_->render.color != VKRRenderPassAction::CLEAR && curRenderStep_->render.depth != VKRRenderPassAction::CLEAR && curRenderStep_->render.stencil != VKRRenderPassAction::CLEAR) {
614 // Can trivially kill the last empty render step.
615 _dbg_assert_(steps_.back() == curRenderStep_);
616 delete steps_.back();
617 steps_.pop_back();
618 curRenderStep_ = nullptr;
619 }
620 VLOG("Empty render step. Usually happens after uploading pixels..");
621 }
622
623 EndCurRenderStep();
624 }
625
626 // Older Mali drivers have issues with depth and stencil don't match load/clear/etc.
627 // TODO: Determine which versions and do this only where necessary.
628 u32 lateClearMask = 0;
629 if (depth != stencil && vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_ARM) {
630 if (stencil == VKRRenderPassAction::DONT_CARE) {
631 stencil = depth;
632 } else if (depth == VKRRenderPassAction::DONT_CARE) {
633 depth = stencil;
634 } else if (stencil == VKRRenderPassAction::CLEAR) {
635 depth = stencil;
636 lateClearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
637 } else if (depth == VKRRenderPassAction::CLEAR) {
638 stencil = depth;
639 lateClearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
640 }
641 }
642
643 VKRStep *step = new VKRStep{ VKRStepType::RENDER };
644 step->render.framebuffer = fb;
645 step->render.color = color;
646 step->render.depth = depth;
647 step->render.stencil = stencil;
648 step->render.clearColor = clearColor;
649 step->render.clearDepth = clearDepth;
650 step->render.clearStencil = clearStencil;
651 step->render.numDraws = 0;
652 step->render.numReads = 0;
653 step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
654 step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
655 step->tag = tag;
656 steps_.push_back(step);
657
658 if (fb) {
659 // If there's a KEEP, we naturally read from the framebuffer.
660 if (color == VKRRenderPassAction::KEEP || depth == VKRRenderPassAction::KEEP || stencil == VKRRenderPassAction::KEEP) {
661 step->dependencies.insert(fb);
662 }
663 }
664
665 curRenderStep_ = step;
666 curStepHasViewport_ = false;
667 curStepHasScissor_ = false;
668 if (fb) {
669 curWidthRaw_ = fb->width;
670 curHeightRaw_ = fb->height;
671 curWidth_ = fb->width;
672 curHeight_ = fb->height;
673 } else {
674 curWidthRaw_ = vulkan_->GetBackbufferWidth();
675 curHeightRaw_ = vulkan_->GetBackbufferHeight();
676 if (g_display_rotation == DisplayRotation::ROTATE_90 || g_display_rotation == DisplayRotation::ROTATE_270) {
677 curWidth_ = curHeightRaw_;
678 curHeight_ = curWidthRaw_;
679 } else {
680 curWidth_ = curWidthRaw_;
681 curHeight_ = curHeightRaw_;
682 }
683 }
684
685 if (color == VKRRenderPassAction::CLEAR || depth == VKRRenderPassAction::CLEAR || stencil == VKRRenderPassAction::CLEAR) {
686 curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
687 }
688
689 // See above - we add a clear afterward if only one side for depth/stencil CLEAR/KEEP.
690 if (lateClearMask != 0) {
691 VkRenderData data{ VKRRenderCommand::CLEAR };
692 data.clear.clearColor = clearColor;
693 data.clear.clearZ = clearDepth;
694 data.clear.clearStencil = clearStencil;
695 data.clear.clearMask = lateClearMask;
696 curRenderStep_->commands.push_back(data);
697 }
698 }
699
CopyFramebufferToMemorySync(VKRFramebuffer * src,VkImageAspectFlags aspectBits,int x,int y,int w,int h,Draw::DataFormat destFormat,uint8_t * pixels,int pixelStride,const char * tag)700 bool VulkanRenderManager::CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
701 _dbg_assert_(insideFrame_);
702 for (int i = (int)steps_.size() - 1; i >= 0; i--) {
703 if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
704 steps_[i]->render.numReads++;
705 break;
706 }
707 }
708
709 EndCurRenderStep();
710
711 VKRStep *step = new VKRStep{ VKRStepType::READBACK };
712 step->readback.aspectMask = aspectBits;
713 step->readback.src = src;
714 step->readback.srcRect.offset = { x, y };
715 step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };
716 step->dependencies.insert(src);
717 step->tag = tag;
718 steps_.push_back(step);
719
720 FlushSync();
721
722 Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED;
723 if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) {
724 if (src) {
725 switch (src->color.format) {
726 case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;
727 default: _assert_(false);
728 }
729 } else {
730 // Backbuffer.
731 if (!(vulkan_->GetSurfaceCapabilities().supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) {
732 ERROR_LOG(G3D, "Copying from backbuffer not supported, can't take screenshots");
733 return false;
734 }
735 switch (vulkan_->GetSwapchainFormat()) {
736 case VK_FORMAT_B8G8R8A8_UNORM: srcFormat = Draw::DataFormat::B8G8R8A8_UNORM; break;
737 case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;
738 // NOTE: If you add supported formats here, make sure to also support them in VulkanQueueRunner::CopyReadbackBuffer.
739 default:
740 ERROR_LOG(G3D, "Unsupported backbuffer format for screenshots");
741 return false;
742 }
743 }
744 } else if (aspectBits & VK_IMAGE_ASPECT_STENCIL_BIT) {
745 // Copies from stencil are always S8.
746 srcFormat = Draw::DataFormat::S8;
747 } else if (aspectBits & VK_IMAGE_ASPECT_DEPTH_BIT) {
748 switch (src->depth.format) {
749 case VK_FORMAT_D24_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D24_S8; break;
750 case VK_FORMAT_D32_SFLOAT_S8_UINT: srcFormat = Draw::DataFormat::D32F; break;
751 case VK_FORMAT_D16_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D16; break;
752 default: _assert_(false);
753 }
754 } else {
755 _assert_(false);
756 }
757 // Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.
758 queueRunner_.CopyReadbackBuffer(w, h, srcFormat, destFormat, pixelStride, pixels);
759 return true;
760 }
761
CopyImageToMemorySync(VkImage image,int mipLevel,int x,int y,int w,int h,Draw::DataFormat destFormat,uint8_t * pixels,int pixelStride,const char * tag)762 void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
763 _dbg_assert_(insideFrame_);
764
765 EndCurRenderStep();
766
767 VKRStep *step = new VKRStep{ VKRStepType::READBACK_IMAGE };
768 step->readback_image.image = image;
769 step->readback_image.srcRect.offset = { x, y };
770 step->readback_image.srcRect.extent = { (uint32_t)w, (uint32_t)h };
771 step->readback_image.mipLevel = mipLevel;
772 step->tag = tag;
773 steps_.push_back(step);
774
775 FlushSync();
776
777 // Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.
778 queueRunner_.CopyReadbackBuffer(w, h, destFormat, destFormat, pixelStride, pixels);
779 }
780
InitBackbufferFramebuffers(int width,int height)781 bool VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
782 VkResult res;
783 // We share the same depth buffer but have multiple color buffers, see the loop below.
784 VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };
785
786 VLOG("InitFramebuffers: %dx%d", width, height);
787 VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
788 fb_info.renderPass = queueRunner_.GetBackbufferRenderPass();
789 fb_info.attachmentCount = 2;
790 fb_info.pAttachments = attachments;
791 fb_info.width = width;
792 fb_info.height = height;
793 fb_info.layers = 1;
794
795 framebuffers_.resize(swapchainImageCount_);
796
797 for (uint32_t i = 0; i < swapchainImageCount_; i++) {
798 attachments[0] = swapchainImages_[i].view;
799 res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
800 _dbg_assert_(res == VK_SUCCESS);
801 if (res != VK_SUCCESS) {
802 framebuffers_.clear();
803 return false;
804 }
805 }
806
807 return true;
808 }
809
InitDepthStencilBuffer(VkCommandBuffer cmd)810 bool VulkanRenderManager::InitDepthStencilBuffer(VkCommandBuffer cmd) {
811 VkResult res;
812 bool pass;
813
814 const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
815 int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
816 VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
817 image_info.imageType = VK_IMAGE_TYPE_2D;
818 image_info.format = depth_format;
819 image_info.extent.width = vulkan_->GetBackbufferWidth();
820 image_info.extent.height = vulkan_->GetBackbufferHeight();
821 image_info.extent.depth = 1;
822 image_info.mipLevels = 1;
823 image_info.arrayLayers = 1;
824 image_info.samples = VK_SAMPLE_COUNT_1_BIT;
825 image_info.queueFamilyIndexCount = 0;
826 image_info.pQueueFamilyIndices = nullptr;
827 image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
828 image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
829 image_info.flags = 0;
830
831 depth_.format = depth_format;
832
833 VkDevice device = vulkan_->GetDevice();
834 res = vkCreateImage(device, &image_info, nullptr, &depth_.image);
835 _dbg_assert_(res == VK_SUCCESS);
836 if (res != VK_SUCCESS)
837 return false;
838
839 vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
840
841 bool dedicatedAllocation = false;
842 VkMemoryRequirements mem_reqs;
843 vulkan_->GetImageMemoryRequirements(depth_.image, &mem_reqs, &dedicatedAllocation);
844
845 VkMemoryAllocateInfo mem_alloc = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
846 mem_alloc.allocationSize = mem_reqs.size;
847 mem_alloc.memoryTypeIndex = 0;
848
849 VkMemoryDedicatedAllocateInfoKHR dedicatedAllocateInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
850 if (dedicatedAllocation) {
851 dedicatedAllocateInfo.image = depth_.image;
852 mem_alloc.pNext = &dedicatedAllocateInfo;
853 }
854
855 // Use the memory properties to determine the type of memory required
856 pass = vulkan_->MemoryTypeFromProperties(mem_reqs.memoryTypeBits,
857 0, /* No requirements */
858 &mem_alloc.memoryTypeIndex);
859 _dbg_assert_(pass);
860 if (!pass)
861 return false;
862
863 res = vkAllocateMemory(device, &mem_alloc, NULL, &depth_.mem);
864 _dbg_assert_(res == VK_SUCCESS);
865 if (res != VK_SUCCESS)
866 return false;
867
868 res = vkBindImageMemory(device, depth_.image, depth_.mem, 0);
869 _dbg_assert_(res == VK_SUCCESS);
870 if (res != VK_SUCCESS)
871 return false;
872
873 TransitionImageLayout2(cmd, depth_.image, 0, 1,
874 aspectMask,
875 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
876 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
877 0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
878
879 VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
880 depth_view_info.image = depth_.image;
881 depth_view_info.format = depth_format;
882 depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
883 depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
884 depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
885 depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
886 depth_view_info.subresourceRange.aspectMask = aspectMask;
887 depth_view_info.subresourceRange.baseMipLevel = 0;
888 depth_view_info.subresourceRange.levelCount = 1;
889 depth_view_info.subresourceRange.baseArrayLayer = 0;
890 depth_view_info.subresourceRange.layerCount = 1;
891 depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
892 depth_view_info.flags = 0;
893
894 res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
895 _dbg_assert_(res == VK_SUCCESS);
896 if (res != VK_SUCCESS)
897 return false;
898
899 return true;
900 }
901
RemoveDrawCommands(std::vector<VkRenderData> * cmds)902 static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
903 // Here we remove any DRAW type commands when we hit a CLEAR.
904 for (auto &c : *cmds) {
905 if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {
906 c.cmd = VKRRenderCommand::REMOVED;
907 }
908 }
909 }
910
CleanupRenderCommands(std::vector<VkRenderData> * cmds)911 static void CleanupRenderCommands(std::vector<VkRenderData> *cmds) {
912 size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];
913 memset(lastCommand, -1, sizeof(lastCommand));
914
915 // Find any duplicate state commands (likely from RemoveDrawCommands.)
916 for (size_t i = 0; i < cmds->size(); ++i) {
917 auto &c = cmds->at(i);
918 auto &lastOfCmd = lastCommand[(uint8_t)c.cmd];
919
920 switch (c.cmd) {
921 case VKRRenderCommand::REMOVED:
922 continue;
923
924 case VKRRenderCommand::BIND_PIPELINE:
925 case VKRRenderCommand::VIEWPORT:
926 case VKRRenderCommand::SCISSOR:
927 case VKRRenderCommand::BLEND:
928 case VKRRenderCommand::STENCIL:
929 if (lastOfCmd != -1) {
930 cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;
931 }
932 break;
933
934 case VKRRenderCommand::PUSH_CONSTANTS:
935 // TODO: For now, we have to keep this one (it has an offset.) Still update lastCommand.
936 break;
937
938 case VKRRenderCommand::CLEAR:
939 // Ignore, doesn't participate in state.
940 continue;
941
942 case VKRRenderCommand::DRAW_INDEXED:
943 case VKRRenderCommand::DRAW:
944 default:
945 // Boundary - must keep state before this.
946 memset(lastCommand, -1, sizeof(lastCommand));
947 continue;
948 }
949
950 lastOfCmd = i;
951 }
952
953 // At this point, anything in lastCommand can be cleaned up too.
954 // Note that it's safe to remove the last unused PUSH_CONSTANTS here.
955 for (size_t i = 0; i < ARRAY_SIZE(lastCommand); ++i) {
956 auto &lastOfCmd = lastCommand[i];
957 if (lastOfCmd != -1) {
958 cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;
959 }
960 }
961 }
962
Clear(uint32_t clearColor,float clearZ,int clearStencil,int clearMask)963 void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {
964 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
965 if (!clearMask)
966 return;
967 // If this is the first drawing command or clears everything, merge it into the pass.
968 int allAspects = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
969 if (curRenderStep_->render.numDraws == 0 || clearMask == allAspects) {
970 curRenderStep_->render.clearColor = clearColor;
971 curRenderStep_->render.clearDepth = clearZ;
972 curRenderStep_->render.clearStencil = clearStencil;
973 curRenderStep_->render.color = (clearMask & VK_IMAGE_ASPECT_COLOR_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
974 curRenderStep_->render.depth = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
975 curRenderStep_->render.stencil = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassAction::CLEAR : VKRRenderPassAction::KEEP;
976
977 // In case there were commands already.
978 curRenderStep_->render.numDraws = 0;
979 RemoveDrawCommands(&curRenderStep_->commands);
980 } else {
981 VkRenderData data{ VKRRenderCommand::CLEAR };
982 data.clear.clearColor = clearColor;
983 data.clear.clearZ = clearZ;
984 data.clear.clearStencil = clearStencil;
985 data.clear.clearMask = clearMask;
986 curRenderStep_->commands.push_back(data);
987 }
988
989 curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);
990 }
991
CopyFramebuffer(VKRFramebuffer * src,VkRect2D srcRect,VKRFramebuffer * dst,VkOffset2D dstPos,VkImageAspectFlags aspectMask,const char * tag)992 void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, VkImageAspectFlags aspectMask, const char *tag) {
993 _dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);
994 _dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);
995 _dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);
996 _dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);
997
998 _dbg_assert_msg_(srcRect.extent.width > 0, "copy srcwidth == 0");
999 _dbg_assert_msg_(srcRect.extent.height > 0, "copy srcheight == 0");
1000
1001 _dbg_assert_msg_(dstPos.x >= 0, "dstPos offset x (%d) < 0", dstPos.x);
1002 _dbg_assert_msg_(dstPos.y >= 0, "dstPos offset y (%d) < 0", dstPos.y);
1003 _dbg_assert_msg_(dstPos.x + srcRect.extent.width <= (uint32_t)dst->width, "dstPos + extent x > width");
1004 _dbg_assert_msg_(dstPos.y + srcRect.extent.height <= (uint32_t)dst->height, "dstPos + extent y > height");
1005
1006 for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1007 if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
1008 if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1009 if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1010 steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1011 }
1012 }
1013 if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1014 if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1015 steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1016 }
1017 }
1018 steps_[i]->render.numReads++;
1019 break;
1020 }
1021 }
1022 for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1023 if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == dst) {
1024 if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1025 if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1026 steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1027 }
1028 }
1029 if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1030 if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1031 steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1032 }
1033 }
1034 break;
1035 }
1036 }
1037
1038 EndCurRenderStep();
1039
1040 VKRStep *step = new VKRStep{ VKRStepType::COPY };
1041
1042 step->copy.aspectMask = aspectMask;
1043 step->copy.src = src;
1044 step->copy.srcRect = srcRect;
1045 step->copy.dst = dst;
1046 step->copy.dstPos = dstPos;
1047 step->dependencies.insert(src);
1048 step->tag = tag;
1049 bool fillsDst = dst && srcRect.offset.x == 0 && srcRect.offset.y == 0 && srcRect.extent.width == dst->width && srcRect.extent.height == dst->height;
1050 if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
1051 step->dependencies.insert(dst);
1052
1053 std::unique_lock<std::mutex> lock(mutex_);
1054 steps_.push_back(step);
1055 }
1056
BlitFramebuffer(VKRFramebuffer * src,VkRect2D srcRect,VKRFramebuffer * dst,VkRect2D dstRect,VkImageAspectFlags aspectMask,VkFilter filter,const char * tag)1057 void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, VkImageAspectFlags aspectMask, VkFilter filter, const char *tag) {
1058 _dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);
1059 _dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);
1060 _dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);
1061 _dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);
1062
1063 _dbg_assert_msg_(srcRect.extent.width > 0, "blit srcwidth == 0");
1064 _dbg_assert_msg_(srcRect.extent.height > 0, "blit srcheight == 0");
1065
1066 _dbg_assert_msg_(dstRect.offset.x >= 0, "dstrect offset x < 0");
1067 _dbg_assert_msg_(dstRect.offset.y >= 0, "dstrect offset y < 0");
1068 _dbg_assert_msg_(dstRect.offset.x + dstRect.extent.width <= (uint32_t)dst->width, "dstrect offset x + extent > width");
1069 _dbg_assert_msg_(dstRect.offset.y + dstRect.extent.height <= (uint32_t)dst->height, "dstrect offset y + extent > height");
1070
1071 _dbg_assert_msg_(dstRect.extent.width > 0, "blit dstwidth == 0");
1072 _dbg_assert_msg_(dstRect.extent.height > 0, "blit dstheight == 0");
1073
1074 // TODO: Seem to be missing final layouts here like in Copy...
1075
1076 for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1077 if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {
1078 steps_[i]->render.numReads++;
1079 break;
1080 }
1081 }
1082
1083 EndCurRenderStep();
1084
1085 VKRStep *step = new VKRStep{ VKRStepType::BLIT };
1086
1087 step->blit.aspectMask = aspectMask;
1088 step->blit.src = src;
1089 step->blit.srcRect = srcRect;
1090 step->blit.dst = dst;
1091 step->blit.dstRect = dstRect;
1092 step->blit.filter = filter;
1093 step->dependencies.insert(src);
1094 step->tag = tag;
1095 bool fillsDst = dst && dstRect.offset.x == 0 && dstRect.offset.y == 0 && dstRect.extent.width == dst->width && dstRect.extent.height == dst->height;
1096 if (!fillsDst)
1097 step->dependencies.insert(dst);
1098
1099 std::unique_lock<std::mutex> lock(mutex_);
1100 steps_.push_back(step);
1101 }
1102
BindFramebufferAsTexture(VKRFramebuffer * fb,int binding,VkImageAspectFlags aspectBit,int attachment)1103 VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int attachment) {
1104 _dbg_assert_(curRenderStep_ != nullptr);
1105 // Mark the dependency, check for required transitions, and return the image.
1106
1107 // Optimization: If possible, use final*Layout to put the texture into the correct layout "early".
1108 for (int i = (int)steps_.size() - 1; i >= 0; i--) {
1109 if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {
1110 if (aspectBit == VK_IMAGE_ASPECT_COLOR_BIT) {
1111 // If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.
1112 if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1113 steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1114 }
1115 // If we find some other layout, a copy after this is likely involved. It's fine though,
1116 // we'll just transition it right as we need it and lose a tiny optimization.
1117 } else if (aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT) {
1118 // If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.
1119 if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
1120 steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1121 }
1122 } // We don't (yet?) support texturing from stencil images.
1123 steps_[i]->render.numReads++;
1124 break;
1125 }
1126 }
1127
1128 // Track dependencies fully.
1129 curRenderStep_->dependencies.insert(fb);
1130
1131 if (!curRenderStep_->preTransitions.empty() &&
1132 curRenderStep_->preTransitions.back().fb == fb &&
1133 curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
1134 // We're done.
1135 return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.depthSampleView;
1136 } else {
1137 curRenderStep_->preTransitions.push_back({ aspectBit, fb, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL });
1138 return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.depthSampleView;
1139 }
1140 }
1141
Finish()1142 void VulkanRenderManager::Finish() {
1143 EndCurRenderStep();
1144
1145 // Let's do just a bit of cleanup on render commands now.
1146 for (auto &step : steps_) {
1147 if (step->stepType == VKRStepType::RENDER) {
1148 CleanupRenderCommands(&step->commands);
1149 }
1150 }
1151
1152 int curFrame = vulkan_->GetCurFrame();
1153 FrameData &frameData = frameData_[curFrame];
1154 if (!useThread_) {
1155 frameData.steps = std::move(steps_);
1156 steps_.clear();
1157 frameData.type = VKRRunType::END;
1158 Run(curFrame);
1159 } else {
1160 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
1161 VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
1162 frameData.steps = std::move(steps_);
1163 steps_.clear();
1164 frameData.readyForRun = true;
1165 frameData.type = VKRRunType::END;
1166 frameData.pull_condVar.notify_all();
1167 }
1168 vulkan_->EndFrame();
1169
1170 insideFrame_ = false;
1171 }
1172
Wipe()1173 void VulkanRenderManager::Wipe() {
1174 for (auto step : steps_) {
1175 delete step;
1176 }
1177 steps_.clear();
1178 }
1179
1180 // Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
1181 // or stop it in the middle for a synchronous readback, then start over again mostly normally but without repeating
1182 // the backbuffer image acquisition.
BeginSubmitFrame(int frame)1183 void VulkanRenderManager::BeginSubmitFrame(int frame) {
1184 FrameData &frameData = frameData_[frame];
1185 if (!frameData.hasBegun) {
1186 // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
1187 // Now, I wonder if we should do this early in the frame or late? Right now we do it early, which should be fine.
1188 VkResult res = vkAcquireNextImageKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), UINT64_MAX, acquireSemaphore_, (VkFence)VK_NULL_HANDLE, &frameData.curSwapchainImage);
1189 if (res == VK_SUBOPTIMAL_KHR) {
1190 // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
1191 WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
1192 } else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
1193 WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
1194 frameData.skipSwap = true;
1195 } else {
1196 _assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
1197 }
1198
1199 VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
1200 begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
1201 res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
1202
1203 _assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));
1204
1205 queueRunner_.SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
1206
1207 frameData.hasBegun = true;
1208 }
1209 }
1210
Submit(int frame,bool triggerFrameFence)1211 void VulkanRenderManager::Submit(int frame, bool triggerFrameFence) {
1212 FrameData &frameData = frameData_[frame];
1213 if (frameData.hasInitCommands) {
1214 if (frameData.profilingEnabled_ && triggerFrameFence) {
1215 // Pre-allocated query ID 1.
1216 vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
1217 }
1218 VkResult res = vkEndCommandBuffer(frameData.initCmd);
1219 _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
1220 }
1221
1222 VkResult res = vkEndCommandBuffer(frameData.mainCmd);
1223 _assert_msg_(res == VK_SUCCESS, "vkEndCommandBuffer failed (main)! result=%s", VulkanResultToString(res));
1224
1225 VkCommandBuffer cmdBufs[2];
1226 int numCmdBufs = 0;
1227 if (frameData.hasInitCommands) {
1228 cmdBufs[numCmdBufs++] = frameData.initCmd;
1229 if (splitSubmit_) {
1230 // Send the init commands off separately. Used this once to confirm that the cause of a device loss was in the init cmdbuf.
1231 VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
1232 submit_info.commandBufferCount = (uint32_t)numCmdBufs;
1233 submit_info.pCommandBuffers = cmdBufs;
1234 res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE);
1235 if (res == VK_ERROR_DEVICE_LOST) {
1236 _assert_msg_(false, "Lost the Vulkan device in split submit! If this happens again, switch Graphics Backend away from Vulkan");
1237 } else {
1238 _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (init)! result=%s", VulkanResultToString(res));
1239 }
1240 numCmdBufs = 0;
1241 }
1242 }
1243 cmdBufs[numCmdBufs++] = frameData.mainCmd;
1244
1245 VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
1246 VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
1247 if (triggerFrameFence && !frameData.skipSwap) {
1248 submit_info.waitSemaphoreCount = 1;
1249 submit_info.pWaitSemaphores = &acquireSemaphore_;
1250 submit_info.pWaitDstStageMask = waitStage;
1251 }
1252 submit_info.commandBufferCount = (uint32_t)numCmdBufs;
1253 submit_info.pCommandBuffers = cmdBufs;
1254 if (triggerFrameFence && !frameData.skipSwap) {
1255 submit_info.signalSemaphoreCount = 1;
1256 submit_info.pSignalSemaphores = &renderingCompleteSemaphore_;
1257 }
1258 res = vkQueueSubmit(vulkan_->GetGraphicsQueue(), 1, &submit_info, triggerFrameFence ? frameData.fence : frameData.readbackFence);
1259 if (res == VK_ERROR_DEVICE_LOST) {
1260 _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
1261 } else {
1262 _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main, split=%d)! result=%s", (int)splitSubmit_, VulkanResultToString(res));
1263 }
1264
1265 // When !triggerFence, we notify after syncing with Vulkan.
1266 if (useThread_ && triggerFrameFence) {
1267 VLOG("PULL: Frame %d.readyForFence = true", frame);
1268 std::unique_lock<std::mutex> lock(frameData.push_mutex);
1269 frameData.readyForFence = true;
1270 frameData.push_condVar.notify_all();
1271 }
1272
1273 frameData.hasInitCommands = false;
1274 }
1275
EndSubmitFrame(int frame)1276 void VulkanRenderManager::EndSubmitFrame(int frame) {
1277 FrameData &frameData = frameData_[frame];
1278 frameData.hasBegun = false;
1279
1280 Submit(frame, true);
1281
1282 if (!frameData.skipSwap) {
1283 VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
1284 VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
1285 present.swapchainCount = 1;
1286 present.pSwapchains = &swapchain;
1287 present.pImageIndices = &frameData.curSwapchainImage;
1288 present.pWaitSemaphores = &renderingCompleteSemaphore_;
1289 present.waitSemaphoreCount = 1;
1290
1291 VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
1292 if (res == VK_ERROR_OUT_OF_DATE_KHR) {
1293 // We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
1294 // Do the increment.
1295 outOfDateFrames_++;
1296 } else if (res == VK_SUBOPTIMAL_KHR) {
1297 outOfDateFrames_++;
1298 } else if (res != VK_SUCCESS) {
1299 _assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));
1300 } else {
1301 // Success
1302 outOfDateFrames_ = 0;
1303 }
1304 } else {
1305 // We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.
1306 outOfDateFrames_++;
1307 frameData.skipSwap = false;
1308 }
1309 }
1310
Run(int frame)1311 void VulkanRenderManager::Run(int frame) {
1312 BeginSubmitFrame(frame);
1313
1314 FrameData &frameData = frameData_[frame];
1315 auto &stepsOnThread = frameData_[frame].steps;
1316 VkCommandBuffer cmd = frameData.mainCmd;
1317 queueRunner_.PreprocessSteps(stepsOnThread);
1318 //queueRunner_.LogSteps(stepsOnThread, false);
1319 queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
1320 stepsOnThread.clear();
1321
1322 switch (frameData.type) {
1323 case VKRRunType::END:
1324 EndSubmitFrame(frame);
1325 break;
1326
1327 case VKRRunType::SYNC:
1328 EndSyncFrame(frame);
1329 break;
1330
1331 default:
1332 _dbg_assert_(false);
1333 }
1334
1335 VLOG("PULL: Finished running frame %d", frame);
1336 }
1337
EndSyncFrame(int frame)1338 void VulkanRenderManager::EndSyncFrame(int frame) {
1339 FrameData &frameData = frameData_[frame];
1340
1341 frameData.readbackFenceUsed = true;
1342
1343 // The submit will trigger the readbackFence.
1344 Submit(frame, false);
1345
1346 // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
1347 vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
1348 vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);
1349
1350 // At this point we can resume filling the command buffers for the current frame since
1351 // we know the device is idle - and thus all previously enqueued command buffers have been processed.
1352 // No need to switch to the next frame number.
1353 VkCommandBufferBeginInfo begin{
1354 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1355 nullptr,
1356 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
1357 };
1358 VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
1359 _assert_(res == VK_SUCCESS);
1360
1361 if (useThread_) {
1362 std::unique_lock<std::mutex> lock(frameData.push_mutex);
1363 frameData.readyForFence = true;
1364 frameData.push_condVar.notify_all();
1365 }
1366 }
1367
FlushSync()1368 void VulkanRenderManager::FlushSync() {
1369 renderStepOffset_ += (int)steps_.size();
1370
1371 int curFrame = vulkan_->GetCurFrame();
1372 FrameData &frameData = frameData_[curFrame];
1373 if (!useThread_) {
1374 frameData.steps = std::move(steps_);
1375 steps_.clear();
1376 frameData.type = VKRRunType::SYNC;
1377 Run(curFrame);
1378 } else {
1379 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
1380 VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
1381 frameData.steps = std::move(steps_);
1382 steps_.clear();
1383 frameData.readyForRun = true;
1384 _dbg_assert_(!frameData.readyForFence);
1385 frameData.type = VKRRunType::SYNC;
1386 frameData.pull_condVar.notify_all();
1387 }
1388
1389 if (useThread_) {
1390 std::unique_lock<std::mutex> lock(frameData.push_mutex);
1391 // Wait for the flush to be hit, since we're syncing.
1392 while (!frameData.readyForFence) {
1393 VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
1394 frameData.push_condVar.wait(lock);
1395 }
1396 frameData.readyForFence = false;
1397 }
1398 }
1399