1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include <algorithm>
19
20 #include "Common/Data/Convert/SmallDataConvert.h"
21 #include "Common/Profiler/Profiler.h"
22 #include "Common/GPU/Vulkan/VulkanRenderManager.h"
23
24 #include "Common/Log.h"
25 #include "Common/MemoryUtil.h"
26 #include "Common/TimeUtil.h"
27 #include "Core/MemMap.h"
28 #include "Core/System.h"
29 #include "Core/Reporting.h"
30 #include "Core/Config.h"
31 #include "Core/CoreTiming.h"
32
33 #include "GPU/Math3D.h"
34 #include "GPU/GPUState.h"
35 #include "GPU/ge_constants.h"
36
37 #include "Common/GPU/Vulkan/VulkanContext.h"
38 #include "Common/GPU/Vulkan/VulkanMemory.h"
39
40 #include "GPU/Common/SplineCommon.h"
41 #include "GPU/Common/TransformCommon.h"
42 #include "GPU/Common/VertexDecoderCommon.h"
43 #include "GPU/Common/SoftwareTransformCommon.h"
44 #include "GPU/Common/DrawEngineCommon.h"
45 #include "GPU/Debugger/Debugger.h"
46 #include "GPU/Vulkan/DrawEngineVulkan.h"
47 #include "GPU/Vulkan/TextureCacheVulkan.h"
48 #include "GPU/Vulkan/ShaderManagerVulkan.h"
49 #include "GPU/Vulkan/PipelineManagerVulkan.h"
50 #include "GPU/Vulkan/FramebufferManagerVulkan.h"
51 #include "GPU/Vulkan/GPU_Vulkan.h"
52
53 using namespace PPSSPP_VK;
54
// Max size the long-lived vertex cache pushbuffer may reach before
// BeginFrame() wipes it and starts over.
enum {
	VERTEX_CACHE_SIZE = 8192 * 1024
};

// How often (in frames) the vertex cache is scanned for stale entries.
#define VERTEXCACHE_DECIMATION_INTERVAL 17
#define DESCRIPTORSET_DECIMATION_INTERVAL 1  // Temporarily cut to 1. Handle reuse breaks this when textures get deleted.

// Eviction ages (measured in flips, see gpuStats.numFlips) for cached vertex
// arrays, plus a cap on how many "unreliable" entries are killed per
// decimation pass so we don't rehash too much at once.
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };

// Descriptor set binding slots. These must match the layout built in
// InitDeviceObjects() and the bindings used by the generated shaders.
enum {
	DRAW_BINDING_TEXTURE = 0,
	DRAW_BINDING_2ND_TEXTURE = 1,
	DRAW_BINDING_DEPAL_TEXTURE = 2,
	DRAW_BINDING_DYNUBO_BASE = 3,
	DRAW_BINDING_DYNUBO_LIGHT = 4,
	DRAW_BINDING_DYNUBO_BONE = 5,
	DRAW_BINDING_TESS_STORAGE_BUF = 6,
	DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
	DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
};

enum {
	TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex)
};
79
DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw)
	: vulkan_(vulkan),
		draw_(draw),
		vai_(1024) {
	// The Vulkan path keeps weights and 8-bit normals in their compact
	// decoded formats rather than expanding to float.
	decOptions_.expandAllWeightsToFloat = false;
	decOptions_.expand8BitNormalsToFloat = false;

	// Allocate nicely aligned memory. Maybe graphics drivers will appreciate it.
	// All this is a LOT of memory, need to see if we can cut down somehow.
	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);

	// The index generator writes generated indices straight into decIndex.
	indexGen.Setup(decIndex);

	InitDeviceObjects();
}
96
InitDeviceObjects()97 void DrawEngineVulkan::InitDeviceObjects() {
98 // All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
99 VkDescriptorSetLayoutBinding bindings[9]{};
100 bindings[0].descriptorCount = 1;
101 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
102 bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
103 bindings[0].binding = DRAW_BINDING_TEXTURE;
104 bindings[1].descriptorCount = 1;
105 bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
106 bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
107 bindings[1].binding = DRAW_BINDING_2ND_TEXTURE;
108 bindings[2].descriptorCount = 1;
109 bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; // sampler is ignored though.
110 bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
111 bindings[2].binding = DRAW_BINDING_DEPAL_TEXTURE;
112 bindings[3].descriptorCount = 1;
113 bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
114 bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
115 bindings[3].binding = DRAW_BINDING_DYNUBO_BASE;
116 bindings[4].descriptorCount = 1;
117 bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
118 bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
119 bindings[4].binding = DRAW_BINDING_DYNUBO_LIGHT;
120 bindings[5].descriptorCount = 1;
121 bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
122 bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
123 bindings[5].binding = DRAW_BINDING_DYNUBO_BONE;
124 // Used only for hardware tessellation.
125 bindings[6].descriptorCount = 1;
126 bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
127 bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
128 bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;
129 bindings[7].descriptorCount = 1;
130 bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
131 bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
132 bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
133 bindings[8].descriptorCount = 1;
134 bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
135 bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
136 bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
137
138 VkDevice device = vulkan_->GetDevice();
139
140 VkDescriptorSetLayoutCreateInfo dsl{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
141 dsl.bindingCount = ARRAY_SIZE(bindings);
142 dsl.pBindings = bindings;
143 VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
144 _dbg_assert_(VK_SUCCESS == res);
145
146 // We are going to use one-shot descriptors in the initial implementation. Might look into caching them
147 // if creating and updating them turns out to be expensive.
148 for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
149 // We now create descriptor pools on demand, so removed from here.
150 // Note that pushUBO is also used for tessellation data (search for SetPushBuffer), and to upload
151 // the null texture. This should be cleaned up...
152 frame_[i].pushUBO = new VulkanPushBuffer(vulkan_, 8 * 1024 * 1024, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
153 frame_[i].pushVertex = new VulkanPushBuffer(vulkan_, 2 * 1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
154 frame_[i].pushIndex = new VulkanPushBuffer(vulkan_, 1 * 1024 * 1024, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
155
156 frame_[i].pushLocal = new VulkanPushBuffer(vulkan_, 1 * 1024 * 1024, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
157 }
158
159 VkPipelineLayoutCreateInfo pl{ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
160 pl.pPushConstantRanges = nullptr;
161 pl.pushConstantRangeCount = 0;
162 pl.setLayoutCount = 1;
163 pl.pSetLayouts = &descriptorSetLayout_;
164 pl.flags = 0;
165 res = vkCreatePipelineLayout(device, &pl, nullptr, &pipelineLayout_);
166 _dbg_assert_(VK_SUCCESS == res);
167
168 VkSamplerCreateInfo samp{ VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
169 samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
170 samp.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
171 samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
172 samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
173 samp.flags = 0;
174 samp.magFilter = VK_FILTER_NEAREST;
175 samp.minFilter = VK_FILTER_NEAREST;
176 res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_);
177 _dbg_assert_(VK_SUCCESS == res);
178 res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
179 _dbg_assert_(VK_SUCCESS == res);
180
181 vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
182
183 tessDataTransferVulkan = new TessellationDataTransferVulkan(vulkan_);
184 tessDataTransfer = tessDataTransferVulkan;
185 }
186
DrawEngineVulkan::~DrawEngineVulkan() {
	// Release the CPU-side decode buffers allocated in the constructor.
	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);

	DestroyDeviceObjects();
}
193
Destroy(VulkanContext * vulkan)194 void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) {
195 if (descPool != VK_NULL_HANDLE) {
196 vulkan->Delete().QueueDeleteDescriptorPool(descPool);
197 }
198
199 if (pushUBO) {
200 pushUBO->Destroy(vulkan);
201 delete pushUBO;
202 pushUBO = nullptr;
203 }
204 if (pushVertex) {
205 pushVertex->Destroy(vulkan);
206 delete pushVertex;
207 pushVertex = nullptr;
208 }
209 if (pushIndex) {
210 pushIndex->Destroy(vulkan);
211 delete pushIndex;
212 pushIndex = nullptr;
213 }
214 if (pushLocal) {
215 pushLocal->Destroy(vulkan);
216 delete pushLocal;
217 pushLocal = nullptr;
218 }
219 }
220
DestroyDeviceObjects()221 void DrawEngineVulkan::DestroyDeviceObjects() {
222 delete tessDataTransferVulkan;
223 tessDataTransfer = nullptr;
224 tessDataTransferVulkan = nullptr;
225
226 for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
227 frame_[i].Destroy(vulkan_);
228 }
229 if (samplerSecondary_ != VK_NULL_HANDLE)
230 vulkan_->Delete().QueueDeleteSampler(samplerSecondary_);
231 if (nullSampler_ != VK_NULL_HANDLE)
232 vulkan_->Delete().QueueDeleteSampler(nullSampler_);
233 if (pipelineLayout_ != VK_NULL_HANDLE)
234 vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_);
235 if (descriptorSetLayout_ != VK_NULL_HANDLE)
236 vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_);
237 if (vertexCache_) {
238 vertexCache_->Destroy(vulkan_);
239 delete vertexCache_;
240 vertexCache_ = nullptr;
241 }
242 // Need to clear this to get rid of all remaining references to the dead buffers.
243 vai_.Iterate([](uint32_t hash, VertexArrayInfoVulkan *vai) {
244 delete vai;
245 });
246 vai_.Clear();
247 }
248
void DrawEngineVulkan::DeviceLost() {
	// The Vulkan device is going away: drop all device-level objects and
	// forget cached UBO/texture bindings so nothing stale survives a restore.
	DestroyDeviceObjects();
	DirtyAllUBOs();
}
253
void DrawEngineVulkan::DeviceRestore(VulkanContext *vulkan, Draw::DrawContext *draw) {
	// Adopt the new context/draw interface, then rebuild everything that
	// DeviceLost() tore down.
	vulkan_ = vulkan;
	draw_ = draw;

	InitDeviceObjects();
}
260
// Per-frame setup: resets/opens this frame's push buffers, wipes the vertex
// cache if it grew too large, and periodically decimates the descriptor pool
// and the cached-vertex-array map. The ordering below is deliberate.
void DrawEngineVulkan::BeginFrame() {
	lastPipeline_ = nullptr;

	lastRenderStepId_ = -1;

	int curFrame = vulkan_->GetCurFrame();
	FrameData *frame = &frame_[curFrame];

	// First reset all buffers, then begin. This is so that Reset can free memory and Begin can allocate it,
	// if growing the buffer is needed. Doing it this way will reduce fragmentation if more than one buffer
	// needs to grow in the same frame. The state where many buffers are reset can also be used to
	// defragment memory.
	frame->pushUBO->Reset();
	frame->pushVertex->Reset();
	frame->pushIndex->Reset();
	frame->pushLocal->Reset();

	frame->pushUBO->Begin(vulkan_);
	frame->pushVertex->Begin(vulkan_);
	frame->pushIndex->Begin(vulkan_);
	frame->pushLocal->Begin(vulkan_);

	// TODO: How can we make this nicer...
	tessDataTransferVulkan->SetPushBuffer(frame->pushUBO);

	DirtyAllUBOs();

	// Wipe the vertex cache if it's grown too large.
	if (vertexCache_->GetTotalSize() > VERTEX_CACHE_SIZE) {
		vertexCache_->Destroy(vulkan_);
		delete vertexCache_;  // orphans the buffers, they'll get deleted once no longer used by an in-flight frame.
		vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
		// All cached entries pointed into the old buffer, so drop them too.
		vai_.Iterate([&](uint32_t hash, VertexArrayInfoVulkan *vai) {
			delete vai;
		});
		vai_.Clear();
	}

	vertexCache_->BeginNoReset();

	// Periodically reset this frame's descriptor pool and set cache.
	if (--descDecimationCounter_ <= 0) {
		if (frame->descPool != VK_NULL_HANDLE)
			vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
		frame->descSets.Clear();
		frame->descCount = 0;
		descDecimationCounter_ = DESCRIPTORSET_DECIMATION_INTERVAL;
	}

	// Periodically evict stale cached vertex arrays.
	if (--decimationCounter_ <= 0) {
		decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;

		const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
		const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
		int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
		vai_.Iterate([&](uint32_t hash, VertexArrayInfoVulkan *vai) {
			bool kill;
			if (vai->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
				// We limit killing unreliable so we don't rehash too often.
				kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
			} else {
				kill = vai->lastFrame < threshold;
			}
			if (kill) {
				// This is actually quite safe.
				vai_.Remove(hash);
				delete vai;
			}
		});
	}
	vai_.Maintain();
}
332
EndFrame()333 void DrawEngineVulkan::EndFrame() {
334 FrameData *frame = &frame_[vulkan_->GetCurFrame()];
335 stats_.pushUBOSpaceUsed = (int)frame->pushUBO->GetOffset();
336 stats_.pushVertexSpaceUsed = (int)frame->pushVertex->GetOffset();
337 stats_.pushIndexSpaceUsed = (int)frame->pushIndex->GetOffset();
338 frame->pushUBO->End();
339 frame->pushVertex->End();
340 frame->pushIndex->End();
341 frame->pushLocal->End();
342 vertexCache_->End();
343 }
344
DecodeVertsToPushBuffer(VulkanPushBuffer * push,uint32_t * bindOffset,VkBuffer * vkbuf)345 void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
346 u8 *dest = decoded;
347
348 // Figure out how much pushbuffer space we need to allocate.
349 if (push) {
350 int vertsToDecode = ComputeNumVertsToDecode();
351 dest = (u8 *)push->Push(vertsToDecode * dec_->GetDecVtxFmt().stride, bindOffset, vkbuf);
352 }
353 DecodeVerts(dest);
354 }
355
// Thin forwarder: line width is handled by the pipeline manager.
void DrawEngineVulkan::SetLineWidth(float lineWidth) {
	pipelineManager_->SetLineWidth(lineWidth);
}
359
RecreateDescriptorPool(FrameData & frame,int newSize)360 VkResult DrawEngineVulkan::RecreateDescriptorPool(FrameData &frame, int newSize) {
361 // Reallocate this desc pool larger, and "wipe" the cache. We might lose a tiny bit of descriptor set reuse but
362 // only for this frame.
363 if (frame.descPool) {
364 DEBUG_LOG(G3D, "Reallocating desc pool from %d to %d", frame.descPoolSize, newSize);
365 vulkan_->Delete().QueueDeleteDescriptorPool(frame.descPool);
366 frame.descSets.Clear();
367 frame.descCount = 0;
368 }
369 frame.descPoolSize = newSize;
370
371 VkDescriptorPoolSize dpTypes[3];
372 dpTypes[0].descriptorCount = frame.descPoolSize * 3;
373 dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
374 dpTypes[1].descriptorCount = frame.descPoolSize * 3; // Don't use these for tess anymore, need max three per set.
375 dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
376 dpTypes[2].descriptorCount = frame.descPoolSize * 3; // TODO: Use a separate layout when no spline stuff is needed to reduce the need for these.
377 dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
378
379 VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
380 dp.flags = 0; // Don't want to mess around with individually freeing these.
381 // We zap the whole pool every few frames.
382 dp.maxSets = frame.descPoolSize;
383 dp.pPoolSizes = dpTypes;
384 dp.poolSizeCount = ARRAY_SIZE(dpTypes);
385
386 VkResult res = vkCreateDescriptorPool(vulkan_->GetDevice(), &dp, nullptr, &frame.descPool);
387 return res;
388 }
389
GetOrCreateDescriptorSet(VkImageView imageView,VkSampler sampler,VkBuffer base,VkBuffer light,VkBuffer bone,bool tess)390 VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) {
391 _dbg_assert_(base != VK_NULL_HANDLE);
392 _dbg_assert_(light != VK_NULL_HANDLE);
393 _dbg_assert_(bone != VK_NULL_HANDLE);
394
395 DescriptorSetKey key;
396 key.imageView_ = imageView;
397 key.sampler_ = sampler;
398 key.secondaryImageView_ = boundSecondary_;
399 key.depalImageView_ = boundDepal_;
400 key.base_ = base;
401 key.light_ = light;
402 key.bone_ = bone;
403
404 FrameData &frame = frame_[vulkan_->GetCurFrame()];
405 // See if we already have this descriptor set cached.
406 if (!tess) { // Don't cache descriptors for HW tessellation.
407 VkDescriptorSet d = frame.descSets.Get(key);
408 if (d != VK_NULL_HANDLE)
409 return d;
410 }
411
412 if (!frame.descPool || frame.descPoolSize < frame.descCount + 1) {
413 VkResult res = RecreateDescriptorPool(frame, frame.descPoolSize * 2);
414 _dbg_assert_(res == VK_SUCCESS);
415 }
416
417 // Didn't find one in the frame descriptor set cache, let's make a new one.
418 // We wipe the cache on every frame.
419
420 VkDescriptorSet desc;
421 VkDescriptorSetAllocateInfo descAlloc{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
422 descAlloc.pSetLayouts = &descriptorSetLayout_;
423 descAlloc.descriptorPool = frame.descPool;
424 descAlloc.descriptorSetCount = 1;
425 VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc);
426
427 if (result == VK_ERROR_FRAGMENTED_POOL || result < 0) {
428 // There seems to have been a spec revision. Here we should apparently recreate the descriptor pool,
429 // so let's do that. See https://www.khronos.org/registry/vulkan/specs/1.0/man/html/vkAllocateDescriptorSets.html
430 // Fragmentation shouldn't really happen though since we wipe the pool every frame..
431 VkResult res = RecreateDescriptorPool(frame, frame.descPoolSize);
432 _assert_msg_(res == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to recreate a descriptor pool. sz=%d res=%d", (int)frame.descSets.size(), (int)res);
433 descAlloc.descriptorPool = frame.descPool; // Need to update this pointer since we have allocated a new one.
434 result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc);
435 _assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to allocate after recreating a descriptor pool. res=%d", (int)result);
436 }
437
438 // Even in release mode, this is bad.
439 _assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space in pool. sz=%d res=%d", (int)frame.descSets.size(), (int)result);
440
441 // We just don't write to the slots we don't care about, which is fine.
442 VkWriteDescriptorSet writes[9]{};
443 // Main texture
444 int n = 0;
445 VkDescriptorImageInfo tex[3]{};
446 if (imageView) {
447 _dbg_assert_(sampler != VK_NULL_HANDLE);
448
449 tex[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
450 tex[0].imageView = imageView;
451 tex[0].sampler = sampler;
452 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
453 writes[n].pNext = nullptr;
454 writes[n].dstBinding = DRAW_BINDING_TEXTURE;
455 writes[n].pImageInfo = &tex[0];
456 writes[n].descriptorCount = 1;
457 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
458 writes[n].dstSet = desc;
459 n++;
460 }
461
462 if (boundSecondary_) {
463 tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
464 tex[1].imageView = boundSecondary_;
465 tex[1].sampler = samplerSecondary_;
466 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
467 writes[n].pNext = nullptr;
468 writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE;
469 writes[n].pImageInfo = &tex[1];
470 writes[n].descriptorCount = 1;
471 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
472 writes[n].dstSet = desc;
473 n++;
474 }
475
476 if (boundDepal_) {
477 tex[2].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
478 tex[2].imageView = boundDepal_;
479 tex[2].sampler = samplerSecondary_; // doesn't matter, we use load
480 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
481 writes[n].pNext = nullptr;
482 writes[n].dstBinding = DRAW_BINDING_DEPAL_TEXTURE;
483 writes[n].pImageInfo = &tex[2];
484 writes[n].descriptorCount = 1;
485 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
486 writes[n].dstSet = desc;
487 n++;
488 }
489
490 // Tessellation data buffer.
491 if (tess) {
492 const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo();
493 // Control Points
494 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
495 writes[n].pNext = nullptr;
496 writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF;
497 writes[n].pBufferInfo = &bufInfo[0];
498 writes[n].descriptorCount = 1;
499 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
500 writes[n].dstSet = desc;
501 n++;
502 // Weights U
503 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
504 writes[n].pNext = nullptr;
505 writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
506 writes[n].pBufferInfo = &bufInfo[1];
507 writes[n].descriptorCount = 1;
508 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
509 writes[n].dstSet = desc;
510 n++;
511 // Weights V
512 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
513 writes[n].pNext = nullptr;
514 writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
515 writes[n].pBufferInfo = &bufInfo[2];
516 writes[n].descriptorCount = 1;
517 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
518 writes[n].dstSet = desc;
519 n++;
520 }
521
522 // Uniform buffer objects
523 VkDescriptorBufferInfo buf[3]{};
524 int count = 0;
525 buf[count].buffer = base;
526 buf[count].offset = 0;
527 buf[count].range = sizeof(UB_VS_FS_Base);
528 count++;
529 buf[count].buffer = light;
530 buf[count].offset = 0;
531 buf[count].range = sizeof(UB_VS_Lights);
532 count++;
533 buf[count].buffer = bone;
534 buf[count].offset = 0;
535 buf[count].range = sizeof(UB_VS_Bones);
536 count++;
537 for (int i = 0; i < count; i++) {
538 writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
539 writes[n].pNext = nullptr;
540 writes[n].dstBinding = DRAW_BINDING_DYNUBO_BASE + i;
541 writes[n].dstArrayElement = 0;
542 writes[n].pBufferInfo = &buf[i];
543 writes[n].dstSet = desc;
544 writes[n].descriptorCount = 1;
545 writes[n].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
546 n++;
547 }
548
549 vkUpdateDescriptorSets(vulkan_->GetDevice(), n, writes, 0, nullptr);
550
551 if (!tess) // Again, avoid caching when HW tessellation.
552 frame.descSets.Insert(key, desc);
553 frame.descCount++;
554 return desc;
555 }
556
// Invalidates all cached dynamic-UBO bindings and the bound texture/sampler,
// and marks all three uniform blocks dirty so they get re-uploaded before the
// next draw.
void DrawEngineVulkan::DirtyAllUBOs() {
	baseUBOOffset = 0;
	lightUBOOffset = 0;
	boneUBOOffset = 0;
	baseBuf = VK_NULL_HANDLE;
	lightBuf = VK_NULL_HANDLE;
	boneBuf = VK_NULL_HANDLE;
	dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;
	imageView = VK_NULL_HANDLE;
	sampler = VK_NULL_HANDLE;
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
569
// Demotes a cached vertex array to "unreliable" (its contents keep changing,
// so hashing it for reuse isn't worthwhile).
void MarkUnreliable(VertexArrayInfoVulkan *vai) {
	vai->status = VertexArrayInfoVulkan::VAI_UNRELIABLE;
	// TODO: If we change to a real allocator, free the data here.
	// For now we just leave it in the pushbuffer.
}
575
576 // The inline wrapper in the header checks for numDrawCalls == 0
DoFlush()577 void DrawEngineVulkan::DoFlush() {
578 PROFILE_THIS_SCOPE("Flush");
579 gpuStats.numFlushes++;
580 // TODO: Should be enough to update this once per frame?
581 gpuStats.numTrackedVertexArrays = (int)vai_.size();
582
583 VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
584
585 // TODO: Needs to be behind a check for changed render pass, at an appropriate time in this function.
586 // Similar issues as with the lastRenderStepId_ check. Will need a bit of a rethink.
587 lastPipeline_ = nullptr;
588 // If have a new render pass, dirty our dynamic state so it gets re-set.
589 // We have to do this again after the last possible place in DoFlush that can cause a renderpass switch
590 // like a shader blend blit or similar. But before we actually set the state!
591 int curRenderStepId = renderManager->GetCurrentStepId();
592 if (lastRenderStepId_ != curRenderStepId) {
593 // Dirty everything that has dynamic state that will need re-recording.
594 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
595 textureCache_->ForgetLastTexture();
596 lastRenderStepId_ = curRenderStepId;
597 }
598
599 FrameData *frame = &frame_[vulkan_->GetCurFrame()];
600
601 bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
602
603 bool textureNeedsApply = false;
604 if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
605 textureCache_->SetTexture();
606 gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
607 textureNeedsApply = true;
608 } else if (gstate.getTextureAddress(0) == ((gstate.getFrameBufRawAddress() | 0x04000000) & 0x3FFFFFFF)) {
609 // This catches the case of clearing a texture.
610 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
611 }
612
613 GEPrimitiveType prim = prevPrim_;
614
615 // Always use software for flat shading to fix the provoking index.
616 bool useHWTransform = CanUseHardwareTransform(prim) && (tess || gstate.getShadeMode() != GE_SHADE_FLAT);
617
618 VulkanVertexShader *vshader = nullptr;
619 VulkanFragmentShader *fshader = nullptr;
620
621 uint32_t ibOffset;
622 uint32_t vbOffset;
623
624 if (useHWTransform) {
625 // We don't detect clears in this path, so here we can switch framebuffers if necessary.
626
627 int vertexCount = 0;
628 int maxIndex;
629 bool useElements = true;
630
631 // Cannot cache vertex data with morph enabled.
632 bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
633 // Also avoid caching when software skinning.
634 VkBuffer vbuf = VK_NULL_HANDLE;
635 VkBuffer ibuf = VK_NULL_HANDLE;
636 if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
637 useCache = false;
638 }
639
640 if (useCache) {
641 PROFILE_THIS_SCOPE("vcache");
642 u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
643 VertexArrayInfoVulkan *vai = vai_.Get(id);
644 if (!vai) {
645 vai = new VertexArrayInfoVulkan();
646 vai_.Insert(id, vai);
647 }
648
649 switch (vai->status) {
650 case VertexArrayInfoVulkan::VAI_NEW:
651 {
652 // Haven't seen this one before. We don't actually upload the vertex data yet.
653 uint64_t dataHash = ComputeHash();
654 vai->hash = dataHash;
655 vai->minihash = ComputeMiniHash();
656 vai->status = VertexArrayInfoVulkan::VAI_HASHING;
657 vai->drawsUntilNextFullHash = 0;
658 DecodeVertsToPushBuffer(frame->pushVertex, &vbOffset, &vbuf); // writes to indexGen
659 vai->numVerts = indexGen.VertexCount();
660 vai->prim = indexGen.Prim();
661 vai->maxIndex = indexGen.MaxIndex();
662 vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
663 goto rotateVBO;
664 }
665
666 // Hashing - still gaining confidence about the buffer.
667 // But if we get this far it's likely to be worth uploading the data.
668 case VertexArrayInfoVulkan::VAI_HASHING:
669 {
670 PROFILE_THIS_SCOPE("vcachehash");
671 vai->numDraws++;
672 if (vai->lastFrame != gpuStats.numFlips) {
673 vai->numFrames++;
674 }
675 if (vai->drawsUntilNextFullHash == 0) {
676 // Let's try to skip a full hash if mini would fail.
677 const u32 newMiniHash = ComputeMiniHash();
678 uint64_t newHash = vai->hash;
679 if (newMiniHash == vai->minihash) {
680 newHash = ComputeHash();
681 }
682 if (newMiniHash != vai->minihash || newHash != vai->hash) {
683 MarkUnreliable(vai);
684 DecodeVertsToPushBuffer(frame->pushVertex, &vbOffset, &vbuf);
685 goto rotateVBO;
686 }
687 if (vai->numVerts > 64) {
688 // exponential backoff up to 16 draws, then every 24
689 vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
690 } else {
691 // Lower numbers seem much more likely to change.
692 vai->drawsUntilNextFullHash = 0;
693 }
694 // TODO: tweak
695 //if (vai->numFrames > 1000) {
696 // vai->status = VertexArrayInfo::VAI_RELIABLE;
697 //}
698 } else {
699 vai->drawsUntilNextFullHash--;
700 u32 newMiniHash = ComputeMiniHash();
701 if (newMiniHash != vai->minihash) {
702 MarkUnreliable(vai);
703 DecodeVertsToPushBuffer(frame->pushVertex, &vbOffset, &vbuf);
704 goto rotateVBO;
705 }
706 }
707
708 if (!vai->vb) {
709 // Directly push to the vertex cache.
710 DecodeVertsToPushBuffer(vertexCache_, &vai->vbOffset, &vai->vb);
711 _dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
712 vai->numVerts = indexGen.VertexCount();
713 vai->prim = indexGen.Prim();
714 vai->maxIndex = indexGen.MaxIndex();
715 vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
716 useElements = !indexGen.SeenOnlyPurePrims();
717 if (!useElements && indexGen.PureCount()) {
718 vai->numVerts = indexGen.PureCount();
719 }
720 if (useElements) {
721 u32 size = sizeof(uint16_t) * indexGen.VertexCount();
722 void *dest = vertexCache_->Push(size, &vai->ibOffset, &vai->ib);
723 memcpy(dest, decIndex, size);
724 } else {
725 vai->ib = VK_NULL_HANDLE;
726 vai->ibOffset = 0;
727 }
728 } else {
729 gpuStats.numCachedDrawCalls++;
730 useElements = vai->ib ? true : false;
731 gpuStats.numCachedVertsDrawn += vai->numVerts;
732 gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
733 }
734 vbuf = vai->vb;
735 ibuf = vai->ib;
736 vbOffset = vai->vbOffset;
737 ibOffset = vai->ibOffset;
738 vertexCount = vai->numVerts;
739 maxIndex = vai->maxIndex;
740 prim = static_cast<GEPrimitiveType>(vai->prim);
741 break;
742 }
743
744 // Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
745 case VertexArrayInfoVulkan::VAI_RELIABLE:
746 {
747 vai->numDraws++;
748 if (vai->lastFrame != gpuStats.numFlips) {
749 vai->numFrames++;
750 }
751 gpuStats.numCachedDrawCalls++;
752 gpuStats.numCachedVertsDrawn += vai->numVerts;
753 vbuf = vai->vb;
754 ibuf = vai->ib;
755 vbOffset = vai->vbOffset;
756 ibOffset = vai->ibOffset;
757 vertexCount = vai->numVerts;
758 maxIndex = vai->maxIndex;
759 prim = static_cast<GEPrimitiveType>(vai->prim);
760
761 gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
762 break;
763 }
764
765 case VertexArrayInfoVulkan::VAI_UNRELIABLE:
766 {
767 vai->numDraws++;
768 if (vai->lastFrame != gpuStats.numFlips) {
769 vai->numFrames++;
770 }
771 DecodeVertsToPushBuffer(frame->pushVertex, &vbOffset, &vbuf);
772 goto rotateVBO;
773 }
774 default:
775 break;
776 }
777 } else {
778 if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
779 // If software skinning, we've already predecoded into "decoded". So push that content.
780 VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
781 u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf);
782 memcpy(dest, decoded, size);
783 } else {
784 // Decode directly into the pushbuffer
785 DecodeVertsToPushBuffer(frame->pushVertex, &vbOffset, &vbuf);
786 }
787
788 rotateVBO:
789 gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
790 useElements = !indexGen.SeenOnlyPurePrims();
791 vertexCount = indexGen.VertexCount();
792 if (!useElements && indexGen.PureCount()) {
793 vertexCount = indexGen.PureCount();
794 }
795 prim = indexGen.Prim();
796 }
797
798 bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
799 if (gstate.isModeThrough()) {
800 gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
801 } else {
802 gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
803 }
804
805 PROFILE_THIS_SCOPE("updatestate");
806
807 if (textureNeedsApply) {
808 textureCache_->ApplyTexture();
809 textureCache_->GetVulkanHandles(imageView, sampler);
810 if (imageView == VK_NULL_HANDLE)
811 imageView = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::NULL_IMAGEVIEW);
812 if (sampler == VK_NULL_HANDLE)
813 sampler = nullSampler_;
814 }
815
816 if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) {
817 if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
818 ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
819 }
820
821 shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform
822 if (!vshader) {
823 // We're screwed.
824 return;
825 }
826 _dbg_assert_msg_(vshader->UseHWTransform(), "Bad vshader");
827
828 Draw::NativeObject object = framebufferManager_->UseBufferedRendering() ? Draw::NativeObject::FRAMEBUFFER_RENDERPASS : Draw::NativeObject::BACKBUFFER_RENDERPASS;
829 VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(object);
830 VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, &dec_->decFmt, vshader, fshader, true);
831 if (!pipeline || !pipeline->pipeline) {
832 // Already logged, let's bail out.
833 return;
834 }
835 BindShaderBlendTex(); // This might cause copies so important to do before BindPipeline.
836
837 // If have a new render pass, dirty our dynamic state so it gets re-set.
838 // WARNING: We have to do this AFTER the last possible place in DoFlush that can cause a renderpass switch
839 // like a shader blend blit or similar. But before we actually set the state!
840 int curRenderStepId = renderManager->GetCurrentStepId();
841 if (lastRenderStepId_ != curRenderStepId) {
842 // Dirty everything that has dynamic state that will need re-recording.
843 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE);
844 lastRenderStepId_ = curRenderStepId;
845 }
846
847 renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags);
848 if (pipeline != lastPipeline_) {
849 if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
850 gstate_c.Dirty(DIRTY_BLEND_STATE);
851 }
852 lastPipeline_ = pipeline;
853 }
854 ApplyDrawStateLate(renderManager, false, 0, pipeline->UsesBlendConstant());
855 gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
856 lastPipeline_ = pipeline;
857
858 // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
859 if (fboTexBound_)
860 gstate_c.Dirty(DIRTY_BLEND_STATE);
861 }
862 lastPrim_ = prim;
863
864 dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
865 UpdateUBOs(frame);
866
867 VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
868
869 const uint32_t dynamicUBOOffsets[3] = {
870 baseUBOOffset, lightUBOOffset, boneUBOOffset,
871 };
872
873 if (useElements) {
874 if (!ibuf) {
875 ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &ibuf);
876 }
877 renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1, VK_INDEX_TYPE_UINT16);
878 } else {
879 renderManager->Draw(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
880 }
881 } else {
882 PROFILE_THIS_SCOPE("soft");
883 // Decode to "decoded"
884 DecodeVertsToPushBuffer(nullptr, nullptr, nullptr);
885 bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
886 if (gstate.isModeThrough()) {
887 gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
888 } else {
889 gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
890 }
891
892 gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
893 prim = indexGen.Prim();
894 // Undo the strip optimization, not supported by the SW code yet.
895 if (prim == GE_PRIM_TRIANGLE_STRIP)
896 prim = GE_PRIM_TRIANGLES;
897 VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
898
899 u16 *inds = decIndex;
900 SoftwareTransformResult result{};
901 SoftwareTransformParams params{};
902 params.decoded = decoded;
903 params.transformed = transformed;
904 params.transformedExpanded = transformedExpanded;
905 params.fbman = framebufferManager_;
906 params.texCache = textureCache_;
907 // We have to force drawing of primitives if !framebufferManager_->UseBufferedRendering() because Vulkan clears
908 // do not respect scissor rects.
909 params.allowClear = framebufferManager_->UseBufferedRendering();
910 params.allowSeparateAlphaClear = false;
911 params.provokeFlatFirst = true;
912
913 // We need to update the viewport early because it's checked for flipping in SoftwareTransform.
914 // We don't have a "DrawStateEarly" in vulkan, so...
915 // TODO: Probably should eventually refactor this and feed the vp size into SoftwareTransform directly (Unknown's idea).
916 if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
917 gstate_c.vpWidth = gstate.getViewportXScale() * 2.0f;
918 gstate_c.vpHeight = gstate.getViewportYScale() * 2.0f;
919 }
920
921 int maxIndex = indexGen.MaxIndex();
922 SoftwareTransform swTransform(params);
923 swTransform.Decode(prim, dec_->VertexType(), dec_->GetDecVtxFmt(), maxIndex, &result);
924 if (result.action == SW_NOT_READY) {
925 swTransform.DetectOffsetTexture(maxIndex);
926 swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
927 }
928
929 if (result.setSafeSize)
930 framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
931
932 // Only here, where we know whether to clear or to draw primitives, should we actually set the current framebuffer! Because that gives use the opportunity
933 // to use a "pre-clear" render pass, for high efficiency on tilers.
934 if (result.action == SW_DRAW_PRIMITIVES) {
935 if (textureNeedsApply) {
936 textureCache_->ApplyTexture();
937 textureCache_->GetVulkanHandles(imageView, sampler);
938 if (imageView == VK_NULL_HANDLE)
939 imageView = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::NULL_IMAGEVIEW);
940 if (sampler == VK_NULL_HANDLE)
941 sampler = nullSampler_;
942 }
943 if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) {
944 shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform
945 _dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
946 if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
947 ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
948 }
949 Draw::NativeObject object = framebufferManager_->UseBufferedRendering() ? Draw::NativeObject::FRAMEBUFFER_RENDERPASS : Draw::NativeObject::BACKBUFFER_RENDERPASS;
950 VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(object);
951 VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, renderPass, pipelineKey_, &dec_->decFmt, vshader, fshader, false);
952 if (!pipeline || !pipeline->pipeline) {
953 // Already logged, let's bail out.
954 return;
955 }
956 BindShaderBlendTex(); // This might cause copies so super important to do before BindPipeline.
957
958 // If have a new render pass, dirty our dynamic state so it gets re-set.
959 // WARNING: We have to do this AFTER the last possible place in DoFlush that can cause a renderpass switch
960 // like a shader blend blit or similar. But before we actually set the state!
961 int curRenderStepId = renderManager->GetCurrentStepId();
962 if (lastRenderStepId_ != curRenderStepId) {
963 // Dirty everything that has dynamic state that will need re-recording.
964 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE);
965 lastRenderStepId_ = curRenderStepId;
966 }
967
968 renderManager->BindPipeline(pipeline->pipeline, (PipelineFlags)pipeline->flags);
969 if (pipeline != lastPipeline_) {
970 if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
971 gstate_c.Dirty(DIRTY_BLEND_STATE);
972 }
973 lastPipeline_ = pipeline;
974 }
975 ApplyDrawStateLate(renderManager, result.setStencil, result.stencilValue, pipeline->UsesBlendConstant());
976 gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
977 lastPipeline_ = pipeline;
978
979 // Must dirty blend state here so we re-copy next time. Example: Lunar's spell effects.
980 if (fboTexBound_)
981 gstate_c.Dirty(DIRTY_BLEND_STATE);
982 }
983 lastPrim_ = prim;
984
985 dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
986
987 // Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
988 UpdateUBOs(frame);
989
990 VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
991 const uint32_t dynamicUBOOffsets[3] = {
992 baseUBOOffset, lightUBOOffset, boneUBOOffset,
993 };
994
995 PROFILE_THIS_SCOPE("renderman_q");
996
997 if (result.drawIndexed) {
998 VkBuffer vbuf, ibuf;
999 vbOffset = (uint32_t)frame->pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), &vbuf);
1000 ibOffset = (uint32_t)frame->pushIndex->Push(inds, sizeof(short) * result.drawNumTrans, &ibuf);
1001 renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1, VK_INDEX_TYPE_UINT16);
1002 } else {
1003 VkBuffer vbuf;
1004 vbOffset = (uint32_t)frame->pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), &vbuf);
1005 renderManager->Draw(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);
1006 }
1007 } else if (result.action == SW_CLEAR) {
1008 // Note: we won't get here if the clear is alpha but not color, or color but not alpha.
1009
1010 // We let the framebuffer manager handle the clear. It can use renderpasses to optimize on tilers.
1011 // If non-buffered though, it'll just do a plain clear.
1012 framebufferManager_->NotifyClear(gstate.isClearModeColorMask(), gstate.isClearModeAlphaMask(), gstate.isClearModeDepthMask(), result.color, result.depth);
1013
1014 if (gstate_c.Supports(GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) {
1015 int scissorX1 = gstate.getScissorX1();
1016 int scissorY1 = gstate.getScissorY1();
1017 int scissorX2 = gstate.getScissorX2() + 1;
1018 int scissorY2 = gstate.getScissorY2() + 1;
1019 framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color);
1020 }
1021 }
1022 }
1023
1024 gpuStats.numDrawCalls += numDrawCalls;
1025 gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
1026
1027 indexGen.Reset();
1028 decodedVerts_ = 0;
1029 numDrawCalls = 0;
1030 vertexCountInDrawCalls_ = 0;
1031 decodeCounter_ = 0;
1032 dcid_ = 0;
1033 prevPrim_ = GE_PRIM_INVALID;
1034 gstate_c.vertexFullAlpha = true;
1035 framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
1036
1037 // Now seems as good a time as any to reset the min/max coords, which we may examine later.
1038 gstate_c.vertBounds.minU = 512;
1039 gstate_c.vertBounds.minV = 512;
1040 gstate_c.vertBounds.maxU = 0;
1041 gstate_c.vertBounds.maxV = 0;
1042
1043 GPUDebug::NotifyDraw();
1044 }
1045
UpdateUBOs(FrameData * frame)1046 void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
1047 if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) {
1048 baseUBOOffset = shaderManager_->PushBaseBuffer(frame->pushUBO, &baseBuf);
1049 dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS;
1050 }
1051 if ((dirtyUniforms_ & DIRTY_LIGHT_UNIFORMS) || lightBuf == VK_NULL_HANDLE) {
1052 lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf);
1053 dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
1054 }
1055 if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
1056 boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf);
1057 dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
1058 }
1059 }
1060
SendDataToShader(const SimpleVertex * const * points,int size_u,int size_v,u32 vertType,const Spline::Weight2D & weights)1061 void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
1062 // SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members
1063 // also need to be 16-byte aligned, hence the padding.
1064 struct TessData {
1065 float pos[3]; float pad1;
1066 float uv[2]; float pad2[2];
1067 float color[4];
1068 };
1069
1070 int size = size_u * size_v;
1071
1072 int ssboAlignment = vulkan_->GetPhysicalDeviceProperties().properties.limits.minStorageBufferOffsetAlignment;
1073 uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), (uint32_t *)&bufInfo_[0].offset, &bufInfo_[0].buffer, ssboAlignment);
1074 bufInfo_[0].range = size * sizeof(TessData);
1075
1076 float *pos = (float *)(data);
1077 float *tex = (float *)(data + offsetof(TessData, uv));
1078 float *col = (float *)(data + offsetof(TessData, color));
1079 int stride = sizeof(TessData) / sizeof(float);
1080
1081 CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
1082
1083 using Spline::Weight;
1084
1085 // Weights U
1086 data = (uint8_t *)push_->PushAligned(weights.size_u * sizeof(Weight), (uint32_t *)&bufInfo_[1].offset, &bufInfo_[1].buffer, ssboAlignment);
1087 memcpy(data, weights.u, weights.size_u * sizeof(Weight));
1088 bufInfo_[1].range = weights.size_u * sizeof(Weight);
1089
1090 // Weights V
1091 data = (uint8_t *)push_->PushAligned(weights.size_v * sizeof(Weight), (uint32_t *)&bufInfo_[2].offset, &bufInfo_[2].buffer, ssboAlignment);
1092 memcpy(data, weights.v, weights.size_v * sizeof(Weight));
1093 bufInfo_[2].range = weights.size_v * sizeof(Weight);
1094 }
1095