1 #include "GSH_VulkanDraw.h"
2 #include "GSH_VulkanMemoryUtils.h"
3 #include "MemStream.h"
4 #include "vulkan/StructDefs.h"
5 #include "vulkan/Utils.h"
6 #include "nuanceur/Builder.h"
7 #include "nuanceur/generators/SpirvShaderGenerator.h"
8 #include "../GSHandler.h"
9 #include "../GsPixelFormats.h"
10 
11 using namespace GSH_Vulkan;
12 
13 #define VERTEX_ATTRIB_LOCATION_POSITION 0
14 #define VERTEX_ATTRIB_LOCATION_DEPTH 1
15 #define VERTEX_ATTRIB_LOCATION_COLOR 2
16 #define VERTEX_ATTRIB_LOCATION_TEXCOORD 3
17 #define VERTEX_ATTRIB_LOCATION_FOG 4
18 
19 #define DESCRIPTOR_LOCATION_BUFFER_MEMORY 0
20 #define DESCRIPTOR_LOCATION_IMAGE_CLUT 1
21 #define DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_TEX 2
22 #define DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_FB 3
23 #define DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_DEPTH 4
24 
25 #define DRAW_AREA_SIZE 2048
//Capacity, in vertices, of each per-frame vertex buffer.
//Parenthesized so the product stays a single operand wherever the macro expands
//(for instance next to a division, a modulo or a cast).
#define MAX_VERTEX_COUNT (1024 * 512)
27 
28 #define DEPTH_MAX (4294967296.0f)
29 
CDraw(const ContextPtr & context,const FrameCommandBufferPtr & frameCommandBuffer)30 CDraw::CDraw(const ContextPtr& context, const FrameCommandBufferPtr& frameCommandBuffer)
31     : m_context(context)
32     , m_frameCommandBuffer(frameCommandBuffer)
33     , m_pipelineCache(context->device)
34 {
35 	CreateRenderPass();
36 	CreateDrawImage();
37 	CreateFramebuffer();
38 
39 	for(auto& frame : m_frames)
40 	{
41 		frame.vertexBuffer = Framework::Vulkan::CBuffer(
42 		    m_context->device, m_context->physicalDeviceMemoryProperties,
43 		    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, sizeof(PRIM_VERTEX) * MAX_VERTEX_COUNT);
44 
45 		auto result = m_context->device.vkMapMemory(m_context->device, frame.vertexBuffer.GetMemory(),
46 		                                            0, VK_WHOLE_SIZE, 0, reinterpret_cast<void**>(&frame.vertexBufferPtr));
47 		CHECKVULKANERROR(result);
48 	}
49 
50 	m_pipelineCaps <<= 0;
51 }
52 
~CDraw()53 CDraw::~CDraw()
54 {
55 	for(auto& frame : m_frames)
56 	{
57 		m_context->device.vkUnmapMemory(m_context->device, frame.vertexBuffer.GetMemory());
58 	}
59 	m_context->device.vkDestroyFramebuffer(m_context->device, m_framebuffer, nullptr);
60 	m_context->device.vkDestroyRenderPass(m_context->device, m_renderPass, nullptr);
61 	m_context->device.vkDestroyImageView(m_context->device, m_drawImageView, nullptr);
62 }
63 
SetPipelineCaps(const PIPELINE_CAPS & caps)64 void CDraw::SetPipelineCaps(const PIPELINE_CAPS& caps)
65 {
66 	bool changed = static_cast<uint64>(caps) != static_cast<uint64>(m_pipelineCaps);
67 	if(!changed) return;
68 	FlushVertices();
69 	m_pipelineCaps = caps;
70 }
71 
SetFramebufferParams(uint32 addr,uint32 width,uint32 writeMask)72 void CDraw::SetFramebufferParams(uint32 addr, uint32 width, uint32 writeMask)
73 {
74 	bool changed =
75 	    (m_pushConstants.fbBufAddr != addr) ||
76 	    (m_pushConstants.fbBufWidth != width) ||
77 	    (m_pushConstants.fbWriteMask != writeMask);
78 	if(!changed) return;
79 	FlushVertices();
80 	m_pushConstants.fbBufAddr = addr;
81 	m_pushConstants.fbBufWidth = width;
82 	m_pushConstants.fbWriteMask = writeMask;
83 }
84 
SetDepthbufferParams(uint32 addr,uint32 width)85 void CDraw::SetDepthbufferParams(uint32 addr, uint32 width)
86 {
87 	bool changed =
88 	    (m_pushConstants.depthBufAddr != addr) ||
89 	    (m_pushConstants.depthBufWidth != width);
90 	if(!changed) return;
91 	FlushVertices();
92 	m_pushConstants.depthBufAddr = addr;
93 	m_pushConstants.depthBufWidth = width;
94 }
95 
SetTextureParams(uint32 bufAddr,uint32 bufWidth,uint32 width,uint32 height,uint32 csa)96 void CDraw::SetTextureParams(uint32 bufAddr, uint32 bufWidth, uint32 width, uint32 height, uint32 csa)
97 {
98 	bool changed =
99 	    (m_pushConstants.texBufAddr != bufAddr) ||
100 	    (m_pushConstants.texBufWidth != bufWidth) ||
101 	    (m_pushConstants.texWidth != width) ||
102 	    (m_pushConstants.texHeight != height) ||
103 	    (m_pushConstants.texCsa != csa);
104 	if(!changed) return;
105 	FlushVertices();
106 	m_pushConstants.texBufAddr = bufAddr;
107 	m_pushConstants.texBufWidth = bufWidth;
108 	m_pushConstants.texWidth = width;
109 	m_pushConstants.texHeight = height;
110 	m_pushConstants.texCsa = csa;
111 }
112 
SetClutBufferOffset(uint32 clutBufferOffset)113 void CDraw::SetClutBufferOffset(uint32 clutBufferOffset)
114 {
115 	bool changed = m_clutBufferOffset != clutBufferOffset;
116 	if(!changed) return;
117 	FlushVertices();
118 	m_clutBufferOffset = clutBufferOffset;
119 }
120 
SetTextureAlphaParams(uint32 texA0,uint32 texA1)121 void CDraw::SetTextureAlphaParams(uint32 texA0, uint32 texA1)
122 {
123 	bool changed =
124 	    (m_pushConstants.texA0 != texA0) ||
125 	    (m_pushConstants.texA1 != texA1);
126 	if(!changed) return;
127 	FlushVertices();
128 	m_pushConstants.texA0 = texA0;
129 	m_pushConstants.texA1 = texA1;
130 }
131 
SetTextureClampParams(uint32 clampMinU,uint32 clampMinV,uint32 clampMaxU,uint32 clampMaxV)132 void CDraw::SetTextureClampParams(uint32 clampMinU, uint32 clampMinV, uint32 clampMaxU, uint32 clampMaxV)
133 {
134 	bool changed =
135 	    (m_pushConstants.clampMin[0] != clampMinU) ||
136 	    (m_pushConstants.clampMin[1] != clampMinV) ||
137 	    (m_pushConstants.clampMax[0] != clampMaxU) ||
138 	    (m_pushConstants.clampMax[1] != clampMaxV);
139 	if(!changed) return;
140 	FlushVertices();
141 	m_pushConstants.clampMin[0] = clampMinU;
142 	m_pushConstants.clampMin[1] = clampMinV;
143 	m_pushConstants.clampMax[0] = clampMaxU;
144 	m_pushConstants.clampMax[1] = clampMaxV;
145 }
146 
SetFogParams(float fogR,float fogG,float fogB)147 void CDraw::SetFogParams(float fogR, float fogG, float fogB)
148 {
149 	bool changed =
150 	    (m_pushConstants.fogColor[0] != fogR) ||
151 	    (m_pushConstants.fogColor[1] != fogG) ||
152 	    (m_pushConstants.fogColor[2] != fogB);
153 	if(!changed) return;
154 	FlushVertices();
155 	m_pushConstants.fogColor[0] = fogR;
156 	m_pushConstants.fogColor[1] = fogG;
157 	m_pushConstants.fogColor[2] = fogB;
158 	m_pushConstants.fogColor[3] = 0;
159 }
160 
SetAlphaTestParams(uint32 alphaRef)161 void CDraw::SetAlphaTestParams(uint32 alphaRef)
162 {
163 	bool changed = (m_pushConstants.alphaRef != alphaRef);
164 	if(!changed) return;
165 	FlushVertices();
166 	m_pushConstants.alphaRef = alphaRef;
167 }
168 
SetAlphaBlendingParams(uint32 alphaFix)169 void CDraw::SetAlphaBlendingParams(uint32 alphaFix)
170 {
171 	bool changed =
172 	    (m_pushConstants.alphaFix != alphaFix);
173 	if(!changed) return;
174 	FlushVertices();
175 	m_pushConstants.alphaFix = alphaFix;
176 }
177 
SetScissor(uint32 scissorX,uint32 scissorY,uint32 scissorWidth,uint32 scissorHeight)178 void CDraw::SetScissor(uint32 scissorX, uint32 scissorY, uint32 scissorWidth, uint32 scissorHeight)
179 {
180 	bool changed =
181 	    (m_scissorX != scissorX) ||
182 	    (m_scissorY != scissorY) ||
183 	    (m_scissorWidth != scissorWidth) ||
184 	    (m_scissorHeight != scissorHeight);
185 	if(!changed) return;
186 	FlushVertices();
187 	m_scissorX = scissorX;
188 	m_scissorY = scissorY;
189 	m_scissorWidth = scissorWidth;
190 	m_scissorHeight = scissorHeight;
191 }
192 
AddVertices(const PRIM_VERTEX * vertexBeginPtr,const PRIM_VERTEX * vertexEndPtr)193 void CDraw::AddVertices(const PRIM_VERTEX* vertexBeginPtr, const PRIM_VERTEX* vertexEndPtr)
194 {
195 	auto amount = vertexEndPtr - vertexBeginPtr;
196 	if((m_passVertexEnd + amount) > MAX_VERTEX_COUNT)
197 	{
198 		m_frameCommandBuffer->Flush();
199 		assert((m_passVertexEnd + amount) <= MAX_VERTEX_COUNT);
200 	}
201 	auto& frame = m_frames[m_frameCommandBuffer->GetCurrentFrame()];
202 	memcpy(frame.vertexBufferPtr + m_passVertexEnd, vertexBeginPtr, amount * sizeof(PRIM_VERTEX));
203 	m_passVertexEnd += amount;
204 }
205 
FlushVertices()206 void CDraw::FlushVertices()
207 {
208 	uint32 vertexCount = m_passVertexEnd - m_passVertexStart;
209 	if(vertexCount == 0) return;
210 
211 	auto& frame = m_frames[m_frameCommandBuffer->GetCurrentFrame()];
212 	auto commandBuffer = m_frameCommandBuffer->GetCommandBuffer();
213 
214 	//Find pipeline and create it if we've never encountered it before
215 	auto drawPipeline = m_pipelineCache.TryGetPipeline(m_pipelineCaps);
216 	if(!drawPipeline)
217 	{
218 		drawPipeline = m_pipelineCache.RegisterPipeline(m_pipelineCaps, CreateDrawPipeline(m_pipelineCaps));
219 	}
220 
221 	{
222 		VkViewport viewport = {};
223 		viewport.width = DRAW_AREA_SIZE;
224 		viewport.height = DRAW_AREA_SIZE;
225 		viewport.maxDepth = 1.0f;
226 		m_context->device.vkCmdSetViewport(commandBuffer, 0, 1, &viewport);
227 
228 		VkRect2D scissor = {};
229 		scissor.offset.x = m_scissorX;
230 		scissor.offset.y = m_scissorY;
231 		scissor.extent.width = m_scissorWidth;
232 		scissor.extent.height = m_scissorHeight;
233 		m_context->device.vkCmdSetScissor(commandBuffer, 0, 1, &scissor);
234 	}
235 
236 	if(!m_renderPassBegun)
237 	{
238 		auto renderPassBeginInfo = Framework::Vulkan::RenderPassBeginInfo();
239 		renderPassBeginInfo.renderPass = m_renderPass;
240 		renderPassBeginInfo.renderArea.extent.width = DRAW_AREA_SIZE;
241 		renderPassBeginInfo.renderArea.extent.height = DRAW_AREA_SIZE;
242 		renderPassBeginInfo.framebuffer = m_framebuffer;
243 		m_context->device.vkCmdBeginRenderPass(commandBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
244 
245 		m_renderPassBegun = true;
246 	}
247 
248 	//Add a barrier to ensure reads are complete before writing to GS memory
249 	{
250 		auto memoryBarrier = Framework::Vulkan::MemoryBarrier();
251 		memoryBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
252 		memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
253 
254 		m_context->device.vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
255 		                                       VK_DEPENDENCY_BY_REGION_BIT, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
256 	}
257 
258 	auto descriptorSetCaps = make_convertible<DESCRIPTORSET_CAPS>(0);
259 	descriptorSetCaps.hasTexture = m_pipelineCaps.hasTexture;
260 	descriptorSetCaps.framebufferFormat = m_pipelineCaps.framebufferFormat;
261 	descriptorSetCaps.depthbufferFormat = m_pipelineCaps.depthbufferFormat;
262 	descriptorSetCaps.textureFormat = m_pipelineCaps.textureFormat;
263 
264 	auto descriptorSet = PrepareDescriptorSet(drawPipeline->descriptorSetLayout, descriptorSetCaps);
265 
266 	std::vector<uint32> descriptorDynamicOffsets;
267 
268 	if(m_pipelineCaps.hasTexture && CGsPixelFormats::IsPsmIDTEX(m_pipelineCaps.textureFormat))
269 	{
270 		descriptorDynamicOffsets.push_back(m_clutBufferOffset);
271 	}
272 
273 	m_context->device.vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, drawPipeline->pipelineLayout,
274 	                                          0, 1, &descriptorSet, static_cast<uint32_t>(descriptorDynamicOffsets.size()), descriptorDynamicOffsets.data());
275 
276 	m_context->device.vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, drawPipeline->pipeline);
277 
278 	VkDeviceSize vertexBufferOffset = (m_passVertexStart * sizeof(PRIM_VERTEX));
279 	VkBuffer vertexBuffer = frame.vertexBuffer;
280 	m_context->device.vkCmdBindVertexBuffers(commandBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
281 
282 	m_context->device.vkCmdPushConstants(commandBuffer, drawPipeline->pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT,
283 	                                     0, sizeof(DRAW_PIPELINE_PUSHCONSTANTS), &m_pushConstants);
284 
285 	m_context->device.vkCmdDraw(commandBuffer, vertexCount, 1, 0, 0);
286 
287 	m_passVertexStart = m_passVertexEnd;
288 }
289 
FlushRenderPass()290 void CDraw::FlushRenderPass()
291 {
292 	FlushVertices();
293 	if(m_renderPassBegun)
294 	{
295 		auto commandBuffer = m_frameCommandBuffer->GetCommandBuffer();
296 		m_context->device.vkCmdEndRenderPass(commandBuffer);
297 		m_renderPassBegun = false;
298 	}
299 }
300 
PreFlushFrameCommandBuffer()301 void CDraw::PreFlushFrameCommandBuffer()
302 {
303 	FlushRenderPass();
304 }
305 
PostFlushFrameCommandBuffer()306 void CDraw::PostFlushFrameCommandBuffer()
307 {
308 	m_passVertexStart = m_passVertexEnd = 0;
309 }
310 
PrepareDescriptorSet(VkDescriptorSetLayout descriptorSetLayout,const DESCRIPTORSET_CAPS & caps)311 VkDescriptorSet CDraw::PrepareDescriptorSet(VkDescriptorSetLayout descriptorSetLayout, const DESCRIPTORSET_CAPS& caps)
312 {
313 	auto descriptorSetIterator = m_descriptorSetCache.find(caps);
314 	if(descriptorSetIterator != std::end(m_descriptorSetCache))
315 	{
316 		return descriptorSetIterator->second;
317 	}
318 
319 	auto result = VK_SUCCESS;
320 	VkDescriptorSet descriptorSet = VK_NULL_HANDLE;
321 
322 	//Allocate descriptor set
323 	{
324 		auto setAllocateInfo = Framework::Vulkan::DescriptorSetAllocateInfo();
325 		setAllocateInfo.descriptorPool = m_context->descriptorPool;
326 		setAllocateInfo.descriptorSetCount = 1;
327 		setAllocateInfo.pSetLayouts = &descriptorSetLayout;
328 
329 		result = m_context->device.vkAllocateDescriptorSets(m_context->device, &setAllocateInfo, &descriptorSet);
330 		CHECKVULKANERROR(result);
331 	}
332 
333 	//Update descriptor set
334 	{
335 		VkDescriptorBufferInfo descriptorMemoryBufferInfo = {};
336 		descriptorMemoryBufferInfo.buffer = m_context->memoryBuffer;
337 		descriptorMemoryBufferInfo.range = VK_WHOLE_SIZE;
338 
339 		VkDescriptorBufferInfo descriptorClutBufferInfo = {};
340 		descriptorClutBufferInfo.buffer = m_context->clutBuffer;
341 		descriptorClutBufferInfo.range = VK_WHOLE_SIZE;
342 
343 		VkDescriptorImageInfo descriptorTexSwizzleTableImageInfo = {};
344 		descriptorTexSwizzleTableImageInfo.imageView = m_context->GetSwizzleTable(caps.textureFormat);
345 		descriptorTexSwizzleTableImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
346 
347 		VkDescriptorImageInfo descriptorFbSwizzleTableImageInfo = {};
348 		descriptorFbSwizzleTableImageInfo.imageView = m_context->GetSwizzleTable(caps.framebufferFormat);
349 		descriptorFbSwizzleTableImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
350 
351 		VkDescriptorImageInfo descriptorDepthSwizzleTableImageInfo = {};
352 		descriptorDepthSwizzleTableImageInfo.imageView = m_context->GetSwizzleTable(caps.depthbufferFormat);
353 		descriptorDepthSwizzleTableImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
354 
355 		std::vector<VkWriteDescriptorSet> writes;
356 
357 		{
358 			auto writeSet = Framework::Vulkan::WriteDescriptorSet();
359 			writeSet.dstSet = descriptorSet;
360 			writeSet.dstBinding = DESCRIPTOR_LOCATION_BUFFER_MEMORY;
361 			writeSet.descriptorCount = 1;
362 			writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
363 			writeSet.pBufferInfo = &descriptorMemoryBufferInfo;
364 			writes.push_back(writeSet);
365 		}
366 
367 		{
368 			auto writeSet = Framework::Vulkan::WriteDescriptorSet();
369 			writeSet.dstSet = descriptorSet;
370 			writeSet.dstBinding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_FB;
371 			writeSet.descriptorCount = 1;
372 			writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
373 			writeSet.pImageInfo = &descriptorFbSwizzleTableImageInfo;
374 			writes.push_back(writeSet);
375 		}
376 
377 		{
378 			auto writeSet = Framework::Vulkan::WriteDescriptorSet();
379 			writeSet.dstSet = descriptorSet;
380 			writeSet.dstBinding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_DEPTH;
381 			writeSet.descriptorCount = 1;
382 			writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
383 			writeSet.pImageInfo = &descriptorDepthSwizzleTableImageInfo;
384 			writes.push_back(writeSet);
385 		}
386 
387 		if(caps.hasTexture)
388 		{
389 			{
390 				auto writeSet = Framework::Vulkan::WriteDescriptorSet();
391 				writeSet.dstSet = descriptorSet;
392 				writeSet.dstBinding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_TEX;
393 				writeSet.descriptorCount = 1;
394 				writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
395 				writeSet.pImageInfo = &descriptorTexSwizzleTableImageInfo;
396 				writes.push_back(writeSet);
397 			}
398 
399 			if(CGsPixelFormats::IsPsmIDTEX(caps.textureFormat))
400 			{
401 				auto writeSet = Framework::Vulkan::WriteDescriptorSet();
402 				writeSet.dstSet = descriptorSet;
403 				writeSet.dstBinding = DESCRIPTOR_LOCATION_IMAGE_CLUT;
404 				writeSet.descriptorCount = 1;
405 				writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
406 				writeSet.pBufferInfo = &descriptorClutBufferInfo;
407 				writes.push_back(writeSet);
408 			}
409 		}
410 
411 		m_context->device.vkUpdateDescriptorSets(m_context->device, static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
412 	}
413 
414 	m_descriptorSetCache.insert(std::make_pair(caps, descriptorSet));
415 
416 	return descriptorSet;
417 }
418 
CreateFramebuffer()419 void CDraw::CreateFramebuffer()
420 {
421 	assert(m_renderPass != VK_NULL_HANDLE);
422 	assert(m_framebuffer == VK_NULL_HANDLE);
423 
424 	auto frameBufferCreateInfo = Framework::Vulkan::FramebufferCreateInfo();
425 	frameBufferCreateInfo.renderPass = m_renderPass;
426 	frameBufferCreateInfo.width = DRAW_AREA_SIZE;
427 	frameBufferCreateInfo.height = DRAW_AREA_SIZE;
428 	frameBufferCreateInfo.layers = 1;
429 	frameBufferCreateInfo.attachmentCount = 1;
430 	frameBufferCreateInfo.pAttachments = &m_drawImageView;
431 
432 	auto result = m_context->device.vkCreateFramebuffer(m_context->device, &frameBufferCreateInfo, nullptr, &m_framebuffer);
433 	CHECKVULKANERROR(result);
434 }
435 
CreateRenderPass()436 void CDraw::CreateRenderPass()
437 {
438 	assert(m_renderPass == VK_NULL_HANDLE);
439 
440 	auto result = VK_SUCCESS;
441 
442 	VkAttachmentDescription colorAttachment = {};
443 	colorAttachment.format = VK_FORMAT_R8G8B8A8_UNORM;
444 	colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
445 	colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
446 	colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
447 	colorAttachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
448 	colorAttachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
449 
450 	VkAttachmentReference colorRef = {};
451 	colorRef.attachment = 0;
452 	colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
453 
454 	VkSubpassDescription subpass = {};
455 	subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
456 	subpass.pColorAttachments = &colorRef;
457 	subpass.colorAttachmentCount = 1;
458 
459 	VkSubpassDependency subpassDependency = {};
460 	subpassDependency.srcSubpass = 0;
461 	subpassDependency.srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
462 	subpassDependency.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
463 	subpassDependency.dstSubpass = 0;
464 	subpassDependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
465 	subpassDependency.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
466 	subpassDependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
467 
468 	auto renderPassCreateInfo = Framework::Vulkan::RenderPassCreateInfo();
469 	renderPassCreateInfo.subpassCount = 1;
470 	renderPassCreateInfo.pSubpasses = &subpass;
471 	renderPassCreateInfo.attachmentCount = 1;
472 	renderPassCreateInfo.pAttachments = &colorAttachment;
473 	renderPassCreateInfo.dependencyCount = 1;
474 	renderPassCreateInfo.pDependencies = &subpassDependency;
475 
476 	result = m_context->device.vkCreateRenderPass(m_context->device, &renderPassCreateInfo, nullptr, &m_renderPass);
477 	CHECKVULKANERROR(result);
478 }
479 
CreateDrawPipeline(const PIPELINE_CAPS & caps)480 PIPELINE CDraw::CreateDrawPipeline(const PIPELINE_CAPS& caps)
481 {
482 	PIPELINE drawPipeline;
483 
484 	auto vertexShader = CreateVertexShader();
485 	auto fragmentShader = CreateFragmentShader(caps);
486 
487 	auto result = VK_SUCCESS;
488 
489 	{
490 		std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings;
491 
492 		{
493 			VkDescriptorSetLayoutBinding setLayoutBinding = {};
494 			setLayoutBinding.binding = DESCRIPTOR_LOCATION_BUFFER_MEMORY;
495 			setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
496 			setLayoutBinding.descriptorCount = 1;
497 			setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
498 			setLayoutBindings.push_back(setLayoutBinding);
499 		}
500 
501 		{
502 			VkDescriptorSetLayoutBinding setLayoutBinding = {};
503 			setLayoutBinding.binding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_FB;
504 			setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
505 			setLayoutBinding.descriptorCount = 1;
506 			setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
507 			setLayoutBindings.push_back(setLayoutBinding);
508 		}
509 
510 		{
511 			VkDescriptorSetLayoutBinding setLayoutBinding = {};
512 			setLayoutBinding.binding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_DEPTH;
513 			setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
514 			setLayoutBinding.descriptorCount = 1;
515 			setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
516 			setLayoutBindings.push_back(setLayoutBinding);
517 		}
518 
519 		if(caps.hasTexture)
520 		{
521 			{
522 				VkDescriptorSetLayoutBinding setLayoutBinding = {};
523 				setLayoutBinding.binding = DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_TEX;
524 				setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
525 				setLayoutBinding.descriptorCount = 1;
526 				setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
527 				setLayoutBindings.push_back(setLayoutBinding);
528 			}
529 
530 			if(CGsPixelFormats::IsPsmIDTEX(caps.textureFormat))
531 			{
532 				VkDescriptorSetLayoutBinding setLayoutBinding = {};
533 				setLayoutBinding.binding = DESCRIPTOR_LOCATION_IMAGE_CLUT;
534 				setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
535 				setLayoutBinding.descriptorCount = 1;
536 				setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
537 				setLayoutBindings.push_back(setLayoutBinding);
538 			}
539 		}
540 
541 		auto setLayoutCreateInfo = Framework::Vulkan::DescriptorSetLayoutCreateInfo();
542 		setLayoutCreateInfo.bindingCount = static_cast<uint32>(setLayoutBindings.size());
543 		setLayoutCreateInfo.pBindings = setLayoutBindings.data();
544 
545 		result = m_context->device.vkCreateDescriptorSetLayout(m_context->device, &setLayoutCreateInfo, nullptr, &drawPipeline.descriptorSetLayout);
546 		CHECKVULKANERROR(result);
547 	}
548 
549 	{
550 		VkPushConstantRange pushConstantInfo = {};
551 		pushConstantInfo.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
552 		pushConstantInfo.offset = 0;
553 		pushConstantInfo.size = sizeof(DRAW_PIPELINE_PUSHCONSTANTS);
554 
555 		auto pipelineLayoutCreateInfo = Framework::Vulkan::PipelineLayoutCreateInfo();
556 		pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
557 		pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantInfo;
558 		pipelineLayoutCreateInfo.setLayoutCount = 1;
559 		pipelineLayoutCreateInfo.pSetLayouts = &drawPipeline.descriptorSetLayout;
560 
561 		result = m_context->device.vkCreatePipelineLayout(m_context->device, &pipelineLayoutCreateInfo, nullptr, &drawPipeline.pipelineLayout);
562 		CHECKVULKANERROR(result);
563 	}
564 
565 	auto inputAssemblyInfo = Framework::Vulkan::PipelineInputAssemblyStateCreateInfo();
566 	switch(caps.primitiveType)
567 	{
568 	default:
569 		assert(false);
570 	case PIPELINE_PRIMITIVE_TRIANGLE:
571 		inputAssemblyInfo.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
572 		break;
573 	case PIPELINE_PRIMITIVE_LINE:
574 		inputAssemblyInfo.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
575 		break;
576 	}
577 
578 	std::vector<VkVertexInputAttributeDescription> vertexAttributes;
579 
580 	{
581 		VkVertexInputAttributeDescription vertexAttributeDesc = {};
582 		vertexAttributeDesc.format = VK_FORMAT_R32G32_SFLOAT;
583 		vertexAttributeDesc.offset = offsetof(PRIM_VERTEX, x);
584 		vertexAttributeDesc.location = VERTEX_ATTRIB_LOCATION_POSITION;
585 		vertexAttributes.push_back(vertexAttributeDesc);
586 	}
587 
588 	{
589 		VkVertexInputAttributeDescription vertexAttributeDesc = {};
590 		vertexAttributeDesc.format = VK_FORMAT_R32_UINT;
591 		vertexAttributeDesc.offset = offsetof(PRIM_VERTEX, z);
592 		vertexAttributeDesc.location = VERTEX_ATTRIB_LOCATION_DEPTH;
593 		vertexAttributes.push_back(vertexAttributeDesc);
594 	}
595 
596 	{
597 		VkVertexInputAttributeDescription vertexAttributeDesc = {};
598 		vertexAttributeDesc.format = VK_FORMAT_R8G8B8A8_UNORM;
599 		vertexAttributeDesc.offset = offsetof(PRIM_VERTEX, color);
600 		vertexAttributeDesc.location = VERTEX_ATTRIB_LOCATION_COLOR;
601 		vertexAttributes.push_back(vertexAttributeDesc);
602 	}
603 
604 	{
605 		VkVertexInputAttributeDescription vertexAttributeDesc = {};
606 		vertexAttributeDesc.format = VK_FORMAT_R32G32B32_SFLOAT;
607 		vertexAttributeDesc.offset = offsetof(PRIM_VERTEX, s);
608 		vertexAttributeDesc.location = VERTEX_ATTRIB_LOCATION_TEXCOORD;
609 		vertexAttributes.push_back(vertexAttributeDesc);
610 	}
611 
612 	{
613 		VkVertexInputAttributeDescription vertexAttributeDesc = {};
614 		vertexAttributeDesc.format = VK_FORMAT_R32_SFLOAT;
615 		vertexAttributeDesc.offset = offsetof(PRIM_VERTEX, f);
616 		vertexAttributeDesc.location = VERTEX_ATTRIB_LOCATION_FOG;
617 		vertexAttributes.push_back(vertexAttributeDesc);
618 	}
619 
620 	VkVertexInputBindingDescription binding = {};
621 	binding.stride = sizeof(PRIM_VERTEX);
622 	binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
623 
624 	auto vertexInputInfo = Framework::Vulkan::PipelineVertexInputStateCreateInfo();
625 	vertexInputInfo.vertexBindingDescriptionCount = 1;
626 	vertexInputInfo.pVertexBindingDescriptions = &binding;
627 	vertexInputInfo.vertexAttributeDescriptionCount = static_cast<uint32>(vertexAttributes.size());
628 	vertexInputInfo.pVertexAttributeDescriptions = vertexAttributes.data();
629 
630 	auto rasterStateInfo = Framework::Vulkan::PipelineRasterizationStateCreateInfo();
631 	rasterStateInfo.polygonMode = VK_POLYGON_MODE_FILL;
632 	rasterStateInfo.cullMode = VK_CULL_MODE_NONE;
633 	rasterStateInfo.lineWidth = 1.0f;
634 
635 	// Our attachment will write to all color channels, but no blending is enabled.
636 	VkPipelineColorBlendAttachmentState blendAttachment = {};
637 	blendAttachment.colorWriteMask = 0xf;
638 
639 	auto colorBlendStateInfo = Framework::Vulkan::PipelineColorBlendStateCreateInfo();
640 	colorBlendStateInfo.attachmentCount = 1;
641 	colorBlendStateInfo.pAttachments = &blendAttachment;
642 
643 	auto viewportStateInfo = Framework::Vulkan::PipelineViewportStateCreateInfo();
644 	viewportStateInfo.viewportCount = 1;
645 	viewportStateInfo.scissorCount = 1;
646 
647 	auto depthStencilStateInfo = Framework::Vulkan::PipelineDepthStencilStateCreateInfo();
648 
649 	auto multisampleStateInfo = Framework::Vulkan::PipelineMultisampleStateCreateInfo();
650 	multisampleStateInfo.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
651 
652 	static const VkDynamicState dynamicStates[] =
653 	    {
654 	        VK_DYNAMIC_STATE_VIEWPORT,
655 	        VK_DYNAMIC_STATE_SCISSOR,
656 	    };
657 	auto dynamicStateInfo = Framework::Vulkan::PipelineDynamicStateCreateInfo();
658 	dynamicStateInfo.pDynamicStates = dynamicStates;
659 	dynamicStateInfo.dynamicStateCount = sizeof(dynamicStates) / sizeof(dynamicStates[0]);
660 
661 	VkPipelineShaderStageCreateInfo shaderStages[2] =
662 	    {
663 	        {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO},
664 	        {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO},
665 	    };
666 
667 	shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
668 	shaderStages[0].module = vertexShader;
669 	shaderStages[0].pName = "main";
670 	shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
671 	shaderStages[1].module = fragmentShader;
672 	shaderStages[1].pName = "main";
673 
674 	auto pipelineCreateInfo = Framework::Vulkan::GraphicsPipelineCreateInfo();
675 	pipelineCreateInfo.stageCount = 2;
676 	pipelineCreateInfo.pStages = shaderStages;
677 	pipelineCreateInfo.pInputAssemblyState = &inputAssemblyInfo;
678 	pipelineCreateInfo.pVertexInputState = &vertexInputInfo;
679 	pipelineCreateInfo.pRasterizationState = &rasterStateInfo;
680 	pipelineCreateInfo.pColorBlendState = &colorBlendStateInfo;
681 	pipelineCreateInfo.pViewportState = &viewportStateInfo;
682 	pipelineCreateInfo.pDepthStencilState = &depthStencilStateInfo;
683 	pipelineCreateInfo.pMultisampleState = &multisampleStateInfo;
684 	pipelineCreateInfo.pDynamicState = &dynamicStateInfo;
685 	pipelineCreateInfo.renderPass = m_renderPass;
686 	pipelineCreateInfo.layout = drawPipeline.pipelineLayout;
687 
688 	result = m_context->device.vkCreateGraphicsPipelines(m_context->device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &drawPipeline.pipeline);
689 	CHECKVULKANERROR(result);
690 
691 	return drawPipeline;
692 }
693 
CreateVertexShader()694 Framework::Vulkan::CShaderModule CDraw::CreateVertexShader()
695 {
696 	using namespace Nuanceur;
697 
698 	auto b = CShaderBuilder();
699 
700 	{
701 		//Vertex Inputs
702 		auto inputPosition = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_POSITION));
703 		auto inputDepth = CUint4Lvalue(b.CreateInputUint(Nuanceur::SEMANTIC_TEXCOORD, VERTEX_ATTRIB_LOCATION_DEPTH - 1));
704 		auto inputColor = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, VERTEX_ATTRIB_LOCATION_COLOR - 1));
705 		auto inputTexCoord = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, VERTEX_ATTRIB_LOCATION_TEXCOORD - 1));
706 		auto inputFog = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, VERTEX_ATTRIB_LOCATION_FOG - 1));
707 
708 		//Outputs
709 		auto outputPosition = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_SYSTEM_POSITION));
710 		auto outputDepth = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_TEXCOORD, 1));
711 		auto outputColor = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_TEXCOORD, 2));
712 		auto outputTexCoord = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_TEXCOORD, 3));
713 		auto outputFog = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_TEXCOORD, 4));
714 
715 		auto position = ((inputPosition->xy() + NewFloat2(b, 0.5f, 0.5f)) * NewFloat2(b, 2.f / DRAW_AREA_SIZE, 2.f / DRAW_AREA_SIZE) + NewFloat2(b, -1, -1));
716 
717 		outputPosition = NewFloat4(position, NewFloat2(b, 0.f, 1.f));
718 		outputDepth = ToFloat(inputDepth) / NewFloat4(b, DEPTH_MAX, DEPTH_MAX, DEPTH_MAX, DEPTH_MAX);
719 		outputColor = inputColor->xyzw();
720 		outputTexCoord = inputTexCoord->xyzw();
721 		outputFog = inputFog->xyzw();
722 	}
723 
724 	Framework::CMemStream shaderStream;
725 	Nuanceur::CSpirvShaderGenerator::Generate(shaderStream, b, Nuanceur::CSpirvShaderGenerator::SHADER_TYPE_VERTEX);
726 	shaderStream.Seek(0, Framework::STREAM_SEEK_SET);
727 	return Framework::Vulkan::CShaderModule(m_context->device, shaderStream);
728 }
729 
GetDepth(Nuanceur::CShaderBuilder & b,uint32 depthFormat,Nuanceur::CIntValue depthAddress,Nuanceur::CArrayUintValue memoryBuffer)730 static Nuanceur::CUintRvalue GetDepth(Nuanceur::CShaderBuilder& b, uint32 depthFormat,
731                                       Nuanceur::CIntValue depthAddress, Nuanceur::CArrayUintValue memoryBuffer)
732 {
733 	switch(depthFormat)
734 	{
735 	default:
736 		assert(false);
737 	case CGSHandler::PSMZ32:
738 		return CMemoryUtils::Memory_Read32(b, memoryBuffer, depthAddress);
739 	case CGSHandler::PSMZ24:
740 		return CMemoryUtils::Memory_Read24(b, memoryBuffer, depthAddress);
741 	case CGSHandler::PSMZ16:
742 	case CGSHandler::PSMZ16S:
743 		return CMemoryUtils::Memory_Read16(b, memoryBuffer, depthAddress);
744 	}
745 }
746 
ClampTexCoord(Nuanceur::CShaderBuilder & b,uint32 clampMode,Nuanceur::CIntValue texCoord,Nuanceur::CIntValue texSize,Nuanceur::CIntValue clampMin,Nuanceur::CIntValue clampMax)747 static Nuanceur::CIntRvalue ClampTexCoord(Nuanceur::CShaderBuilder& b, uint32 clampMode, Nuanceur::CIntValue texCoord, Nuanceur::CIntValue texSize,
748                                           Nuanceur::CIntValue clampMin, Nuanceur::CIntValue clampMax)
749 {
750 	using namespace Nuanceur;
751 
752 	switch(clampMode)
753 	{
754 	default:
755 		assert(false);
756 	case CGSHandler::CLAMP_MODE_REPEAT:
757 		return texCoord & (texSize - NewInt(b, 1));
758 	case CGSHandler::CLAMP_MODE_CLAMP:
759 		return Clamp(texCoord, NewInt(b, 0), texSize - NewInt(b, 1));
760 	case CGSHandler::CLAMP_MODE_REGION_CLAMP:
761 		return Clamp(texCoord, clampMin, clampMax);
762 	case CGSHandler::CLAMP_MODE_REGION_REPEAT:
763 		return (texCoord & clampMin) | clampMax;
764 	}
765 };
766 
GetClutColor(Nuanceur::CShaderBuilder & b,uint32 textureFormat,uint32 clutFormat,Nuanceur::CUintValue texPixel,Nuanceur::CArrayUintValue clutBuffer,Nuanceur::CIntValue texCsa)767 static Nuanceur::CFloat4Rvalue GetClutColor(Nuanceur::CShaderBuilder& b,
768                                             uint32 textureFormat, uint32 clutFormat, Nuanceur::CUintValue texPixel,
769                                             Nuanceur::CArrayUintValue clutBuffer, Nuanceur::CIntValue texCsa)
770 {
771 	using namespace Nuanceur;
772 
773 	assert(CGsPixelFormats::IsPsmIDTEX(textureFormat));
774 
775 	bool idx8 = CGsPixelFormats::IsPsmIDTEX8(textureFormat) ? 1 : 0;
776 	auto clutIndex = CIntLvalue(b.CreateTemporaryInt());
777 
778 	if(idx8)
779 	{
780 		clutIndex = ToInt(texPixel);
781 	}
782 	else
783 	{
784 		clutIndex = ToInt(texPixel) + texCsa;
785 	}
786 
787 	switch(clutFormat)
788 	{
789 	default:
790 		assert(false);
791 	case CGSHandler::PSMCT32:
792 	case CGSHandler::PSMCT24:
793 	{
794 		auto clutIndexLo = clutIndex;
795 		auto clutIndexHi = clutIndex + NewInt(b, 0x100);
796 		auto clutPixelLo = Load(clutBuffer, clutIndexLo);
797 		auto clutPixelHi = Load(clutBuffer, clutIndexHi);
798 		auto clutPixel = clutPixelLo | (clutPixelHi << NewUint(b, 16));
799 		return CMemoryUtils::PSM32ToVec4(b, clutPixel);
800 	}
801 	case CGSHandler::PSMCT16:
802 	{
803 		auto clutPixel = Load(clutBuffer, clutIndex);
804 		return CMemoryUtils::PSM16ToVec4(b, clutPixel);
805 	}
806 	}
807 }
808 
GetTextureColor(Nuanceur::CShaderBuilder & b,uint32 textureFormat,uint32 clutFormat,Nuanceur::CInt2Value texelPos,Nuanceur::CArrayUintValue memoryBuffer,Nuanceur::CArrayUintValue clutBuffer,Nuanceur::CImageUint2DValue texSwizzleTable,Nuanceur::CIntValue texBufAddress,Nuanceur::CIntValue texBufWidth,Nuanceur::CIntValue texCsa)809 static Nuanceur::CFloat4Rvalue GetTextureColor(Nuanceur::CShaderBuilder& b, uint32 textureFormat, uint32 clutFormat,
810                                                Nuanceur::CInt2Value texelPos, Nuanceur::CArrayUintValue memoryBuffer, Nuanceur::CArrayUintValue clutBuffer,
811                                                Nuanceur::CImageUint2DValue texSwizzleTable, Nuanceur::CIntValue texBufAddress, Nuanceur::CIntValue texBufWidth,
812                                                Nuanceur::CIntValue texCsa)
813 {
814 	using namespace Nuanceur;
815 
816 	switch(textureFormat)
817 	{
818 	default:
819 		assert(false);
820 	case CGSHandler::PSMCT32:
821 	case CGSHandler::PSMZ32:
822 	{
823 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
824 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
825 		auto texPixel = CMemoryUtils::Memory_Read32(b, memoryBuffer, texAddress);
826 		return CMemoryUtils::PSM32ToVec4(b, texPixel);
827 	}
828 	case CGSHandler::PSMCT24:
829 	case CGSHandler::PSMCT24_UNK:
830 	case CGSHandler::PSMZ24:
831 	{
832 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
833 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
834 		auto texPixel = CMemoryUtils::Memory_Read24(b, memoryBuffer, texAddress);
835 		return CMemoryUtils::PSM32ToVec4(b, texPixel);
836 	}
837 	case CGSHandler::PSMCT16:
838 	case CGSHandler::PSMCT16S:
839 	case CGSHandler::PSMZ16:
840 	{
841 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT16>(
842 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
843 		auto texPixel = CMemoryUtils::Memory_Read16(b, memoryBuffer, texAddress);
844 		return CMemoryUtils::PSM16ToVec4(b, texPixel);
845 	}
846 	case CGSHandler::PSMT8:
847 	{
848 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMT8>(
849 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
850 		auto texPixel = CMemoryUtils::Memory_Read8(b, memoryBuffer, texAddress);
851 		return GetClutColor(b, textureFormat, clutFormat, texPixel, clutBuffer, texCsa);
852 	}
853 	case CGSHandler::PSMT4:
854 	{
855 		auto texAddress = CMemoryUtils::GetPixelAddress_PSMT4(
856 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
857 		auto texPixel = CMemoryUtils::Memory_Read4(b, memoryBuffer, texAddress);
858 		return GetClutColor(b, textureFormat, clutFormat, texPixel, clutBuffer, texCsa);
859 	}
860 	case CGSHandler::PSMT8H:
861 	{
862 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
863 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
864 		auto texPixel = CMemoryUtils::Memory_Read8(b, memoryBuffer, texAddress + NewInt(b, 3));
865 		return GetClutColor(b, textureFormat, clutFormat, texPixel, clutBuffer, texCsa);
866 	}
867 	case CGSHandler::PSMT4HL:
868 	{
869 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
870 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
871 		auto texNibAddress = (texAddress + NewInt(b, 3)) * NewInt(b, 2);
872 		auto texPixel = CMemoryUtils::Memory_Read4(b, memoryBuffer, texNibAddress);
873 		return GetClutColor(b, textureFormat, clutFormat, texPixel, clutBuffer, texCsa);
874 	}
875 	case CGSHandler::PSMT4HH:
876 	{
877 		auto texAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
878 		    b, texSwizzleTable, texBufAddress, texBufWidth, texelPos);
879 		auto texNibAddress = ((texAddress + NewInt(b, 3)) * NewInt(b, 2)) | NewInt(b, 1);
880 		auto texPixel = CMemoryUtils::Memory_Read4(b, memoryBuffer, texNibAddress);
881 		return GetClutColor(b, textureFormat, clutFormat, texPixel, clutBuffer, texCsa);
882 	}
883 	}
884 }
885 
ExpandAlpha(Nuanceur::CShaderBuilder & b,uint32 textureFormat,uint32 clutFormat,uint32 texBlackIsTransparent,Nuanceur::CFloat4Lvalue & textureColor,Nuanceur::CFloatValue textureA0,Nuanceur::CFloatValue textureA1)886 static void ExpandAlpha(Nuanceur::CShaderBuilder& b, uint32 textureFormat, uint32 clutFormat,
887                         uint32 texBlackIsTransparent, Nuanceur::CFloat4Lvalue& textureColor,
888                         Nuanceur::CFloatValue textureA0, Nuanceur::CFloatValue textureA1)
889 {
890 	using namespace Nuanceur;
891 
892 	bool requiresExpansion = false;
893 	if(CGsPixelFormats::IsPsmIDTEX(textureFormat))
894 	{
895 		requiresExpansion =
896 		    (clutFormat == CGSHandler::PSMCT16) ||
897 		    (clutFormat == CGSHandler::PSMCT16S);
898 	}
899 	else
900 	{
901 		requiresExpansion =
902 		    (textureFormat == CGSHandler::PSMCT24) ||
903 		    (textureFormat == CGSHandler::PSMCT16) ||
904 		    (textureFormat == CGSHandler::PSMCT16S);
905 	}
906 
907 	if(!requiresExpansion)
908 	{
909 		return;
910 	}
911 
912 	auto alpha = Mix(textureA0, textureA1, textureColor->w());
913 	textureColor = NewFloat4(textureColor->xyz(), alpha);
914 
915 	if(texBlackIsTransparent)
916 	{
917 		//Add rgb and check if it is zero (assume rgb is positive)
918 		//Set alpha to 0 if it is
919 		auto colorSum = textureColor->x() + textureColor->y() + textureColor->z();
920 		BeginIf(b, colorSum == NewFloat(b, 0));
921 		{
922 			textureColor = NewFloat4(textureColor->xyz(), NewFloat(b, 0));
923 		}
924 		EndIf(b);
925 	}
926 }
927 
GetAlphaABD(Nuanceur::CShaderBuilder & b,uint32 alphaABD,Nuanceur::CFloat4Value srcColor,Nuanceur::CFloat4Value dstColor)928 static Nuanceur::CFloat3Rvalue GetAlphaABD(Nuanceur::CShaderBuilder& b, uint32 alphaABD,
929                                            Nuanceur::CFloat4Value srcColor, Nuanceur::CFloat4Value dstColor)
930 {
931 	switch(alphaABD)
932 	{
933 	default:
934 		assert(false);
935 	case CGSHandler::ALPHABLEND_ABD_CS:
936 		return srcColor->xyz();
937 	case CGSHandler::ALPHABLEND_ABD_CD:
938 		return dstColor->xyz();
939 	case CGSHandler::ALPHABLEND_ABD_ZERO:
940 		return NewFloat3(b, 0, 0, 0);
941 	}
942 }
943 
GetAlphaC(Nuanceur::CShaderBuilder & b,uint32 alphaC,Nuanceur::CFloat4Value srcColor,Nuanceur::CFloat4Value dstColor,Nuanceur::CFloatValue alphaFix)944 static Nuanceur::CFloat3Rvalue GetAlphaC(Nuanceur::CShaderBuilder& b, uint32 alphaC,
945                                          Nuanceur::CFloat4Value srcColor, Nuanceur::CFloat4Value dstColor, Nuanceur::CFloatValue alphaFix)
946 {
947 	switch(alphaC)
948 	{
949 	default:
950 		assert(false);
951 	case CGSHandler::ALPHABLEND_C_AS:
952 		return srcColor->www();
953 	case CGSHandler::ALPHABLEND_C_AD:
954 		return dstColor->www();
955 	case CGSHandler::ALPHABLEND_C_FIX:
956 		return alphaFix->xxx();
957 	}
958 }
959 
DestinationAlphaTest(Nuanceur::CShaderBuilder & b,uint32 framebufferFormat,uint32 dstAlphaTestRef,Nuanceur::CUintValue dstPixel,Nuanceur::CBoolLvalue writeColor,Nuanceur::CBoolLvalue writeDepth)960 static void DestinationAlphaTest(Nuanceur::CShaderBuilder& b, uint32 framebufferFormat,
961                                  uint32 dstAlphaTestRef, Nuanceur::CUintValue dstPixel,
962                                  Nuanceur::CBoolLvalue writeColor, Nuanceur::CBoolLvalue writeDepth)
963 {
964 	using namespace Nuanceur;
965 
966 	auto alphaBit = CUintLvalue(b.CreateTemporaryUint());
967 	switch(framebufferFormat)
968 	{
969 	case CGSHandler::PSMCT32:
970 		alphaBit = dstPixel & NewUint(b, 0x80000000);
971 		break;
972 	case CGSHandler::PSMCT16:
973 	case CGSHandler::PSMCT16S:
974 		alphaBit = dstPixel & NewUint(b, 0x8000);
975 		break;
976 	default:
977 		assert(false);
978 		break;
979 	}
980 
981 	auto dstAlphaTestResult = CBoolLvalue(b.CreateTemporaryBool());
982 	if(dstAlphaTestRef)
983 	{
984 		//Pixels with alpha bit set pass
985 		dstAlphaTestResult = (alphaBit != NewUint(b, 0));
986 	}
987 	else
988 	{
989 		dstAlphaTestResult = (alphaBit == NewUint(b, 0));
990 	}
991 
992 	BeginIf(b, !dstAlphaTestResult);
993 	{
994 		writeColor = NewBool(b, false);
995 		writeDepth = NewBool(b, false);
996 	}
997 	EndIf(b);
998 }
999 
WriteToFramebuffer(Nuanceur::CShaderBuilder & b,uint32 framebufferFormat,Nuanceur::CArrayUintValue memoryBuffer,Nuanceur::CIntValue fbAddress,Nuanceur::CUintValue fbWriteMask,Nuanceur::CUintValue dstPixel,Nuanceur::CFloat4Value dstColor)1000 static void WriteToFramebuffer(Nuanceur::CShaderBuilder& b, uint32 framebufferFormat,
1001                                Nuanceur::CArrayUintValue memoryBuffer, Nuanceur::CIntValue fbAddress,
1002                                Nuanceur::CUintValue fbWriteMask, Nuanceur::CUintValue dstPixel, Nuanceur::CFloat4Value dstColor)
1003 {
1004 	switch(framebufferFormat)
1005 	{
1006 	default:
1007 		assert(false);
1008 	case CGSHandler::PSMCT32:
1009 	{
1010 		dstPixel = (CMemoryUtils::Vec4ToPSM32(b, dstColor) & fbWriteMask) | (dstPixel & ~fbWriteMask);
1011 		CMemoryUtils::Memory_Write32(b, memoryBuffer, fbAddress, dstPixel);
1012 	}
1013 	break;
1014 	case CGSHandler::PSMCT24:
1015 	case CGSHandler::PSMZ24:
1016 	{
1017 		dstPixel = (CMemoryUtils::Vec4ToPSM32(b, dstColor) & fbWriteMask) | (dstPixel & ~fbWriteMask);
1018 		CMemoryUtils::Memory_Write24(b, memoryBuffer, fbAddress, dstPixel);
1019 	}
1020 	break;
1021 	case CGSHandler::PSMCT16:
1022 	case CGSHandler::PSMCT16S:
1023 	{
1024 		dstPixel = (CMemoryUtils::Vec4ToPSM16(b, dstColor) & fbWriteMask) | (dstPixel & ~fbWriteMask);
1025 		CMemoryUtils::Memory_Write16(b, memoryBuffer, fbAddress, dstPixel);
1026 	}
1027 	break;
1028 	}
1029 }
1030 
WriteToDepthbuffer(Nuanceur::CShaderBuilder & b,uint32 depthbufferFormat,Nuanceur::CArrayUintValue memoryBuffer,Nuanceur::CIntValue depthAddress,Nuanceur::CUintValue srcDepth)1031 static void WriteToDepthbuffer(Nuanceur::CShaderBuilder& b, uint32 depthbufferFormat,
1032                                Nuanceur::CArrayUintValue memoryBuffer, Nuanceur::CIntValue depthAddress, Nuanceur::CUintValue srcDepth)
1033 {
1034 	switch(depthbufferFormat)
1035 	{
1036 	case CGSHandler::PSMZ32:
1037 	{
1038 		CMemoryUtils::Memory_Write32(b, memoryBuffer, depthAddress, srcDepth);
1039 	}
1040 	break;
1041 	case CGSHandler::PSMZ24:
1042 	{
1043 		auto dstDepth = srcDepth & NewUint(b, 0xFFFFFF);
1044 		CMemoryUtils::Memory_Write24(b, memoryBuffer, depthAddress, dstDepth);
1045 	}
1046 	break;
1047 	case CGSHandler::PSMZ16:
1048 	case CGSHandler::PSMZ16S:
1049 	{
1050 		auto dstDepth = srcDepth & NewUint(b, 0xFFFF);
1051 		CMemoryUtils::Memory_Write16(b, memoryBuffer, depthAddress, dstDepth);
1052 	}
1053 	break;
1054 	default:
1055 		assert(false);
1056 		break;
1057 	}
1058 }
1059 
CreateFragmentShader(const PIPELINE_CAPS & caps)1060 Framework::Vulkan::CShaderModule CDraw::CreateFragmentShader(const PIPELINE_CAPS& caps)
1061 {
1062 	using namespace Nuanceur;
1063 
1064 	auto b = CShaderBuilder();
1065 
1066 	{
1067 		//Inputs
1068 		auto inputPosition = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_SYSTEM_POSITION));
1069 		auto inputDepth = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, 1));
1070 		auto inputColor = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, 2));
1071 		auto inputTexCoord = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, 3));
1072 		auto inputFog = CFloat4Lvalue(b.CreateInput(Nuanceur::SEMANTIC_TEXCOORD, 4));
1073 
1074 		//Outputs
1075 		auto outputColor = CFloat4Lvalue(b.CreateOutput(Nuanceur::SEMANTIC_SYSTEM_COLOR));
1076 
1077 		auto memoryBuffer = CArrayUintValue(b.CreateUniformArrayUint("memoryBuffer", DESCRIPTOR_LOCATION_BUFFER_MEMORY));
1078 		auto clutBuffer = CArrayUintValue(b.CreateUniformArrayUint("clutBuffer", DESCRIPTOR_LOCATION_IMAGE_CLUT));
1079 		auto texSwizzleTable = CImageUint2DValue(b.CreateImage2DUint(DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_TEX));
1080 		auto fbSwizzleTable = CImageUint2DValue(b.CreateImage2DUint(DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_FB));
1081 		auto depthSwizzleTable = CImageUint2DValue(b.CreateImage2DUint(DESCRIPTOR_LOCATION_IMAGE_SWIZZLETABLE_DEPTH));
1082 
1083 		//Push constants
1084 		auto fbDepthParams = CInt4Lvalue(b.CreateUniformInt4("fbDepthParams", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1085 		auto texParams0 = CInt4Lvalue(b.CreateUniformInt4("texParams0", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1086 		auto texParams1 = CInt4Lvalue(b.CreateUniformInt4("texParams1", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1087 		auto texParams2 = CInt4Lvalue(b.CreateUniformInt4("texParams2", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1088 		auto alphaFbParams = CInt4Lvalue(b.CreateUniformInt4("alphaFbParams", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1089 		auto fogColor = CFloat4Lvalue(b.CreateUniformFloat4("fogColor", Nuanceur::UNIFORM_UNIT_PUSHCONSTANT));
1090 
1091 		auto fbBufAddress = fbDepthParams->x();
1092 		auto fbBufWidth = fbDepthParams->y();
1093 		auto depthBufAddress = fbDepthParams->z();
1094 		auto depthBufWidth = fbDepthParams->w();
1095 
1096 		auto texBufAddress = texParams0->x();
1097 		auto texBufWidth = texParams0->y();
1098 		auto texSize = texParams0->zw();
1099 
1100 		auto texCsa = texParams1->x();
1101 		auto texA0 = ToFloat(texParams1->y()) / NewFloat(b, 255.f);
1102 		auto texA1 = ToFloat(texParams1->z()) / NewFloat(b, 255.f);
1103 
1104 		auto clampMin = texParams2->xy();
1105 		auto clampMax = texParams2->zw();
1106 
1107 		auto fbWriteMask = ToUint(alphaFbParams->x());
1108 		auto alphaFix = ToFloat(alphaFbParams->y()) / NewFloat(b, 255.f);
1109 		auto alphaRef = ToUint(alphaFbParams->z());
1110 
1111 		auto srcDepth = ToUint(inputDepth->x() * NewFloat(b, DEPTH_MAX));
1112 
1113 		//TODO: Try vectorized shift
1114 		//auto imageColor = ToUint(inputColor * NewFloat4(b, 255.f, 255.f, 255.f, 255.f));
1115 
1116 		auto textureColor = CFloat4Lvalue(b.CreateVariableFloat("textureColor"));
1117 		textureColor = NewFloat4(b, 1, 1, 1, 1);
1118 
1119 		if(caps.hasTexture)
1120 		{
1121 			auto clampCoordinates =
1122 			    [&](CInt2Value textureIuv) {
1123 				    auto clampU = ClampTexCoord(b, caps.texClampU, textureIuv->x(), texSize->x(), clampMin->x(), clampMax->x());
1124 				    auto clampV = ClampTexCoord(b, caps.texClampV, textureIuv->y(), texSize->y(), clampMin->y(), clampMax->y());
1125 				    return NewInt2(clampU, clampV);
1126 			    };
1127 
1128 			auto getTextureColor =
1129 			    [&](CInt2Value textureIuv, CFloat4Lvalue& textureColor) {
1130 				    textureColor = GetTextureColor(b, caps.textureFormat, caps.clutFormat, textureIuv,
1131 				                                   memoryBuffer, clutBuffer, texSwizzleTable, texBufAddress, texBufWidth, texCsa);
1132 				    if(caps.textureHasAlpha)
1133 				    {
1134 					    ExpandAlpha(b, caps.textureFormat, caps.clutFormat, caps.textureBlackIsTransparent, textureColor, texA0, texA1);
1135 				    }
1136 			    };
1137 
1138 			auto textureSt = CFloat2Lvalue(b.CreateVariableFloat("textureSt"));
1139 			textureSt = inputTexCoord->xy() / inputTexCoord->zz();
1140 
1141 			if(caps.textureUseLinearFiltering)
1142 			{
1143 				//Linear Sampling
1144 				//-------------------------------------
1145 
1146 				auto textureLinearPos = CFloat2Lvalue(b.CreateVariableFloat("textureLinearPos"));
1147 				auto textureLinearAb = CFloat2Lvalue(b.CreateVariableFloat("textureLinearAb"));
1148 				auto textureIuv0 = CInt2Lvalue(b.CreateVariableInt("textureIuv0"));
1149 				auto textureIuv1 = CInt2Lvalue(b.CreateVariableInt("textureIuv1"));
1150 				auto textureColorA = CFloat4Lvalue(b.CreateVariableFloat("textureColorA"));
1151 				auto textureColorB = CFloat4Lvalue(b.CreateVariableFloat("textureColorB"));
1152 				auto textureColorC = CFloat4Lvalue(b.CreateVariableFloat("textureColorC"));
1153 				auto textureColorD = CFloat4Lvalue(b.CreateVariableFloat("textureColorD"));
1154 
1155 				textureLinearPos = (textureSt * ToFloat(texSize)) + NewFloat2(b, -0.5f, -0.5f);
1156 				textureLinearAb = Fract(textureLinearPos);
1157 
1158 				textureIuv0 = ToInt(textureLinearPos);
1159 				textureIuv1 = textureIuv0 + NewInt2(b, 1, 1);
1160 
1161 				auto textureClampIuv0 = clampCoordinates(textureIuv0);
1162 				auto textureClampIuv1 = clampCoordinates(textureIuv1);
1163 
1164 				getTextureColor(NewInt2(textureClampIuv0->x(), textureClampIuv0->y()), textureColorA);
1165 				getTextureColor(NewInt2(textureClampIuv1->x(), textureClampIuv0->y()), textureColorB);
1166 				getTextureColor(NewInt2(textureClampIuv0->x(), textureClampIuv1->y()), textureColorC);
1167 				getTextureColor(NewInt2(textureClampIuv1->x(), textureClampIuv1->y()), textureColorD);
1168 
1169 				auto factorA = (NewFloat(b, 1.0f) - textureLinearAb->x()) * (NewFloat(b, 1.0f) - textureLinearAb->y());
1170 				auto factorB = textureLinearAb->x() * (NewFloat(b, 1.0f) - textureLinearAb->y());
1171 				auto factorC = (NewFloat(b, 1.0f) - textureLinearAb->x()) * textureLinearAb->y();
1172 				auto factorD = textureLinearAb->x() * textureLinearAb->y();
1173 
1174 				textureColor =
1175 				    textureColorA * factorA->xxxx() +
1176 				    textureColorB * factorB->xxxx() +
1177 				    textureColorC * factorC->xxxx() +
1178 				    textureColorD * factorD->xxxx();
1179 			}
1180 			else
1181 			{
1182 				//Point Sampling
1183 				//------------------------------
1184 				auto texelPos = ToInt(textureSt * ToFloat(texSize));
1185 				auto clampTexPos = clampCoordinates(texelPos);
1186 				getTextureColor(clampTexPos, textureColor);
1187 			}
1188 
1189 			switch(caps.textureFunction)
1190 			{
1191 			case CGSHandler::TEX0_FUNCTION_MODULATE:
1192 				textureColor = textureColor * inputColor * NewFloat4(b, 2, 2, 2, 2);
1193 				textureColor = Clamp(textureColor, NewFloat4(b, 0, 0, 0, 0), NewFloat4(b, 1, 1, 1, 1));
1194 				if(!caps.textureHasAlpha)
1195 				{
1196 					textureColor = NewFloat4(textureColor->xyz(), inputColor->w());
1197 				}
1198 				break;
1199 			case CGSHandler::TEX0_FUNCTION_DECAL:
1200 				//Nothing to do
1201 				break;
1202 			case CGSHandler::TEX0_FUNCTION_HIGHLIGHT:
1203 			{
1204 				auto tempColor = (textureColor->xyz() * inputColor->xyz() * NewFloat3(b, 2, 2, 2)) + inputColor->www();
1205 				if(caps.textureHasAlpha)
1206 				{
1207 					textureColor = NewFloat4(tempColor, inputColor->w() + textureColor->w());
1208 				}
1209 				else
1210 				{
1211 					textureColor = NewFloat4(tempColor, inputColor->w());
1212 				}
1213 				textureColor = Clamp(textureColor, NewFloat4(b, 0, 0, 0, 0), NewFloat4(b, 1, 1, 1, 1));
1214 			}
1215 			break;
1216 			case CGSHandler::TEX0_FUNCTION_HIGHLIGHT2:
1217 			{
1218 				auto tempColor = (textureColor->xyz() * inputColor->xyz() * NewFloat3(b, 2, 2, 2)) + inputColor->www();
1219 				if(caps.textureHasAlpha)
1220 				{
1221 					textureColor = NewFloat4(tempColor, textureColor->w());
1222 				}
1223 				else
1224 				{
1225 					textureColor = NewFloat4(tempColor, inputColor->w());
1226 				}
1227 				textureColor = Clamp(textureColor, NewFloat4(b, 0, 0, 0, 0), NewFloat4(b, 1, 1, 1, 1));
1228 			}
1229 			break;
1230 			default:
1231 				assert(false);
1232 				break;
1233 			}
1234 		}
1235 		else
1236 		{
1237 			textureColor = inputColor->xyzw();
1238 		}
1239 
1240 		auto writeColor = CBoolLvalue(b.CreateVariableBool("writeColor"));
1241 		auto writeDepth = CBoolLvalue(b.CreateVariableBool("writeDepth"));
1242 		auto writeAlpha = CBoolLvalue(b.CreateVariableBool("writeAlpha"));
1243 
1244 		writeColor = NewBool(b, true);
1245 		writeDepth = NewBool(b, true);
1246 		writeAlpha = NewBool(b, true);
1247 
1248 		//---------------------------------------------------------------------------
1249 		//Alpha Test
1250 
1251 		bool canDiscardAlpha =
1252 		    (caps.alphaTestFunction != CGSHandler::ALPHA_TEST_ALWAYS) &&
1253 		    (caps.alphaTestFailAction == CGSHandler::ALPHA_TEST_FAIL_RGBONLY);
1254 		auto alphaUint = ToUint(textureColor->w() * NewFloat(b, 255.f));
1255 		auto alphaTestResult = CBoolLvalue(b.CreateTemporaryBool());
1256 		switch(caps.alphaTestFunction)
1257 		{
1258 		default:
1259 			assert(false);
1260 		case CGSHandler::ALPHA_TEST_ALWAYS:
1261 			alphaTestResult = NewBool(b, true);
1262 			break;
1263 		case CGSHandler::ALPHA_TEST_NEVER:
1264 			alphaTestResult = NewBool(b, false);
1265 			break;
1266 		case CGSHandler::ALPHA_TEST_LESS:
1267 			alphaTestResult = alphaUint < alphaRef;
1268 			break;
1269 		case CGSHandler::ALPHA_TEST_LEQUAL:
1270 			alphaTestResult = alphaUint <= alphaRef;
1271 			break;
1272 		case CGSHandler::ALPHA_TEST_EQUAL:
1273 			alphaTestResult = alphaUint == alphaRef;
1274 			break;
1275 		case CGSHandler::ALPHA_TEST_GEQUAL:
1276 			alphaTestResult = alphaUint >= alphaRef;
1277 			break;
1278 		case CGSHandler::ALPHA_TEST_GREATER:
1279 			alphaTestResult = alphaUint > alphaRef;
1280 			break;
1281 		case CGSHandler::ALPHA_TEST_NOTEQUAL:
1282 			alphaTestResult = alphaUint != alphaRef;
1283 			break;
1284 		}
1285 
1286 		BeginIf(b, !alphaTestResult);
1287 		{
1288 			switch(caps.alphaTestFailAction)
1289 			{
1290 			default:
1291 				assert(false);
1292 			case CGSHandler::ALPHA_TEST_FAIL_KEEP:
1293 				writeColor = NewBool(b, false);
1294 				writeDepth = NewBool(b, false);
1295 				break;
1296 			case CGSHandler::ALPHA_TEST_FAIL_FBONLY:
1297 				writeDepth = NewBool(b, false);
1298 				break;
1299 			case CGSHandler::ALPHA_TEST_FAIL_ZBONLY:
1300 				writeColor = NewBool(b, false);
1301 				break;
1302 			case CGSHandler::ALPHA_TEST_FAIL_RGBONLY:
1303 				writeDepth = NewBool(b, false);
1304 				writeAlpha = NewBool(b, false);
1305 				break;
1306 			}
1307 		}
1308 		EndIf(b);
1309 
1310 		if(caps.hasFog)
1311 		{
1312 			auto fogMixColor = Mix(textureColor->xyz(), fogColor->xyz(), inputFog->xxx());
1313 			textureColor = NewFloat4(fogMixColor, textureColor->w());
1314 		}
1315 
1316 		auto screenPos = ToInt(inputPosition->xy());
1317 
1318 		auto fbAddress = CIntLvalue(b.CreateTemporaryInt());
1319 		auto depthAddress = CIntLvalue(b.CreateTemporaryInt());
1320 
1321 		switch(caps.framebufferFormat)
1322 		{
1323 		default:
1324 			assert(false);
1325 		case CGSHandler::PSMCT32:
1326 		case CGSHandler::PSMCT24:
1327 		case CGSHandler::PSMZ24:
1328 			fbAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT32>(
1329 			    b, fbSwizzleTable, fbBufAddress, fbBufWidth, screenPos);
1330 			break;
1331 		case CGSHandler::PSMCT16:
1332 		case CGSHandler::PSMCT16S:
1333 			fbAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMCT16>(
1334 			    b, fbSwizzleTable, fbBufAddress, fbBufWidth, screenPos);
1335 			break;
1336 		}
1337 
1338 		switch(caps.depthbufferFormat)
1339 		{
1340 		default:
1341 			assert(false);
1342 		case CGSHandler::PSMZ32:
1343 		case CGSHandler::PSMZ24:
1344 			depthAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMZ32>(
1345 			    b, depthSwizzleTable, depthBufAddress, depthBufWidth, screenPos);
1346 			break;
1347 		case CGSHandler::PSMZ16:
1348 		case CGSHandler::PSMZ16S:
1349 			//TODO: Use real swizzle table
1350 			depthAddress = CMemoryUtils::GetPixelAddress<CGsPixelFormats::STORAGEPSMZ16>(
1351 			    b, depthSwizzleTable, depthBufAddress, depthBufWidth, screenPos);
1352 			break;
1353 		}
1354 
1355 		//Prevent writing out of bounds (seems to cause wierd issues
1356 		//on Intel GPUs with games such as SNK vs. Capcom: SVC Chaos)
1357 		fbAddress = fbAddress & NewInt(b, CGSHandler::RAMSIZE - 1);
1358 		depthAddress = depthAddress & NewInt(b, CGSHandler::RAMSIZE - 1);
1359 
1360 		BeginInvocationInterlock(b);
1361 
1362 		auto dstPixel = CUintLvalue(b.CreateVariableUint("dstPixel"));
1363 		auto dstColor = CFloat4Lvalue(b.CreateVariableFloat("dstColor"));
1364 		auto dstAlpha = CFloat4Lvalue(b.CreateVariableFloat("dstAlpha"));
1365 		auto dstDepth = CUintLvalue(b.CreateVariableUint("dstDepth"));
1366 
1367 		bool needsDstColor = (caps.hasAlphaBlending != 0) || (caps.maskColor != 0) || canDiscardAlpha || (caps.hasDstAlphaTest != 0);
1368 		if(needsDstColor)
1369 		{
1370 			switch(caps.framebufferFormat)
1371 			{
1372 			default:
1373 				assert(false);
1374 			case CGSHandler::PSMCT32:
1375 			{
1376 				dstPixel = CMemoryUtils::Memory_Read32(b, memoryBuffer, fbAddress);
1377 				dstColor = CMemoryUtils::PSM32ToVec4(b, dstPixel);
1378 			}
1379 			break;
1380 			case CGSHandler::PSMCT24:
1381 			{
1382 				dstPixel = CMemoryUtils::Memory_Read24(b, memoryBuffer, fbAddress);
1383 				dstColor = CMemoryUtils::PSM32ToVec4(b, dstPixel);
1384 			}
1385 			break;
1386 			case CGSHandler::PSMCT16:
1387 			case CGSHandler::PSMCT16S:
1388 			{
1389 				dstPixel = CMemoryUtils::Memory_Read16(b, memoryBuffer, fbAddress);
1390 				dstColor = CMemoryUtils::PSM16ToVec4(b, dstPixel);
1391 			}
1392 			break;
1393 			}
1394 
1395 			if(canDiscardAlpha)
1396 			{
1397 				dstAlpha = dstColor->wwww();
1398 			}
1399 		}
1400 		else
1401 		{
1402 			dstPixel = NewUint(b, 0);
1403 		}
1404 
1405 		if(caps.hasDstAlphaTest)
1406 		{
1407 			DestinationAlphaTest(b, caps.framebufferFormat, caps.dstAlphaTestRef, dstPixel, writeColor, writeDepth);
1408 		}
1409 
1410 		bool needsDstDepth = (caps.depthTestFunction == CGSHandler::DEPTH_TEST_GEQUAL) ||
1411 		                     (caps.depthTestFunction == CGSHandler::DEPTH_TEST_GREATER);
1412 		if(needsDstDepth)
1413 		{
1414 			dstDepth = GetDepth(b, caps.depthbufferFormat, depthAddress, memoryBuffer);
1415 		}
1416 
1417 		auto depthTestResult = CBoolLvalue(b.CreateTemporaryBool());
1418 		switch(caps.depthTestFunction)
1419 		{
1420 		case CGSHandler::DEPTH_TEST_ALWAYS:
1421 			depthTestResult = NewBool(b, true);
1422 			break;
1423 		case CGSHandler::DEPTH_TEST_NEVER:
1424 			depthTestResult = NewBool(b, false);
1425 			break;
1426 		case CGSHandler::DEPTH_TEST_GEQUAL:
1427 			depthTestResult = srcDepth >= dstDepth;
1428 			break;
1429 		case CGSHandler::DEPTH_TEST_GREATER:
1430 			depthTestResult = srcDepth > dstDepth;
1431 			break;
1432 		}
1433 
1434 		BeginIf(b, !depthTestResult);
1435 		{
1436 			writeColor = NewBool(b, false);
1437 			writeDepth = NewBool(b, false);
1438 		}
1439 		EndIf(b);
1440 
1441 		if(caps.hasAlphaBlending)
1442 		{
1443 			//Blend
1444 			auto alphaA = GetAlphaABD(b, caps.alphaA, textureColor, dstColor);
1445 			auto alphaB = GetAlphaABD(b, caps.alphaB, textureColor, dstColor);
1446 			auto alphaC = GetAlphaC(b, caps.alphaC, textureColor, dstColor, alphaFix);
1447 			auto alphaD = GetAlphaABD(b, caps.alphaD, textureColor, dstColor);
1448 
1449 			auto blendedColor = ((alphaA - alphaB) * alphaC * NewFloat3(b, 2, 2, 2)) + alphaD;
1450 			auto finalColor = NewFloat4(blendedColor, textureColor->w());
1451 			dstColor = Clamp(finalColor, NewFloat4(b, 0, 0, 0, 0), NewFloat4(b, 1, 1, 1, 1));
1452 		}
1453 		else
1454 		{
1455 			dstColor = textureColor->xyzw();
1456 		}
1457 
1458 		if(canDiscardAlpha)
1459 		{
1460 			BeginIf(b, !writeAlpha);
1461 			{
1462 				dstColor = NewFloat4(dstColor->xyz(), dstAlpha->x());
1463 			}
1464 			EndIf(b);
1465 		}
1466 
1467 		BeginIf(b, writeColor);
1468 		{
1469 			WriteToFramebuffer(b, caps.framebufferFormat, memoryBuffer, fbAddress, fbWriteMask, dstPixel, dstColor);
1470 		}
1471 		EndIf(b);
1472 
1473 		if(caps.writeDepth)
1474 		{
1475 			BeginIf(b, writeDepth);
1476 			{
1477 				WriteToDepthbuffer(b, caps.depthbufferFormat, memoryBuffer, depthAddress, srcDepth);
1478 			}
1479 			EndIf(b);
1480 		}
1481 
1482 		EndInvocationInterlock(b);
1483 
1484 		outputColor = dstColor->xyzw();
1485 	}
1486 
1487 	Framework::CMemStream shaderStream;
1488 	Nuanceur::CSpirvShaderGenerator::Generate(shaderStream, b, Nuanceur::CSpirvShaderGenerator::SHADER_TYPE_FRAGMENT);
1489 	shaderStream.Seek(0, Framework::STREAM_SEEK_SET);
1490 	return Framework::Vulkan::CShaderModule(m_context->device, shaderStream);
1491 }
1492 
CreateDrawImage()1493 void CDraw::CreateDrawImage()
1494 {
1495 	//This image is needed for MoltenVK/Metal which seem to discard pixels
1496 	//that don't write to any color attachment
1497 
1498 	m_drawImage = Framework::Vulkan::CImage(m_context->device, m_context->physicalDeviceMemoryProperties,
1499 	                                        VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, VK_FORMAT_R8G8B8A8_UNORM, DRAW_AREA_SIZE, DRAW_AREA_SIZE);
1500 
1501 	m_drawImage.SetLayout(m_context->queue, m_context->commandBufferPool,
1502 	                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1503 
1504 	m_drawImageView = m_drawImage.CreateImageView();
1505 }
1506