1 // Copyright 2017 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4 
5 #include "VideoCommon/TextureConverterShaderGen.h"
6 
7 #include "Common/Assert.h"
8 #include "Common/CommonTypes.h"
9 #include "VideoCommon/BPMemory.h"
10 #include "VideoCommon/VideoCommon.h"
11 #include "VideoCommon/VideoConfig.h"
12 
13 namespace TextureConversionShaderGen
14 {
GetShaderUid(EFBCopyFormat dst_format,bool is_depth_copy,bool is_intensity,bool scale_by_half,bool copy_filter)15 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
16                          bool scale_by_half, bool copy_filter)
17 {
18   TCShaderUid out;
19 
20   UidData* const uid_data = out.GetUidData();
21   uid_data->dst_format = dst_format;
22   uid_data->efb_has_alpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
23   uid_data->is_depth_copy = is_depth_copy;
24   uid_data->is_intensity = is_intensity;
25   uid_data->scale_by_half = scale_by_half;
26   uid_data->copy_filter = copy_filter;
27 
28   return out;
29 }
30 
WriteHeader(APIType api_type,ShaderCode & out)31 static void WriteHeader(APIType api_type, ShaderCode& out)
32 {
33   if (api_type == APIType::D3D)
34   {
35     out.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
36                  "  float2 src_offset, src_size;\n"
37                  "  float3 filter_coefficients;\n"
38                  "  float gamma_rcp;\n"
39                  "  float2 clamp_tb;\n"
40                  "  float pixel_height;\n"
41                  "}};\n\n");
42   }
43   else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
44   {
45     out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
46                  "  float2 src_offset, src_size;\n"
47                  "  float3 filter_coefficients;\n"
48                  "  float gamma_rcp;\n"
49                  "  float2 clamp_tb;\n"
50                  "  float pixel_height;\n"
51                  "}};\n");
52   }
53 }
54 
GenerateVertexShader(APIType api_type)55 ShaderCode GenerateVertexShader(APIType api_type)
56 {
57   ShaderCode out;
58   WriteHeader(api_type, out);
59 
60   if (api_type == APIType::D3D)
61   {
62     out.WriteFmt("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
63                  "          out float4 opos : SV_Position) {{\n");
64   }
65   else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
66   {
67     if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
68     {
69       out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"
70                    "  float3 v_tex0;\n"
71                    "}};\n");
72     }
73     else
74     {
75       out.WriteFmt("VARYING_LOCATION(0) out float3 v_tex0;\n");
76     }
77     out.WriteFmt("#define id gl_VertexID\n"
78                  "#define opos gl_Position\n"
79                  "void main() {{\n");
80   }
81   out.WriteFmt("  v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
82   out.WriteFmt(
83       "  opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
84   out.WriteFmt("  v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
85 
86   // NDC space is flipped in Vulkan
87   if (api_type == APIType::Vulkan)
88     out.WriteFmt("  opos.y = -opos.y;\n");
89 
90   out.WriteFmt("}}\n");
91 
92   return out;
93 }
94 
GeneratePixelShader(APIType api_type,const UidData * uid_data)95 ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
96 {
97   const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
98 
99   ShaderCode out;
100   WriteHeader(api_type, out);
101 
102   if (api_type == APIType::D3D)
103   {
104     out.WriteFmt("Texture2DArray tex0 : register(t0);\n"
105                  "SamplerState samp0 : register(s0);\n"
106                  "float4 SampleEFB(float3 uv, float y_offset) {{\n"
107                  "  return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
108                  "clamp_tb.x, clamp_tb.y), {}));\n"
109                  "}}\n\n",
110                  mono_depth ? "0.0" : "uv.z");
111     out.WriteFmt("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
112   }
113   else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
114   {
115     out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
116     out.WriteFmt("float4 SampleEFB(float3 uv, float y_offset) {{\n"
117                  "  return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
118                  "clamp_tb.x, clamp_tb.y), {}));\n"
119                  "}}\n",
120                  mono_depth ? "0.0" : "uv.z");
121     if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
122     {
123       out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
124                    "  float3 v_tex0;\n"
125                    "}};\n");
126     }
127     else
128     {
129       out.WriteFmt("VARYING_LOCATION(0) in vec3 v_tex0;\n");
130     }
131     out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
132                  "void main()\n{{\n");
133   }
134 
135   // The copy filter applies to both color and depth copies. This has been verified on hardware.
136   // The filter is only applied to the RGB channels, the alpha channel is left intact.
137   if (uid_data->copy_filter)
138   {
139     out.WriteFmt("  float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
140                  "  float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
141                  "  float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
142                  "  float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
143                  "                               current_row.rgb * filter_coefficients[1] +\n"
144                  "                               next_row.rgb * filter_coefficients[2], \n"
145                  "                             float3(1, 1, 1)), current_row.a);\n");
146   }
147   else
148   {
149     out.WriteFmt(
150         "  float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
151         "  float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
152         "                         current_row.a);\n");
153   }
154 
155   if (uid_data->is_depth_copy)
156   {
157     if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
158       out.WriteFmt("texcol.x = 1.0 - texcol.x;\n");
159 
160     out.WriteFmt("  int depth = int(texcol.x * 16777216.0);\n"
161 
162                  // Convert to Z24 format
163                  "  int4 workspace;\n"
164                  "  workspace.r = (depth >> 16) & 255;\n"
165                  "  workspace.g = (depth >> 8) & 255;\n"
166                  "  workspace.b = depth & 255;\n"
167 
168                  // Convert to Z4 format
169                  "  workspace.a = (depth >> 16) & 0xF0;\n"
170 
171                  // Normalize components to [0.0..1.0]
172                  "  texcol = float4(workspace) / 255.0;\n");
173     switch (uid_data->dst_format)
174     {
175     case EFBCopyFormat::R4:  // Z4
176       out.WriteFmt("  ocol0 = texcol.aaaa;\n");
177       break;
178 
179     case EFBCopyFormat::R8_0x1:  // Z8
180     case EFBCopyFormat::R8:      // Z8H
181       out.WriteFmt("  ocol0 = texcol.rrrr;\n");
182       break;
183 
184     case EFBCopyFormat::RA8:  // Z16
185       out.WriteFmt("  ocol0 = texcol.gggr;\n");
186       break;
187 
188     case EFBCopyFormat::RG8:  // Z16 (reverse order)
189       out.WriteFmt("  ocol0 = texcol.rrrg;\n");
190       break;
191 
192     case EFBCopyFormat::RGBA8:  // Z24X8
193       out.WriteFmt("  ocol0 = float4(texcol.rgb, 1.0);\n");
194       break;
195 
196     case EFBCopyFormat::G8:  // Z8M
197       out.WriteFmt("  ocol0 = texcol.gggg;\n");
198       break;
199 
200     case EFBCopyFormat::B8:  // Z8L
201       out.WriteFmt("  ocol0 = texcol.bbbb;\n");
202       break;
203 
204     case EFBCopyFormat::GB8:  // Z16L - copy lower 16 depth bits
205       // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
206       // stored as alpha)
207       // Used e.g. in Zelda: Skyward Sword
208       out.WriteFmt("  ocol0 = texcol.gggb;\n");
209       break;
210 
211     default:
212       ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
213       out.WriteFmt("  ocol0 = float4(texcol.bgr, 0.0);\n");
214       break;
215     }
216   }
217   else if (uid_data->is_intensity)
218   {
219     bool has_four_bits =
220         (uid_data->dst_format == EFBCopyFormat::R4 || uid_data->dst_format == EFBCopyFormat::RA4);
221     bool has_alpha =
222         (uid_data->dst_format == EFBCopyFormat::RA4 || uid_data->dst_format == EFBCopyFormat::RA8);
223 
224     switch (uid_data->dst_format)
225     {
226     case EFBCopyFormat::R4:      // I4
227     case EFBCopyFormat::R8_0x1:  // I8
228     case EFBCopyFormat::R8:      // I8
229     case EFBCopyFormat::RA4:     // IA4
230     case EFBCopyFormat::RA8:     // IA8
231       if (has_four_bits)
232         out.WriteFmt("  texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
233 
234       // TODO - verify these coefficients
235       out.WriteFmt("  const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
236                    "  float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
237                    "  ocol0 = float4(intensity, intensity, intensity, {});\n",
238                    has_alpha ? "texcol.a" : "intensity");
239       break;
240 
241     default:
242       ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X",
243                 static_cast<int>(uid_data->dst_format));
244       out.WriteFmt("  ocol0 = texcol;\n");
245       break;
246     }
247   }
248   else
249   {
250     if (!uid_data->efb_has_alpha)
251       out.WriteFmt("  texcol.a = 1.0;\n");
252 
253     switch (uid_data->dst_format)
254     {
255     case EFBCopyFormat::R4:  // R4
256       out.WriteFmt("  float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
257                    "  ocol0 = float4(red, red, red, red);\n");
258       break;
259 
260     case EFBCopyFormat::R8_0x1:  // R8
261     case EFBCopyFormat::R8:      // R8
262       out.WriteFmt("  ocol0 = texcol.rrrr;\n");
263       break;
264 
265     case EFBCopyFormat::RA4:  // RA4
266       out.WriteFmt("  float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
267                    "  ocol0 = red_alpha.rrrg;\n");
268       break;
269 
270     case EFBCopyFormat::RA8:  // RA8
271       out.WriteFmt("  ocol0 = texcol.rrra;\n");
272       break;
273 
274     case EFBCopyFormat::A8:  // A8
275       out.WriteFmt("  ocol0 = texcol.aaaa;\n");
276       break;
277 
278     case EFBCopyFormat::G8:  // G8
279       out.WriteFmt("  ocol0 = texcol.gggg;\n");
280       break;
281 
282     case EFBCopyFormat::B8:  // B8
283       out.WriteFmt("  ocol0 = texcol.bbbb;\n");
284       break;
285 
286     case EFBCopyFormat::RG8:  // RG8
287       out.WriteFmt("  ocol0 = texcol.rrrg;\n");
288       break;
289 
290     case EFBCopyFormat::GB8:  // GB8
291       out.WriteFmt("  ocol0 = texcol.gggb;\n");
292       break;
293 
294     case EFBCopyFormat::RGB565:  // RGB565
295       out.WriteFmt("  float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
296                    "  float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
297                    "  ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
298       break;
299 
300     case EFBCopyFormat::RGB5A3:  // RGB5A3
301       // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
302       // will need to be implemented once we move away from floats.
303       out.WriteFmt("  float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
304                    "  float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
305                    "  ocol0 = float4(color, alpha);\n");
306       break;
307 
308     case EFBCopyFormat::RGBA8:  // RGBA8
309       out.WriteFmt("  ocol0 = texcol;\n");
310       break;
311 
312     case EFBCopyFormat::XFB:
313       out.WriteFmt(
314           "  ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n");
315       break;
316 
317     default:
318       ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(uid_data->dst_format));
319       out.WriteFmt("  ocol0 = texcol;\n");
320       break;
321     }
322   }
323 
324   out.WriteFmt("}}\n");
325 
326   return out;
327 }
328 
329 }  // namespace TextureConversionShaderGen
330