1 // Copyright 2017 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #include "VideoCommon/TextureConverterShaderGen.h"
6
7 #include "Common/Assert.h"
8 #include "Common/CommonTypes.h"
9 #include "VideoCommon/BPMemory.h"
10 #include "VideoCommon/VideoCommon.h"
11 #include "VideoCommon/VideoConfig.h"
12
13 namespace TextureConversionShaderGen
14 {
GetShaderUid(EFBCopyFormat dst_format,bool is_depth_copy,bool is_intensity,bool scale_by_half,bool copy_filter)15 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
16 bool scale_by_half, bool copy_filter)
17 {
18 TCShaderUid out;
19
20 UidData* const uid_data = out.GetUidData();
21 uid_data->dst_format = dst_format;
22 uid_data->efb_has_alpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
23 uid_data->is_depth_copy = is_depth_copy;
24 uid_data->is_intensity = is_intensity;
25 uid_data->scale_by_half = scale_by_half;
26 uid_data->copy_filter = copy_filter;
27
28 return out;
29 }
30
WriteHeader(APIType api_type,ShaderCode & out)31 static void WriteHeader(APIType api_type, ShaderCode& out)
32 {
33 if (api_type == APIType::D3D)
34 {
35 out.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
36 " float2 src_offset, src_size;\n"
37 " float3 filter_coefficients;\n"
38 " float gamma_rcp;\n"
39 " float2 clamp_tb;\n"
40 " float pixel_height;\n"
41 "}};\n\n");
42 }
43 else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
44 {
45 out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
46 " float2 src_offset, src_size;\n"
47 " float3 filter_coefficients;\n"
48 " float gamma_rcp;\n"
49 " float2 clamp_tb;\n"
50 " float pixel_height;\n"
51 "}};\n");
52 }
53 }
54
GenerateVertexShader(APIType api_type)55 ShaderCode GenerateVertexShader(APIType api_type)
56 {
57 ShaderCode out;
58 WriteHeader(api_type, out);
59
60 if (api_type == APIType::D3D)
61 {
62 out.WriteFmt("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
63 " out float4 opos : SV_Position) {{\n");
64 }
65 else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
66 {
67 if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
68 {
69 out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"
70 " float3 v_tex0;\n"
71 "}};\n");
72 }
73 else
74 {
75 out.WriteFmt("VARYING_LOCATION(0) out float3 v_tex0;\n");
76 }
77 out.WriteFmt("#define id gl_VertexID\n"
78 "#define opos gl_Position\n"
79 "void main() {{\n");
80 }
81 out.WriteFmt(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
82 out.WriteFmt(
83 " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
84 out.WriteFmt(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
85
86 // NDC space is flipped in Vulkan
87 if (api_type == APIType::Vulkan)
88 out.WriteFmt(" opos.y = -opos.y;\n");
89
90 out.WriteFmt("}}\n");
91
92 return out;
93 }
94
GeneratePixelShader(APIType api_type,const UidData * uid_data)95 ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
96 {
97 const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
98
99 ShaderCode out;
100 WriteHeader(api_type, out);
101
102 if (api_type == APIType::D3D)
103 {
104 out.WriteFmt("Texture2DArray tex0 : register(t0);\n"
105 "SamplerState samp0 : register(s0);\n"
106 "float4 SampleEFB(float3 uv, float y_offset) {{\n"
107 " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
108 "clamp_tb.x, clamp_tb.y), {}));\n"
109 "}}\n\n",
110 mono_depth ? "0.0" : "uv.z");
111 out.WriteFmt("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
112 }
113 else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
114 {
115 out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
116 out.WriteFmt("float4 SampleEFB(float3 uv, float y_offset) {{\n"
117 " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
118 "clamp_tb.x, clamp_tb.y), {}));\n"
119 "}}\n",
120 mono_depth ? "0.0" : "uv.z");
121 if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
122 {
123 out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
124 " float3 v_tex0;\n"
125 "}};\n");
126 }
127 else
128 {
129 out.WriteFmt("VARYING_LOCATION(0) in vec3 v_tex0;\n");
130 }
131 out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
132 "void main()\n{{\n");
133 }
134
135 // The copy filter applies to both color and depth copies. This has been verified on hardware.
136 // The filter is only applied to the RGB channels, the alpha channel is left intact.
137 if (uid_data->copy_filter)
138 {
139 out.WriteFmt(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
140 " float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
141 " float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
142 " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
143 " current_row.rgb * filter_coefficients[1] +\n"
144 " next_row.rgb * filter_coefficients[2], \n"
145 " float3(1, 1, 1)), current_row.a);\n");
146 }
147 else
148 {
149 out.WriteFmt(
150 " float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
151 " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
152 " current_row.a);\n");
153 }
154
155 if (uid_data->is_depth_copy)
156 {
157 if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
158 out.WriteFmt("texcol.x = 1.0 - texcol.x;\n");
159
160 out.WriteFmt(" int depth = int(texcol.x * 16777216.0);\n"
161
162 // Convert to Z24 format
163 " int4 workspace;\n"
164 " workspace.r = (depth >> 16) & 255;\n"
165 " workspace.g = (depth >> 8) & 255;\n"
166 " workspace.b = depth & 255;\n"
167
168 // Convert to Z4 format
169 " workspace.a = (depth >> 16) & 0xF0;\n"
170
171 // Normalize components to [0.0..1.0]
172 " texcol = float4(workspace) / 255.0;\n");
173 switch (uid_data->dst_format)
174 {
175 case EFBCopyFormat::R4: // Z4
176 out.WriteFmt(" ocol0 = texcol.aaaa;\n");
177 break;
178
179 case EFBCopyFormat::R8_0x1: // Z8
180 case EFBCopyFormat::R8: // Z8H
181 out.WriteFmt(" ocol0 = texcol.rrrr;\n");
182 break;
183
184 case EFBCopyFormat::RA8: // Z16
185 out.WriteFmt(" ocol0 = texcol.gggr;\n");
186 break;
187
188 case EFBCopyFormat::RG8: // Z16 (reverse order)
189 out.WriteFmt(" ocol0 = texcol.rrrg;\n");
190 break;
191
192 case EFBCopyFormat::RGBA8: // Z24X8
193 out.WriteFmt(" ocol0 = float4(texcol.rgb, 1.0);\n");
194 break;
195
196 case EFBCopyFormat::G8: // Z8M
197 out.WriteFmt(" ocol0 = texcol.gggg;\n");
198 break;
199
200 case EFBCopyFormat::B8: // Z8L
201 out.WriteFmt(" ocol0 = texcol.bbbb;\n");
202 break;
203
204 case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
205 // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
206 // stored as alpha)
207 // Used e.g. in Zelda: Skyward Sword
208 out.WriteFmt(" ocol0 = texcol.gggb;\n");
209 break;
210
211 default:
212 ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
213 out.WriteFmt(" ocol0 = float4(texcol.bgr, 0.0);\n");
214 break;
215 }
216 }
217 else if (uid_data->is_intensity)
218 {
219 bool has_four_bits =
220 (uid_data->dst_format == EFBCopyFormat::R4 || uid_data->dst_format == EFBCopyFormat::RA4);
221 bool has_alpha =
222 (uid_data->dst_format == EFBCopyFormat::RA4 || uid_data->dst_format == EFBCopyFormat::RA8);
223
224 switch (uid_data->dst_format)
225 {
226 case EFBCopyFormat::R4: // I4
227 case EFBCopyFormat::R8_0x1: // I8
228 case EFBCopyFormat::R8: // I8
229 case EFBCopyFormat::RA4: // IA4
230 case EFBCopyFormat::RA8: // IA8
231 if (has_four_bits)
232 out.WriteFmt(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
233
234 // TODO - verify these coefficients
235 out.WriteFmt(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
236 " float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
237 " ocol0 = float4(intensity, intensity, intensity, {});\n",
238 has_alpha ? "texcol.a" : "intensity");
239 break;
240
241 default:
242 ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X",
243 static_cast<int>(uid_data->dst_format));
244 out.WriteFmt(" ocol0 = texcol;\n");
245 break;
246 }
247 }
248 else
249 {
250 if (!uid_data->efb_has_alpha)
251 out.WriteFmt(" texcol.a = 1.0;\n");
252
253 switch (uid_data->dst_format)
254 {
255 case EFBCopyFormat::R4: // R4
256 out.WriteFmt(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
257 " ocol0 = float4(red, red, red, red);\n");
258 break;
259
260 case EFBCopyFormat::R8_0x1: // R8
261 case EFBCopyFormat::R8: // R8
262 out.WriteFmt(" ocol0 = texcol.rrrr;\n");
263 break;
264
265 case EFBCopyFormat::RA4: // RA4
266 out.WriteFmt(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
267 " ocol0 = red_alpha.rrrg;\n");
268 break;
269
270 case EFBCopyFormat::RA8: // RA8
271 out.WriteFmt(" ocol0 = texcol.rrra;\n");
272 break;
273
274 case EFBCopyFormat::A8: // A8
275 out.WriteFmt(" ocol0 = texcol.aaaa;\n");
276 break;
277
278 case EFBCopyFormat::G8: // G8
279 out.WriteFmt(" ocol0 = texcol.gggg;\n");
280 break;
281
282 case EFBCopyFormat::B8: // B8
283 out.WriteFmt(" ocol0 = texcol.bbbb;\n");
284 break;
285
286 case EFBCopyFormat::RG8: // RG8
287 out.WriteFmt(" ocol0 = texcol.rrrg;\n");
288 break;
289
290 case EFBCopyFormat::GB8: // GB8
291 out.WriteFmt(" ocol0 = texcol.gggb;\n");
292 break;
293
294 case EFBCopyFormat::RGB565: // RGB565
295 out.WriteFmt(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
296 " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
297 " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
298 break;
299
300 case EFBCopyFormat::RGB5A3: // RGB5A3
301 // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
302 // will need to be implemented once we move away from floats.
303 out.WriteFmt(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
304 " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
305 " ocol0 = float4(color, alpha);\n");
306 break;
307
308 case EFBCopyFormat::RGBA8: // RGBA8
309 out.WriteFmt(" ocol0 = texcol;\n");
310 break;
311
312 case EFBCopyFormat::XFB:
313 out.WriteFmt(
314 " ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n");
315 break;
316
317 default:
318 ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(uid_data->dst_format));
319 out.WriteFmt(" ocol0 = texcol;\n");
320 break;
321 }
322 }
323
324 out.WriteFmt("}}\n");
325
326 return out;
327 }
328
329 } // namespace TextureConversionShaderGen
330