1 // Copyright 2015 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #include "VideoCommon/UberShaderVertex.h"
6
7 #include "VideoCommon/DriverDetails.h"
8 #include "VideoCommon/NativeVertexFormat.h"
9 #include "VideoCommon/UberShaderCommon.h"
10 #include "VideoCommon/VertexShaderGen.h"
11 #include "VideoCommon/VideoCommon.h"
12 #include "VideoCommon/XFMemory.h"
13
14 namespace UberShader
15 {
GetVertexShaderUid()16 VertexShaderUid GetVertexShaderUid()
17 {
18 VertexShaderUid out;
19
20 vertex_ubershader_uid_data* const uid_data = out.GetUidData();
21 uid_data->num_texgens = xfmem.numTexGen.numTexGens;
22
23 return out;
24 }
25
26 static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out);
27
GenVertexShader(APIType ApiType,const ShaderHostConfig & host_config,const vertex_ubershader_uid_data * uid_data)28 ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
29 const vertex_ubershader_uid_data* uid_data)
30 {
31 const bool msaa = host_config.msaa;
32 const bool ssaa = host_config.ssaa;
33 const bool per_pixel_lighting = host_config.per_pixel_lighting;
34 const bool vertex_rounding = host_config.vertex_rounding;
35 const u32 numTexgen = uid_data->num_texgens;
36 ShaderCode out;
37
38 out.Write("// Vertex UberShader\n\n");
39 out.Write("%s", s_lighting_struct);
40
41 // uniforms
42 if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
43 out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
44 else
45 out.Write("cbuffer VSBlock {\n");
46 out.Write(s_shader_uniforms);
47 out.Write("};\n");
48
49 out.Write("struct VS_OUTPUT {\n");
50 GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, "");
51 out.Write("};\n\n");
52
53 WriteUberShaderCommonHeader(out, ApiType, host_config);
54 WriteLightingFunction(out);
55
56 if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
57 {
58 out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
59 out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
60 out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
61 out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
62 out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
63 out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
64 out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
65 for (int i = 0; i < 8; ++i)
66 out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i, i);
67
68 if (host_config.backend_geometry_shaders)
69 {
70 out.Write("VARYING_LOCATION(0) out VertexData {\n");
71 GenerateVSOutputMembers(out, ApiType, numTexgen, host_config,
72 GetInterpolationQualifier(msaa, ssaa, true, false));
73 out.Write("} vs;\n");
74 }
75 else
76 {
77 // Let's set up attributes
78 u32 counter = 0;
79 out.Write("VARYING_LOCATION(%u) %s out float4 colors_0;\n", counter++,
80 GetInterpolationQualifier(msaa, ssaa));
81 out.Write("VARYING_LOCATION(%u) %s out float4 colors_1;\n", counter++,
82 GetInterpolationQualifier(msaa, ssaa));
83 for (u32 i = 0; i < numTexgen; ++i)
84 {
85 out.Write("VARYING_LOCATION(%u) %s out float3 tex%u;\n", counter++,
86 GetInterpolationQualifier(msaa, ssaa), i);
87 }
88 if (!host_config.fast_depth_calc)
89 {
90 out.Write("VARYING_LOCATION(%u) %s out float4 clipPos;\n", counter++,
91 GetInterpolationQualifier(msaa, ssaa));
92 }
93 if (per_pixel_lighting)
94 {
95 out.Write("VARYING_LOCATION(%u) %s out float3 Normal;\n", counter++,
96 GetInterpolationQualifier(msaa, ssaa));
97 out.Write("VARYING_LOCATION(%u) %s out float3 WorldPos;\n", counter++,
98 GetInterpolationQualifier(msaa, ssaa));
99 }
100 }
101
102 out.Write("void main()\n{\n");
103 }
104 else // D3D
105 {
106 out.Write("VS_OUTPUT main(\n");
107
108 // inputs
109 out.Write(" float3 rawnorm0 : NORMAL0,\n");
110 out.Write(" float3 rawnorm1 : NORMAL1,\n");
111 out.Write(" float3 rawnorm2 : NORMAL2,\n");
112 out.Write(" float4 rawcolor0 : COLOR0,\n");
113 out.Write(" float4 rawcolor1 : COLOR1,\n");
114 for (int i = 0; i < 8; ++i)
115 out.Write(" float3 rawtex%d : TEXCOORD%d,\n", i, i);
116 out.Write(" uint posmtx : BLENDINDICES,\n");
117 out.Write(" float4 rawpos : POSITION) {\n");
118 }
119
120 out.Write("VS_OUTPUT o;\n"
121 "\n");
122
123 // Transforms
124 out.Write("// Position matrix\n"
125 "float4 P0;\n"
126 "float4 P1;\n"
127 "float4 P2;\n"
128 "\n"
129 "// Normal matrix\n"
130 "float3 N0;\n"
131 "float3 N1;\n"
132 "float3 N2;\n"
133 "\n"
134 "if ((components & %uu) != 0u) {// VB_HAS_POSMTXIDX\n",
135 VB_HAS_POSMTXIDX);
136 out.Write(" // Vertex format has a per-vertex matrix\n"
137 " int posidx = int(posmtx.r);\n"
138 " P0 = " I_TRANSFORMMATRICES "[posidx];\n"
139 " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
140 " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
141 "\n"
142 " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
143 " N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
144 " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
145 " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
146 "} else {\n"
147 " // One shared matrix\n"
148 " P0 = " I_POSNORMALMATRIX "[0];\n"
149 " P1 = " I_POSNORMALMATRIX "[1];\n"
150 " P2 = " I_POSNORMALMATRIX "[2];\n"
151 " N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
152 " N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
153 " N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
154 "}\n"
155 "\n"
156 "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
157 "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
158 "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
159 "\n"
160 "// Only the first normal gets normalized (TODO: why?)\n"
161 "float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
162 "if ((components & %uu) != 0u) // VB_HAS_NRM0\n",
163 VB_HAS_NRM0);
164 out.Write(
165 " _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
166 "\n"
167 "float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
168 "if ((components & %uu) != 0u) // VB_HAS_NRM1\n",
169 VB_HAS_NRM1);
170 out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
171 "\n"
172 "float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
173 "if ((components & %uu) != 0u) // VB_HAS_NRM2\n",
174 VB_HAS_NRM2);
175 out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
176 "\n");
177
178 // Hardware Lighting
179 WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
180 "o.colors_1");
181
182 // Texture Coordinates
183 if (numTexgen > 0)
184 GenVertexShaderTexGens(ApiType, numTexgen, out);
185
186 out.Write("if (xfmem_numColorChans == 0u) {\n");
187 out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL0);
188 out.Write(" o.colors_0 = rawcolor0;\n");
189 out.Write(" else\n");
190 out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n");
191 out.Write("}\n");
192 out.Write("if (xfmem_numColorChans < 2u) {\n");
193 out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL1);
194 out.Write(" o.colors_0 = rawcolor1;\n");
195 out.Write(" else\n");
196 out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n");
197 out.Write("}\n");
198
199 if (!host_config.fast_depth_calc)
200 {
201 // clipPos/w needs to be done in pixel shader, not here
202 out.Write("o.clipPos = o.pos;\n");
203 }
204
205 if (per_pixel_lighting)
206 {
207 out.Write("o.Normal = _norm0;\n");
208 out.Write("o.WorldPos = pos.xyz;\n");
209 out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
210 out.Write(" o.colors_0 = rawcolor0;\n");
211 out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1);
212 out.Write(" o.colors_1 = rawcolor1;\n");
213 }
214
215 // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
216 // our own depth clipping and calculate the depth range before the perspective divide if
217 // necessary.
218 if (host_config.backend_depth_clamp)
219 {
220 // Since we're adjusting z for the depth range before the perspective divide, we have to do our
221 // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
222 // We adjust our depth value for clipping purposes to match the perspective projection in the
223 // software backend, which is a hack to fix Sonic Adventure and Unleashed games.
224 out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n");
225 out.Write("float clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w
226 out.Write("float clipDist1 = -clipDepth;\n"); // Far: z > 0
227 if (host_config.backend_geometry_shaders)
228 {
229 out.Write("o.clipDist0 = clipDist0;\n");
230 out.Write("o.clipDist1 = clipDist1;\n");
231 }
232 }
233
234 // Write the true depth value. If the game uses depth textures, then the pixel shader will
235 // override it with the correct values if not then early z culling will improve speed.
236 // There are two different ways to do this, when the depth range is oversized, we process
237 // the depth range in the vertex shader, if not we let the host driver handle it.
238 //
239 // Adjust z for the depth range. We're using an equation which incorperates a depth inversion,
240 // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
241 // We have to handle the depth range in the vertex shader instead of after the perspective
242 // divide, because some games will use a depth range larger than what is allowed by the
243 // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
244 // games effectively add a depth bias to the values written to the depth buffer.
245 out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
246 "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
247
248 if (!host_config.backend_clip_control)
249 {
250 // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
251 // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point
252 // operation that can introduce a round-trip error.
253 out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
254 }
255
256 // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
257 // since the viewport height is already negated by the render backend.
258 out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
259
260 // The console GPU places the pixel center at 7/12 in screen space unless
261 // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
262 // in some primitives being placed one pixel too far to the bottom-right,
263 // which in turn can be critical if it happens for clear quads.
264 // Hence, we compensate for this pixel center difference so that primitives
265 // get rasterized correctly.
266 out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
267
268 if (vertex_rounding)
269 {
270 // By now our position is in clip space. However, higher resolutions than the Wii outputs
271 // cause an additional pixel offset. Due to a higher pixel density we need to correct this
272 // by converting our clip-space position into the Wii's screen-space.
273 // Acquire the right pixel and then convert it back.
274 out.Write("if (o.pos.w == 1.0f)\n");
275 out.Write("{\n");
276
277 out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n");
278 out.Write("\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
279
280 out.Write("\tss_pixel_x = round(ss_pixel_x);\n");
281 out.Write("\tss_pixel_y = round(ss_pixel_y);\n");
282
283 out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n");
284 out.Write("\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n");
285 out.Write("}\n");
286 }
287
288 if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
289 {
290 if (host_config.backend_geometry_shaders)
291 {
292 AssignVSOutputMembers(out, "vs", "o", numTexgen, host_config);
293 }
294 else
295 {
296 // TODO: Pass interface blocks between shader stages even if geometry shaders
297 // are not supported, however that will require at least OpenGL 3.2 support.
298 for (u32 i = 0; i < numTexgen; ++i)
299 out.Write("tex%d.xyz = o.tex%d;\n", i, i);
300 if (!host_config.fast_depth_calc)
301 out.Write("clipPos = o.clipPos;\n");
302 if (per_pixel_lighting)
303 {
304 out.Write("Normal = o.Normal;\n");
305 out.Write("WorldPos = o.WorldPos;\n");
306 }
307 out.Write("colors_0 = o.colors_0;\n");
308 out.Write("colors_1 = o.colors_1;\n");
309 }
310
311 if (host_config.backend_depth_clamp)
312 {
313 out.Write("gl_ClipDistance[0] = clipDist0;\n");
314 out.Write("gl_ClipDistance[1] = clipDist1;\n");
315 }
316
317 // Vulkan NDC space has Y pointing down (right-handed NDC space).
318 if (ApiType == APIType::Vulkan)
319 out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
320 else
321 out.Write("gl_Position = o.pos;\n");
322 }
323 else // D3D
324 {
325 out.Write("return o;\n");
326 }
327 out.Write("}\n");
328
329 return out;
330 }
331
GenVertexShaderTexGens(APIType ApiType,u32 numTexgen,ShaderCode & out)332 void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out)
333 {
334 // The HLSL compiler complains that the output texture coordinates are uninitialized when trying
335 // to dynamically index them.
336 for (u32 i = 0; i < numTexgen; i++)
337 out.Write("o.tex%u = float3(0.0, 0.0, 0.0);\n", i);
338
339 out.Write("// Texture coordinate generation\n");
340 if (numTexgen == 1)
341 out.Write("{ const uint texgen = 0u;\n");
342 else
343 out.Write("%sfor (uint texgen = 0u; texgen < %uu; texgen++) {\n",
344 ApiType == APIType::D3D ? "[loop] " : "", numTexgen);
345
346 out.Write(" // Texcoord transforms\n");
347 out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
348 " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
349 out.Write(" switch (%s) {\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow).c_str());
350 out.Write(" case %uu: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
351 out.Write(" coord.xyz = rawpos.xyz;\n");
352 out.Write(" break;\n\n");
353 out.Write(" case %uu: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
354 out.Write(
355 " coord.xyz = ((components & %uu /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
356 VB_HAS_NRM0);
357 out.Write(" break;\n\n");
358 out.Write(" case %uu: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
359 out.Write(
360 " coord.xyz = ((components & %uu /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
361 VB_HAS_NRM1);
362 out.Write(" break;\n\n");
363 out.Write(" case %uu: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
364 out.Write(
365 " coord.xyz = ((components & %uu /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
366 VB_HAS_NRM2);
367 out.Write(" break;\n\n");
368 for (u32 i = 0; i < 8; i++)
369 {
370 out.Write(" case %uu: // XF_SRCTEX%u_INROW\n", XF_SRCTEX0_INROW + i, i);
371 out.Write(
372 " coord = ((components & %uu /* VB_HAS_UV%u */) != 0u) ? float4(rawtex%u.x, rawtex%u.y, "
373 "1.0, 1.0) : coord;\n",
374 VB_HAS_UV0 << i, i, i, i);
375 out.Write(" break;\n\n");
376 }
377 out.Write(" }\n");
378 out.Write("\n");
379
380 out.Write(" // Input form of AB11 sets z element to 1.0\n");
381 out.Write(" if (%s == %uu) // inputform == XF_TEXINPUT_AB11\n",
382 BitfieldExtract("texMtxInfo", TexMtxInfo().inputform).c_str(), XF_TEXINPUT_AB11);
383 out.Write(" coord.z = 1.0f;\n");
384 out.Write("\n");
385
386 out.Write(" // first transformation\n");
387 out.Write(" uint texgentype = %s;\n",
388 BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype).c_str());
389 out.Write(" float3 output_tex;\n"
390 " switch (texgentype)\n"
391 " {\n");
392 out.Write(" case %uu: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
393 out.Write(" {\n");
394 out.Write(" uint light = %s;\n",
395 BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift).c_str());
396 out.Write(" uint source = %s;\n",
397 BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift).c_str());
398 out.Write(" switch (source) {\n");
399 for (u32 i = 0; i < numTexgen; i++)
400 out.Write(" case %uu: output_tex.xyz = o.tex%u; break;\n", i, i);
401 out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
402 " }\n");
403 out.Write(" if ((components & %uu) != 0u) { // VB_HAS_NRM1 | VB_HAS_NRM2\n",
404 VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
405 out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
406 " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
407 " }\n"
408 " }\n"
409 " break;\n\n");
410 out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
411 out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
412 " break;\n\n");
413 out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
414 out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
415 " break;\n\n");
416 out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
417 " {\n");
418 out.Write(" if ((components & (%uu /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {\n",
419 VB_HAS_TEXMTXIDX0);
420 out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
421 " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
422 " int tmp = 0;\n"
423 " switch (texgen) {\n");
424 for (u32 i = 0; i < numTexgen; i++)
425 out.Write(" case %uu: tmp = int(rawtex%u.z); break;\n", i, i);
426 out.Write(" }\n"
427 "\n");
428 out.Write(" if (%s == %uu) {\n",
429 BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ);
430 out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
431 " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
432 " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
433 " } else {\n"
434 " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
435 " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
436 " 1.0);\n"
437 " }\n"
438 " } else {\n");
439 out.Write(" if (%s == %uu) {\n",
440 BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ);
441 out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
442 " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
443 " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
444 " } else {\n"
445 " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
446 " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
447 " 1.0);\n"
448 " }\n"
449 " }\n"
450 " }\n"
451 " break;\n\n"
452 " }\n"
453 "\n");
454
455 out.Write(" if (xfmem_dualTexInfo != 0u) {\n");
456 out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
457 out.Write(" uint base_index = %s;\n",
458 BitfieldExtract("postMtxInfo", PostMtxInfo().index).c_str());
459 out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
460 " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
461 " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
462 "\n");
463 out.Write(" if (%s != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize).c_str());
464 out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
465 "\n"
466 " // multiply by postmatrix\n"
467 " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
468 " dot(P1.xyz, output_tex.xyz) + P1.w,\n"
469 " dot(P2.xyz, output_tex.xyz) + P2.w);\n"
470 " }\n\n");
471
472 // When q is 0, the GameCube appears to have a special case
473 // This can be seen in devkitPro's neheGX Lesson08 example for Wii
474 // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
475 out.Write(" if (texgentype == %uu && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
476 XF_TEXGEN_REGULAR);
477 out.Write(
478 " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
479 "\n");
480
481 out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
482 out.Write(" switch (texgen) {\n");
483 for (u32 i = 0; i < numTexgen; i++)
484 out.Write(" case %uu: o.tex%u = output_tex; break;\n", i, i);
485 out.Write(" }\n"
486 "}\n");
487 }
488
EnumerateVertexShaderUids(const std::function<void (const VertexShaderUid &)> & callback)489 void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)
490 {
491 VertexShaderUid uid;
492
493 for (u32 texgens = 0; texgens <= 8; texgens++)
494 {
495 vertex_ubershader_uid_data* const vuid = uid.GetUidData();
496 vuid->num_texgens = texgens;
497 callback(uid);
498 }
499 }
500 } // namespace UberShader
501