1 // Copyright 2015 Citra Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4 
5 #include <array>
6 #include <cstddef>
7 #include <string_view>
8 #include <fmt/format.h>
9 #include "common/assert.h"
10 #include "common/bit_field.h"
11 #include "common/bit_set.h"
12 #include "common/logging/log.h"
13 #include "core/core.h"
14 #include "video_core/regs_framebuffer.h"
15 #include "video_core/regs_lighting.h"
16 #include "video_core/regs_rasterizer.h"
17 #include "video_core/regs_texturing.h"
18 #include "video_core/renderer_opengl/gl_rasterizer.h"
19 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
20 #include "video_core/renderer_opengl/gl_shader_gen.h"
21 #include "video_core/renderer_opengl/gl_shader_util.h"
22 #include "video_core/renderer_opengl/gl_vars.h"
23 #include "video_core/video_core.h"
24 
25 using Pica::FramebufferRegs;
26 using Pica::LightingRegs;
27 using Pica::RasterizerRegs;
28 using Pica::TexturingRegs;
29 using TevStageConfig = TexturingRegs::TevStageConfig;
30 using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
31 
32 namespace OpenGL {
33 
/// GLSL source text that declares the NUM_TEV_STAGES / NUM_LIGHTS / NUM_LIGHTING_SAMPLERS
/// defines, the `LightSrc` struct and the std140 uniform block `shader_data`.
/// NOTE(review): presumably spliced into the generated shader sources and mirrored by a
/// host-side structure defined elsewhere - if so, member order and types here must stay in
/// sync with it; confirm before editing.
constexpr std::string_view UniformBlockDef = R"(
#define NUM_TEV_STAGES 6
#define NUM_LIGHTS 8
#define NUM_LIGHTING_SAMPLERS 24

struct LightSrc {
    vec3 specular_0;
    vec3 specular_1;
    vec3 diffuse;
    vec3 ambient;
    vec3 position;
    vec3 spot_direction;
    float dist_atten_bias;
    float dist_atten_scale;
};

layout (std140) uniform shader_data {
    int framebuffer_scale;
    int alphatest_ref;
    float depth_scale;
    float depth_offset;
    float shadow_bias_constant;
    float shadow_bias_linear;
    int scissor_x1;
    int scissor_y1;
    int scissor_x2;
    int scissor_y2;
    int fog_lut_offset;
    int proctex_noise_lut_offset;
    int proctex_color_map_offset;
    int proctex_alpha_map_offset;
    int proctex_lut_offset;
    int proctex_diff_lut_offset;
    float proctex_bias;
    int shadow_texture_bias;
    ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
    vec3 fog_color;
    vec2 proctex_noise_f;
    vec2 proctex_noise_a;
    vec2 proctex_noise_p;
    vec3 lighting_global_ambient;
    LightSrc light_src[NUM_LIGHTS];
    vec4 const_color[NUM_TEV_STAGES];
    vec4 tev_combiner_buffer_color;
    vec4 clip_coef;
};
)";
81 
GetVertexInterfaceDeclaration(bool is_output,bool separable_shader)82 static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) {
83     std::string out;
84 
85     const auto append_variable = [&](std::string_view var, int location) {
86         if (separable_shader) {
87             out += fmt::format("layout (location={}) ", location);
88         }
89         out += fmt::format("{}{};\n", is_output ? "out " : "in ", var);
90     };
91 
92     append_variable("vec4 primary_color", ATTRIBUTE_COLOR);
93     append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0);
94     append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1);
95     append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2);
96     append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W);
97     append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT);
98     append_variable("vec3 view", ATTRIBUTE_VIEW);
99 
100     if (is_output && separable_shader) {
101         // gl_PerVertex redeclaration is required for separate shader object
102         out += R"(
103 out gl_PerVertex {
104     vec4 gl_Position;
105 #if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
106     float gl_ClipDistance[2];
107 #endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
108 };
109 )";
110     }
111 
112     return out;
113 }
114 
BuildFromRegs(const Pica::Regs & regs)115 PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
116     PicaFSConfig res{};
117 
118     auto& state = res.state;
119 
120     state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
121 
122     state.depthmap_enable = regs.rasterizer.depthmap_enable;
123 
124     state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
125                                 ? regs.framebuffer.output_merger.alpha_test.func.Value()
126                                 : FramebufferRegs::CompareFunc::Always;
127 
128     state.texture0_type = regs.texturing.texture0.type;
129 
130     state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
131 
132     if (GLES) {
133         // With GLES, we need this in the fragment shader to emulate logic operations
134         state.alphablend_enable =
135             Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1;
136         state.logic_op = regs.framebuffer.output_merger.logic_op;
137     } else {
138         // We don't need these otherwise, reset them to avoid unnecessary shader generation
139         state.alphablend_enable = {};
140         state.logic_op = {};
141     }
142 
143     // Copy relevant tev stages fields.
144     // We don't sync const_color here because of the high variance, it is a
145     // shader uniform instead.
146     const auto& tev_stages = regs.texturing.GetTevStages();
147     DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
148     for (std::size_t i = 0; i < tev_stages.size(); i++) {
149         const auto& tev_stage = tev_stages[i];
150         state.tev_stages[i].sources_raw = tev_stage.sources_raw;
151         state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
152         state.tev_stages[i].ops_raw = tev_stage.ops_raw;
153         state.tev_stages[i].scales_raw = tev_stage.scales_raw;
154     }
155 
156     state.fog_mode = regs.texturing.fog_mode;
157     state.fog_flip = regs.texturing.fog_flip != 0;
158 
159     state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
160                                   regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
161                                       << 4;
162 
163     // Fragment lighting
164 
165     state.lighting.enable = !regs.lighting.disable;
166     state.lighting.src_num = regs.lighting.max_light_index + 1;
167 
168     for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
169         unsigned num = regs.lighting.light_enable.GetNum(light_index);
170         const auto& light = regs.lighting.light[num];
171         state.lighting.light[light_index].num = num;
172         state.lighting.light[light_index].directional = light.config.directional != 0;
173         state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
174         state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
175         state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
176         state.lighting.light[light_index].dist_atten_enable =
177             !regs.lighting.IsDistAttenDisabled(num);
178         state.lighting.light[light_index].spot_atten_enable =
179             !regs.lighting.IsSpotAttenDisabled(num);
180         state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
181     }
182 
183     state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
184     state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
185     state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
186     state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
187 
188     state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
189     state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
190     state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
191     state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
192 
193     // this is a dummy field due to lack of the corresponding register
194     state.lighting.lut_sp.enable = true;
195     state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
196     state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
197     state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
198 
199     state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
200     state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
201     state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
202     state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
203 
204     state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
205     state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
206     state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
207     state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
208 
209     state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
210     state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
211     state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
212     state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
213 
214     state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
215     state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
216     state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
217     state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
218 
219     state.lighting.config = regs.lighting.config0.config;
220     state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
221     state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
222     state.lighting.bump_mode = regs.lighting.config0.bump_mode;
223     state.lighting.bump_selector = regs.lighting.config0.bump_selector;
224     state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
225     state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
226 
227     state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
228     state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
229     state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
230     state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
231     state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
232     state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
233 
234     state.proctex.enable = regs.texturing.main_config.texture3_enable;
235     if (state.proctex.enable) {
236         state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
237         state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
238         state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
239         state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
240         state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
241         state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
242         state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
243         state.proctex.u_shift = regs.texturing.proctex.u_shift;
244         state.proctex.v_shift = regs.texturing.proctex.v_shift;
245         state.proctex.lut_width = regs.texturing.proctex_lut.width;
246         state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
247         state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
248         state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
249         state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
250         state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
251         state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
252         state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
253     }
254 
255     state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
256                              FramebufferRegs::FragmentOperationMode::Shadow;
257 
258     state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
259 
260     return res;
261 }
262 
Init(const Pica::ShaderRegs & regs,Pica::Shader::ShaderSetup & setup)263 void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
264     program_hash = setup.GetProgramCodeHash();
265     swizzle_hash = setup.GetSwizzleDataHash();
266     main_offset = regs.main_offset;
267     sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
268 
269     num_outputs = 0;
270     output_map.fill(16);
271 
272     for (int reg : Common::BitSet<u32>(regs.output_mask)) {
273         output_map[reg] = num_outputs++;
274     }
275 }
276 
Init(const Pica::Regs & regs)277 void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
278     vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
279     gs_output_attributes = vs_output_attributes;
280 
281     semantic_maps.fill({16, 0});
282     for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
283         const std::array semantics{
284             regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
285             regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
286             regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
287             regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
288         };
289         for (u32 comp = 0; comp < 4; ++comp) {
290             const auto semantic = semantics[comp];
291             if (static_cast<std::size_t>(semantic) < 24) {
292                 semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
293             } else if (semantic != VSOutputAttributes::INVALID) {
294                 LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
295             }
296         }
297     }
298 }
299 
300 /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
IsPassThroughTevStage(const TevStageConfig & stage)301 static bool IsPassThroughTevStage(const TevStageConfig& stage) {
302     return (stage.color_op == TevStageConfig::Operation::Replace &&
303             stage.alpha_op == TevStageConfig::Operation::Replace &&
304             stage.color_source1 == TevStageConfig::Source::Previous &&
305             stage.alpha_source1 == TevStageConfig::Source::Previous &&
306             stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
307             stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
308             stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
309 }
310 
SampleTexture(const PicaFSConfig & config,unsigned texture_unit)311 static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
312     const auto& state = config.state;
313     switch (texture_unit) {
314     case 0:
315         // Only unit 0 respects the texturing type
316         switch (state.texture0_type) {
317         case TexturingRegs::TextureConfig::Texture2D:
318             return "textureLod(tex0, texcoord0, getLod(texcoord0 * vec2(textureSize(tex0, 0))))";
319         case TexturingRegs::TextureConfig::Projection2D:
320             // TODO (wwylele): find the exact LOD formula for projection texture
321             return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
322         case TexturingRegs::TextureConfig::TextureCube:
323             return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
324         case TexturingRegs::TextureConfig::Shadow2D:
325             return "shadowTexture(texcoord0, texcoord0_w)";
326         case TexturingRegs::TextureConfig::ShadowCube:
327             return "shadowTextureCube(texcoord0, texcoord0_w)";
328         case TexturingRegs::TextureConfig::Disabled:
329             return "vec4(0.0)";
330         default:
331             LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
332             UNIMPLEMENTED();
333             return "texture(tex0, texcoord0)";
334         }
335     case 1:
336         return "textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, 0))))";
337     case 2:
338         if (state.texture2_use_coord1)
339             return "textureLod(tex2, texcoord1, getLod(texcoord1 * vec2(textureSize(tex2, 0))))";
340         else
341             return "textureLod(tex2, texcoord2, getLod(texcoord2 * vec2(textureSize(tex2, 0))))";
342     case 3:
343         if (state.proctex.enable) {
344             return "ProcTex()";
345         } else {
346             LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it");
347             return "vec4(0.0)";
348         }
349     default:
350         UNREACHABLE();
351         return "";
352     }
353 }
354 
355 /// Writes the specified TEV stage source component(s)
AppendSource(std::string & out,const PicaFSConfig & config,TevStageConfig::Source source,std::string_view index_name)356 static void AppendSource(std::string& out, const PicaFSConfig& config,
357                          TevStageConfig::Source source, std::string_view index_name) {
358     using Source = TevStageConfig::Source;
359     switch (source) {
360     case Source::PrimaryColor:
361         out += "rounded_primary_color";
362         break;
363     case Source::PrimaryFragmentColor:
364         out += "primary_fragment_color";
365         break;
366     case Source::SecondaryFragmentColor:
367         out += "secondary_fragment_color";
368         break;
369     case Source::Texture0:
370         out += SampleTexture(config, 0);
371         break;
372     case Source::Texture1:
373         out += SampleTexture(config, 1);
374         break;
375     case Source::Texture2:
376         out += SampleTexture(config, 2);
377         break;
378     case Source::Texture3:
379         out += SampleTexture(config, 3);
380         break;
381     case Source::PreviousBuffer:
382         out += "combiner_buffer";
383         break;
384     case Source::Constant:
385         out += "const_color[";
386         out += index_name;
387         out += ']';
388         break;
389     case Source::Previous:
390         out += "last_tex_env_out";
391         break;
392     default:
393         out += "vec4(0.0)";
394         LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source);
395         break;
396     }
397 }
398 
399 /// Writes the color components to use for the specified TEV stage color modifier
AppendColorModifier(std::string & out,const PicaFSConfig & config,TevStageConfig::ColorModifier modifier,TevStageConfig::Source source,std::string_view index_name)400 static void AppendColorModifier(std::string& out, const PicaFSConfig& config,
401                                 TevStageConfig::ColorModifier modifier,
402                                 TevStageConfig::Source source, std::string_view index_name) {
403     using ColorModifier = TevStageConfig::ColorModifier;
404     switch (modifier) {
405     case ColorModifier::SourceColor:
406         AppendSource(out, config, source, index_name);
407         out += ".rgb";
408         break;
409     case ColorModifier::OneMinusSourceColor:
410         out += "vec3(1.0) - ";
411         AppendSource(out, config, source, index_name);
412         out += ".rgb";
413         break;
414     case ColorModifier::SourceAlpha:
415         AppendSource(out, config, source, index_name);
416         out += ".aaa";
417         break;
418     case ColorModifier::OneMinusSourceAlpha:
419         out += "vec3(1.0) - ";
420         AppendSource(out, config, source, index_name);
421         out += ".aaa";
422         break;
423     case ColorModifier::SourceRed:
424         AppendSource(out, config, source, index_name);
425         out += ".rrr";
426         break;
427     case ColorModifier::OneMinusSourceRed:
428         out += "vec3(1.0) - ";
429         AppendSource(out, config, source, index_name);
430         out += ".rrr";
431         break;
432     case ColorModifier::SourceGreen:
433         AppendSource(out, config, source, index_name);
434         out += ".ggg";
435         break;
436     case ColorModifier::OneMinusSourceGreen:
437         out += "vec3(1.0) - ";
438         AppendSource(out, config, source, index_name);
439         out += ".ggg";
440         break;
441     case ColorModifier::SourceBlue:
442         AppendSource(out, config, source, index_name);
443         out += ".bbb";
444         break;
445     case ColorModifier::OneMinusSourceBlue:
446         out += "vec3(1.0) - ";
447         AppendSource(out, config, source, index_name);
448         out += ".bbb";
449         break;
450     default:
451         out += "vec3(0.0)";
452         LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier);
453         break;
454     }
455 }
456 
457 /// Writes the alpha component to use for the specified TEV stage alpha modifier
AppendAlphaModifier(std::string & out,const PicaFSConfig & config,TevStageConfig::AlphaModifier modifier,TevStageConfig::Source source,const std::string & index_name)458 static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config,
459                                 TevStageConfig::AlphaModifier modifier,
460                                 TevStageConfig::Source source, const std::string& index_name) {
461     using AlphaModifier = TevStageConfig::AlphaModifier;
462     switch (modifier) {
463     case AlphaModifier::SourceAlpha:
464         AppendSource(out, config, source, index_name);
465         out += ".a";
466         break;
467     case AlphaModifier::OneMinusSourceAlpha:
468         out += "1.0 - ";
469         AppendSource(out, config, source, index_name);
470         out += ".a";
471         break;
472     case AlphaModifier::SourceRed:
473         AppendSource(out, config, source, index_name);
474         out += ".r";
475         break;
476     case AlphaModifier::OneMinusSourceRed:
477         out += "1.0 - ";
478         AppendSource(out, config, source, index_name);
479         out += ".r";
480         break;
481     case AlphaModifier::SourceGreen:
482         AppendSource(out, config, source, index_name);
483         out += ".g";
484         break;
485     case AlphaModifier::OneMinusSourceGreen:
486         out += "1.0 - ";
487         AppendSource(out, config, source, index_name);
488         out += ".g";
489         break;
490     case AlphaModifier::SourceBlue:
491         AppendSource(out, config, source, index_name);
492         out += ".b";
493         break;
494     case AlphaModifier::OneMinusSourceBlue:
495         out += "1.0 - ";
496         AppendSource(out, config, source, index_name);
497         out += ".b";
498         break;
499     default:
500         out += "0.0";
501         LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier);
502         break;
503     }
504 }
505 
506 /// Writes the combiner function for the color components for the specified TEV stage operation
AppendColorCombiner(std::string & out,TevStageConfig::Operation operation,std::string_view variable_name)507 static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation,
508                                 std::string_view variable_name) {
509     out += "clamp(";
510     using Operation = TevStageConfig::Operation;
511     switch (operation) {
512     case Operation::Replace:
513         out += fmt::format("{}[0]", variable_name);
514         break;
515     case Operation::Modulate:
516         out += fmt::format("{0}[0] * {0}[1]", variable_name);
517         break;
518     case Operation::Add:
519         out += fmt::format("{0}[0] + {0}[1]", variable_name);
520         break;
521     case Operation::AddSigned:
522         out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name);
523         break;
524     case Operation::Lerp:
525         out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name);
526         break;
527     case Operation::Subtract:
528         out += fmt::format("{0}[0] - {0}[1]", variable_name);
529         break;
530     case Operation::MultiplyThenAdd:
531         out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
532         break;
533     case Operation::AddThenMultiply:
534         out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name);
535         break;
536     case Operation::Dot3_RGB:
537     case Operation::Dot3_RGBA:
538         out +=
539             fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name);
540         break;
541     default:
542         out += "vec3(0.0)";
543         LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation);
544         break;
545     }
546     out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0
547 }
548 
549 /// Writes the combiner function for the alpha component for the specified TEV stage operation
AppendAlphaCombiner(std::string & out,TevStageConfig::Operation operation,std::string_view variable_name)550 static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation,
551                                 std::string_view variable_name) {
552     out += "clamp(";
553     using Operation = TevStageConfig::Operation;
554     switch (operation) {
555     case Operation::Replace:
556         out += fmt::format("{}[0]", variable_name);
557         break;
558     case Operation::Modulate:
559         out += fmt::format("{0}[0] * {0}[1]", variable_name);
560         break;
561     case Operation::Add:
562         out += fmt::format("{0}[0] + {0}[1]", variable_name);
563         break;
564     case Operation::AddSigned:
565         out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name);
566         break;
567     case Operation::Lerp:
568         out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name);
569         break;
570     case Operation::Subtract:
571         out += fmt::format("{0}[0] - {0}[1]", variable_name);
572         break;
573     case Operation::MultiplyThenAdd:
574         out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
575         break;
576     case Operation::AddThenMultiply:
577         out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name);
578         break;
579     default:
580         out += "0.0";
581         LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation);
582         break;
583     }
584     out += ", 0.0, 1.0)";
585 }
586 
587 /// Writes the if-statement condition used to evaluate alpha testing
AppendAlphaTestCondition(std::string & out,FramebufferRegs::CompareFunc func)588 static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
589     using CompareFunc = FramebufferRegs::CompareFunc;
590     switch (func) {
591     case CompareFunc::Never:
592         out += "true";
593         break;
594     case CompareFunc::Always:
595         out += "false";
596         break;
597     case CompareFunc::Equal:
598     case CompareFunc::NotEqual:
599     case CompareFunc::LessThan:
600     case CompareFunc::LessThanOrEqual:
601     case CompareFunc::GreaterThan:
602     case CompareFunc::GreaterThanOrEqual: {
603         static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"};
604         const auto index = static_cast<u32>(func) - static_cast<u32>(CompareFunc::Equal);
605         out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]);
606         break;
607     }
608 
609     default:
610         out += "false";
611         LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func);
612         break;
613     }
614 }
615 
616 /// Writes the code to emulate the specified TEV stage
WriteTevStage(std::string & out,const PicaFSConfig & config,unsigned index)617 static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) {
618     const auto stage =
619         static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
620     if (!IsPassThroughTevStage(stage)) {
621         const std::string index_name = std::to_string(index);
622 
623         out += fmt::format("vec3 color_results_{}_1 = ", index_name);
624         AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
625         out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
626         AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
627         out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
628         AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
629         out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
630                            "color_results_{}_2, color_results_{}_3);\n",
631                            index_name, index_name, index_name, index_name);
632 
633         // Round the output of each TEV stage to maintain the PICA's 8 bits of precision
634         out += fmt::format("vec3 color_output_{} = byteround(", index_name);
635         AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
636         out += ");\n";
637 
638         if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
639             // result of Dot3_RGBA operation is also placed to the alpha component
640             out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name);
641         } else {
642             out += fmt::format("float alpha_results_{}[3] = float[3](", index_name);
643             AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
644                                 index_name);
645             out += ", ";
646             AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
647                                 index_name);
648             out += ", ";
649             AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
650                                 index_name);
651             out += ");\n";
652 
653             out += fmt::format("float alpha_output_{} = byteround(", index_name);
654             AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
655             out += ");\n";
656         }
657 
658         out += fmt::format("last_tex_env_out = vec4("
659                            "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
660                            "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
661                            index_name, stage.GetColorMultiplier(), index_name,
662                            stage.GetAlphaMultiplier());
663     }
664 
665     out += "combiner_buffer = next_combiner_buffer;\n";
666 
667     if (config.TevStageUpdatesCombinerBufferColor(index))
668         out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n";
669 
670     if (config.TevStageUpdatesCombinerBufferAlpha(index))
671         out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
672 }
673 
674 /// Writes the code to emulate fragment lighting
WriteLighting(std::string & out,const PicaFSConfig & config)675 static void WriteLighting(std::string& out, const PicaFSConfig& config) {
676     const auto& lighting = config.state.lighting;
677 
678     // Define lighting globals
679     out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
680            "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
681            "vec3 light_vector = vec3(0.0);\n"
682            "vec3 refl_value = vec3(0.0);\n"
683            "vec3 spot_dir = vec3(0.0);\n"
684            "vec3 half_vector = vec3(0.0);\n"
685            "float dot_product = 0.0;\n"
686            "float clamp_highlights = 1.0;\n"
687            "float geo_factor = 1.0;\n";
688 
689     // Compute fragment normals and tangents
690     const auto Perturbation = [&] {
691         return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector));
692     };
693     if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
694         // Bump mapping is enabled using a normal map
695         out += fmt::format("vec3 surface_normal = {};\n", Perturbation());
696 
697         // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
698         // precision result
699         if (lighting.bump_renorm) {
700             constexpr std::string_view val =
701                 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
702             out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val);
703         }
704 
705         // The tangent vector is not perturbed by the normal map and is just a unit vector.
706         out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
707     } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
708         // Bump mapping is enabled using a tangent map
709         out += fmt::format("vec3 surface_tangent = {};\n", Perturbation());
710         // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
711         // computation below, which is also confirmed on 3DS. So we don't bother recomputing here
712         // even if 'renorm' is enabled.
713 
714         // The normal vector is not perturbed by the tangent map and is just a unit vector.
715         out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
716     } else {
717         // No bump mapping - surface local normal and tangent are just unit vectors
718         out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"
719                "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
720     }
721 
722     // Rotate the surface-local normal by the interpolated normal quaternion to convert it to
723     // eyespace.
724     out += "vec4 normalized_normquat = normalize(normquat);\n"
725            "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n"
726            "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
727 
728     if (lighting.enable_shadow) {
729         std::string shadow_texture = SampleTexture(config, lighting.shadow_selector);
730         if (lighting.shadow_invert) {
731             out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture);
732         } else {
733             out += fmt::format("vec4 shadow = {};\n", shadow_texture);
734         }
735     } else {
736         out += "vec4 shadow = vec4(1.0);\n";
737     }
738 
739     // Samples the specified lookup table for specular lighting
740     auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num,
741                                    LightingRegs::LightingLutInput input, bool abs) {
742         std::string index;
743         switch (input) {
744         case LightingRegs::LightingLutInput::NH:
745             index = "dot(normal, normalize(half_vector))";
746             break;
747 
748         case LightingRegs::LightingLutInput::VH:
749             index = "dot(normalize(view), normalize(half_vector))";
750             break;
751 
752         case LightingRegs::LightingLutInput::NV:
753             index = "dot(normal, normalize(view))";
754             break;
755 
756         case LightingRegs::LightingLutInput::LN:
757             index = "dot(light_vector, normal)";
758             break;
759 
760         case LightingRegs::LightingLutInput::SP:
761             index = "dot(light_vector, spot_dir)";
762             break;
763 
764         case LightingRegs::LightingLutInput::CP:
765             // CP input is only available with configuration 7
766             if (lighting.config == LightingRegs::LightingConfig::Config7) {
767                 // Note: even if the normal vector is modified by normal map, which is not the
768                 // normal of the tangent plane anymore, the half angle vector is still projected
769                 // using the modified normal vector.
770                 constexpr std::string_view half_angle_proj =
771                     "normalize(half_vector) - normal * dot(normal, normalize(half_vector))";
772                 // Note: the half angle vector projection is confirmed not normalized before the dot
773                 // product. The result is in fact not cos(phi) as the name suggested.
774                 index = fmt::format("dot({}, tangent)", half_angle_proj);
775             } else {
776                 index = "0.0";
777             }
778             break;
779 
780         default:
781             LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input);
782             UNIMPLEMENTED();
783             index = "0.0";
784             break;
785         }
786 
787         const auto sampler_index = static_cast<u32>(sampler);
788 
789         if (abs) {
790             // LUT index is in the range of (0.0, 1.0)
791             index = lighting.light[light_num].two_sided_diffuse
792                         ? fmt::format("abs({})", index)
793                         : fmt::format("max({}, 0.0)", index);
794             return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index);
795         } else {
796             // LUT index is in the range of (-1.0, 1.0)
797             return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index);
798         }
799     };
800 
801     // Write the code to emulate each enabled light
802     for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
803         const auto& light_config = lighting.light[light_index];
804         const std::string light_src = fmt::format("light_src[{}]", light_config.num);
805 
806         // Compute light vector (directional or positional)
807         if (light_config.directional) {
808             out += fmt::format("light_vector = normalize({}.position);\n", light_src);
809         } else {
810             out += fmt::format("light_vector = normalize({}.position + view);\n", light_src);
811         }
812 
813         out += fmt::format("spot_dir = {}.spot_direction;\n", light_src);
814         out += "half_vector = normalize(view) + light_vector;\n";
815 
816         // Compute dot product of light_vector and normal, adjust if lighting is one-sided or
817         // two-sided
818         out += std::string("dot_product = ") + (light_config.two_sided_diffuse
819                                                     ? "abs(dot(light_vector, normal));\n"
820                                                     : "max(dot(light_vector, normal), 0.0);\n");
821 
822         // If enabled, clamp specular component if lighting result is zero
823         if (lighting.clamp_highlights) {
824             out += "clamp_highlights = sign(dot_product);\n";
825         }
826 
827         // If enabled, compute spot light attenuation value
828         std::string spot_atten = "1.0";
829         if (light_config.spot_atten_enable &&
830             LightingRegs::IsLightingSamplerSupported(
831                 lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
832             const std::string value =
833                 GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
834                             light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input);
835             spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value);
836         }
837 
838         // If enabled, compute distance attenuation value
839         std::string dist_atten = "1.0";
840         if (light_config.dist_atten_enable) {
841             const std::string index = fmt::format("clamp({}.dist_atten_scale * length(-view - "
842                                                   "{}.position) + {}.dist_atten_bias, 0.0, 1.0)",
843                                                   light_src, light_src, light_src);
844             const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num);
845             dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index);
846         }
847 
848         if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
849             out += "geo_factor = dot(half_vector, half_vector);\n"
850                    "geo_factor = geo_factor == 0.0 ? 0.0 : min("
851                    "dot_product / geo_factor, 1.0);\n";
852         }
853 
854         // Specular 0 component
855         std::string d0_lut_value = "1.0";
856         if (lighting.lut_d0.enable &&
857             LightingRegs::IsLightingSamplerSupported(
858                 lighting.config, LightingRegs::LightingSampler::Distribution0)) {
859             // Lookup specular "distribution 0" LUT value
860             const std::string value =
861                 GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
862                             lighting.lut_d0.type, lighting.lut_d0.abs_input);
863             d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value);
864         }
865         std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src);
866         if (light_config.geometric_factor_0) {
867             specular_0 = fmt::format("({} * geo_factor)", specular_0);
868         }
869 
870         // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
871         if (lighting.lut_rr.enable &&
872             LightingRegs::IsLightingSamplerSupported(lighting.config,
873                                                      LightingRegs::LightingSampler::ReflectRed)) {
874             std::string value =
875                 GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
876                             lighting.lut_rr.type, lighting.lut_rr.abs_input);
877             value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value);
878             out += fmt::format("refl_value.r = {};\n", value);
879         } else {
880             out += "refl_value.r = 1.0;\n";
881         }
882 
883         // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
884         if (lighting.lut_rg.enable &&
885             LightingRegs::IsLightingSamplerSupported(lighting.config,
886                                                      LightingRegs::LightingSampler::ReflectGreen)) {
887             std::string value =
888                 GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
889                             lighting.lut_rg.type, lighting.lut_rg.abs_input);
890             value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value);
891             out += fmt::format("refl_value.g = {};\n", value);
892         } else {
893             out += "refl_value.g = refl_value.r;\n";
894         }
895 
896         // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
897         if (lighting.lut_rb.enable &&
898             LightingRegs::IsLightingSamplerSupported(lighting.config,
899                                                      LightingRegs::LightingSampler::ReflectBlue)) {
900             std::string value =
901                 GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
902                             lighting.lut_rb.type, lighting.lut_rb.abs_input);
903             value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value);
904             out += fmt::format("refl_value.b = {};\n", value);
905         } else {
906             out += "refl_value.b = refl_value.r;\n";
907         }
908 
909         // Specular 1 component
910         std::string d1_lut_value = "1.0";
911         if (lighting.lut_d1.enable &&
912             LightingRegs::IsLightingSamplerSupported(
913                 lighting.config, LightingRegs::LightingSampler::Distribution1)) {
914             // Lookup specular "distribution 1" LUT value
915             const std::string value =
916                 GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
917                             lighting.lut_d1.type, lighting.lut_d1.abs_input);
918             d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value);
919         }
920         std::string specular_1 =
921             fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src);
922         if (light_config.geometric_factor_1) {
923             specular_1 = fmt::format("({} * geo_factor)", specular_1);
924         }
925 
926         // Fresnel
927         // Note: only the last entry in the light slots applies the Fresnel factor
928         if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
929             LightingRegs::IsLightingSamplerSupported(lighting.config,
930                                                      LightingRegs::LightingSampler::Fresnel)) {
931             // Lookup fresnel LUT value
932             std::string value =
933                 GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
934                             lighting.lut_fr.type, lighting.lut_fr.abs_input);
935             value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value);
936 
937             // Enabled for diffuse lighting alpha component
938             if (lighting.enable_primary_alpha) {
939                 out += fmt::format("diffuse_sum.a = {};\n", value);
940             }
941 
942             // Enabled for the specular lighting alpha component
943             if (lighting.enable_secondary_alpha) {
944                 out += fmt::format("specular_sum.a = {};\n", value);
945             }
946         }
947 
948         bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable;
949         bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable;
950         std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : "";
951         std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : "";
952 
953         // Compute primary fragment color (diffuse lighting) function
954         out += fmt::format(
955             "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n",
956             light_src, light_src, dist_atten, spot_atten, shadow_primary);
957 
958         // Compute secondary fragment color (specular lighting) function
959         out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n",
960                            specular_0, specular_1, dist_atten, spot_atten, shadow_secondary);
961     }
962 
963     // Apply shadow attenuation to alpha components if enabled
964     if (lighting.shadow_alpha) {
965         if (lighting.enable_primary_alpha) {
966             out += "diffuse_sum.a *= shadow.a;\n";
967         }
968         if (lighting.enable_secondary_alpha) {
969             out += "specular_sum.a *= shadow.a;\n";
970         }
971     }
972 
973     // Sum final lighting result
974     out += "diffuse_sum.rgb += lighting_global_ambient;\n"
975            "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"
976            "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
977 }
978 
// Shorthand aliases for the procedural texture enums declared in TexturingRegs; they are used by
// the proctex shader generation helpers that follow.
using ProcTexClamp = TexturingRegs::ProcTexClamp;
using ProcTexShift = TexturingRegs::ProcTexShift;
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
using ProcTexFilter = TexturingRegs::ProcTexFilter;
983 
AppendProcTexShiftOffset(std::string & out,std::string_view v,ProcTexShift mode,ProcTexClamp clamp_mode)984 static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode,
985                                      ProcTexClamp clamp_mode) {
986     const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5";
987     switch (mode) {
988     case ProcTexShift::None:
989         out += "0.0";
990         break;
991     case ProcTexShift::Odd:
992         out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v);
993         break;
994     case ProcTexShift::Even:
995         out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v);
996         break;
997     default:
998         LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode);
999         out += "0.0";
1000         break;
1001     }
1002 }
1003 
AppendProcTexClamp(std::string & out,std::string_view var,ProcTexClamp mode)1004 static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) {
1005     switch (mode) {
1006     case ProcTexClamp::ToZero:
1007         out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var);
1008         break;
1009     case ProcTexClamp::ToEdge:
1010         out += fmt::format("{0} = min({0}, 1.0);\n", var);
1011         break;
1012     case ProcTexClamp::SymmetricalRepeat:
1013         out += fmt::format("{0} = fract({0});\n", var);
1014         break;
1015     case ProcTexClamp::MirroredRepeat: {
1016         out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var);
1017         break;
1018     }
1019     case ProcTexClamp::Pulse:
1020         out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var);
1021         break;
1022     default:
1023         LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode);
1024         out += fmt::format("{0} = min({0}, 1.0);\n", var);
1025         break;
1026     }
1027 }
1028 
AppendProcTexCombineAndMap(std::string & out,ProcTexCombiner combiner,std::string_view offset)1029 static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner,
1030                                        std::string_view offset) {
1031     const auto combined = [combiner]() -> std::string_view {
1032         switch (combiner) {
1033         case ProcTexCombiner::U:
1034             return "u";
1035         case ProcTexCombiner::U2:
1036             return "(u * u)";
1037         case TexturingRegs::ProcTexCombiner::V:
1038             return "v";
1039         case TexturingRegs::ProcTexCombiner::V2:
1040             return "(v * v)";
1041         case TexturingRegs::ProcTexCombiner::Add:
1042             return "((u + v) * 0.5)";
1043         case TexturingRegs::ProcTexCombiner::Add2:
1044             return "((u * u + v * v) * 0.5)";
1045         case TexturingRegs::ProcTexCombiner::SqrtAdd2:
1046             return "min(sqrt(u * u + v * v), 1.0)";
1047         case TexturingRegs::ProcTexCombiner::Min:
1048             return "min(u, v)";
1049         case TexturingRegs::ProcTexCombiner::Max:
1050             return "max(u, v)";
1051         case TexturingRegs::ProcTexCombiner::RMax:
1052             return "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)";
1053         default:
1054             LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner);
1055             return "0.0";
1056         }
1057     }();
1058 
1059     out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined);
1060 }
1061 
AppendProcTexSampler(std::string & out,const PicaFSConfig & config)1062 static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) {
1063     // LUT sampling uitlity
1064     // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
1065     // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
1066     // value entries and difference entries.
1067     out += R"(
1068 float ProcTexLookupLUT(int offset, float coord) {
1069     coord *= 128.0;
1070     float index_i = clamp(floor(coord), 0.0, 127.0);
1071     float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be
1072                                      // extracted as index_i = 127.0 and index_f = 1.0
1073     vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg;
1074     return clamp(entry.r + entry.g * index_f, 0.0, 1.0);
1075 }
1076     )";
1077 
1078     // Noise utility
1079     if (config.state.proctex.noise_enable) {
1080         // See swrasterizer/proctex.cpp for more information about these functions
1081         out += R"(
1082 int ProcTexNoiseRand1D(int v) {
1083     const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11);
1084     return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF];
1085 }
1086 
1087 float ProcTexNoiseRand2D(vec2 point) {
1088     const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14);
1089     int u2 = ProcTexNoiseRand1D(int(point.x));
1090     int v2 = ProcTexNoiseRand1D(int(point.y));
1091     v2 += ((u2 & 3) == 1) ? 4 : 0;
1092     v2 ^= (u2 & 1) * 6;
1093     v2 += 10 + u2;
1094     v2 &= 0xF;
1095     v2 ^= table[u2];
1096     return -1.0 + float(v2) * 2.0/ 15.0;
1097 }
1098 
1099 float ProcTexNoiseCoef(vec2 x) {
1100     vec2 grid  = 9.0 * proctex_noise_f * abs(x + proctex_noise_p);
1101     vec2 point = floor(grid);
1102     vec2 frac  = grid - point;
1103 
1104     float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y);
1105     float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0);
1106     float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0);
1107     float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0);
1108 
1109     float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x);
1110     float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y);
1111     float x0 = mix(g0, g1, x_noise);
1112     float x1 = mix(g2, g3, x_noise);
1113     return mix(x0, x1, y_noise);
1114 }
1115         )";
1116     }
1117 
1118     out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
1119     out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width);
1120     // Offsets for level 4-7 seem to be hardcoded
1121     out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n",
1122                        config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
1123                        config.state.proctex.lut_offset2, config.state.proctex.lut_offset3);
1124     out += "int lut_offset = lut_offsets[level];\n";
1125     // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
1126     out += "lut_coord *= float(lut_width - 1);\n";
1127 
1128     switch (config.state.proctex.lut_filter) {
1129     case ProcTexFilter::Linear:
1130     case ProcTexFilter::LinearMipmapLinear:
1131     case ProcTexFilter::LinearMipmapNearest:
1132         out += "int lut_index_i = int(lut_coord) + lut_offset;\n";
1133         out += "float lut_index_f = fract(lut_coord);\n";
1134         out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + "
1135                "proctex_lut_offset) + "
1136                "lut_index_f * "
1137                "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n";
1138         break;
1139     case ProcTexFilter::Nearest:
1140     case ProcTexFilter::NearestMipmapLinear:
1141     case ProcTexFilter::NearestMipmapNearest:
1142         out += "lut_coord += float(lut_offset);\n";
1143         out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + "
1144                "proctex_lut_offset);\n";
1145         break;
1146     }
1147 
1148     out += "}\n";
1149 
1150     out += "vec4 ProcTex() {\n";
1151     if (config.state.proctex.coord < 3) {
1152         out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord);
1153     } else {
1154         LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
1155         out += "vec2 uv = abs(texcoord0);\n";
1156     }
1157 
1158     // This LOD formula is the same as the LOD upper limit defined in OpenGL.
1159     // f(x, y) <= m_u + m_v + m_w
1160     // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
1161     // Note: this is different from the one normal 2D textures use.
1162     out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
1163     // unlike normal texture, the bias is inside the log2
1164     out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n",
1165                        config.state.proctex.lut_width);
1166     out += "if (proctex_bias == 0.0) lod = 0.0;\n";
1167     out += fmt::format("lod = clamp(lod, {:#}, {:#});\n",
1168                        std::max(0.0f, static_cast<float>(config.state.proctex.lod_min)),
1169                        std::min(7.0f, static_cast<float>(config.state.proctex.lod_max)));
1170     // Get shift offset before noise generation
1171     out += "float u_shift = ";
1172     AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
1173                              config.state.proctex.u_clamp);
1174     out += ";\n";
1175     out += "float v_shift = ";
1176     AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
1177                              config.state.proctex.v_clamp);
1178     out += ";\n";
1179 
1180     // Generate noise
1181     if (config.state.proctex.noise_enable) {
1182         out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"
1183                "uv = abs(uv);\n";
1184     }
1185 
1186     // Shift
1187     out += "float u = uv.x + u_shift;\n"
1188            "float v = uv.y + v_shift;\n";
1189 
1190     // Clamp
1191     AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
1192     AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
1193 
1194     // Combine and map
1195     out += "float lut_coord = ";
1196     AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
1197                                "proctex_color_map_offset");
1198     out += ";\n";
1199 
1200     switch (config.state.proctex.lut_filter) {
1201     case ProcTexFilter::Linear:
1202     case ProcTexFilter::Nearest:
1203         out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
1204         break;
1205     case ProcTexFilter::NearestMipmapNearest:
1206     case ProcTexFilter::LinearMipmapNearest:
1207         out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n";
1208         break;
1209     case ProcTexFilter::NearestMipmapLinear:
1210     case ProcTexFilter::LinearMipmapLinear:
1211         out += "int lod_i = int(lod);\n"
1212                "float lod_f = fract(lod);\n"
1213                "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), "
1214                "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n";
1215         break;
1216     }
1217 
1218     if (config.state.proctex.separate_alpha) {
1219         // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
1220         // uses the output of CombineAndMap directly instead.
1221         out += "float final_alpha = ";
1222         AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
1223                                    "proctex_alpha_map_offset");
1224         out += ";\n";
1225         out += "return vec4(final_color.xyz, final_alpha);\n}\n";
1226     } else {
1227         out += "return final_color;\n}\n";
1228     }
1229 }
1230 
GenerateFragmentShader(const PicaFSConfig & config,bool separable_shader)1231 ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
1232                                                        bool separable_shader) {
1233     const auto& state = config.state;
1234     std::string out;
1235 
1236     if (GLES) {
1237         out += R"(
1238 #define ALLOW_SHADOW (defined(CITRA_GLES))
1239 )";
1240     } else {
1241         out += R"(
1242 #extension GL_ARB_shader_image_load_store : enable
1243 #extension GL_ARB_shader_image_size : enable
1244 #define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
1245 )";
1246     }
1247 
1248     if (separable_shader && !GLES) {
1249         out += "#extension GL_ARB_separate_shader_objects : enable\n";
1250     }
1251 
1252     if (GLES) {
1253         out += fragment_shader_precision_OES;
1254     }
1255 
1256     out += GetVertexInterfaceDeclaration(false, separable_shader);
1257 
1258     out += R"(
1259 #ifndef CITRA_GLES
1260 in vec4 gl_FragCoord;
1261 #endif // CITRA_GLES
1262 
1263 out vec4 color;
1264 
1265 uniform sampler2D tex0;
1266 uniform sampler2D tex1;
1267 uniform sampler2D tex2;
1268 uniform samplerCube tex_cube;
1269 uniform samplerBuffer texture_buffer_lut_lf;
1270 uniform samplerBuffer texture_buffer_lut_rg;
1271 uniform samplerBuffer texture_buffer_lut_rgba;
1272 
1273 #if ALLOW_SHADOW
1274 layout(r32ui) uniform readonly uimage2D shadow_texture_px;
1275 layout(r32ui) uniform readonly uimage2D shadow_texture_nx;
1276 layout(r32ui) uniform readonly uimage2D shadow_texture_py;
1277 layout(r32ui) uniform readonly uimage2D shadow_texture_ny;
1278 layout(r32ui) uniform readonly uimage2D shadow_texture_pz;
1279 layout(r32ui) uniform readonly uimage2D shadow_texture_nz;
1280 layout(r32ui) uniform uimage2D shadow_buffer;
1281 #endif
1282 )";
1283 
1284     out += UniformBlockDef;
1285 
1286     out += R"(
1287 // Rotate the vector v by the quaternion q
1288 vec3 quaternion_rotate(vec4 q, vec3 v) {
1289     return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
1290 }
1291 
1292 float LookupLightingLUT(int lut_index, int index, float delta) {
1293     vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
1294     return entry.r + entry.g * delta;
1295 }
1296 
1297 float LookupLightingLUTUnsigned(int lut_index, float pos) {
1298     int index = clamp(int(pos * 256.0), 0, 255);
1299     float delta = pos * 256.0 - float(index);
1300     return LookupLightingLUT(lut_index, index, delta);
1301 }
1302 
1303 float LookupLightingLUTSigned(int lut_index, float pos) {
1304     int index = clamp(int(pos * 128.0), -128, 127);
1305     float delta = pos * 128.0 - float(index);
1306     if (index < 0) index += 256;
1307     return LookupLightingLUT(lut_index, index, delta);
1308 }
1309 
1310 float byteround(float x) {
1311     return round(x * 255.0) * (1.0 / 255.0);
1312 }
1313 
1314 vec2 byteround(vec2 x) {
1315     return round(x * 255.0) * (1.0 / 255.0);
1316 }
1317 
1318 vec3 byteround(vec3 x) {
1319     return round(x * 255.0) * (1.0 / 255.0);
1320 }
1321 
1322 vec4 byteround(vec4 x) {
1323     return round(x * 255.0) * (1.0 / 255.0);
1324 }
1325 
1326 // PICA's LOD formula for 2D textures.
1327 // This LOD formula is the same as the LOD lower limit defined in OpenGL.
1328 // f(x, y) >= max{m_u, m_v, m_w}
1329 // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
1330 float getLod(vec2 coord) {
1331     vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord)));
1332     return log2(max(d.x, d.y));
1333 }
1334 
1335 #if ALLOW_SHADOW
1336 
1337 uvec2 DecodeShadow(uint pixel) {
1338     return uvec2(pixel >> 8, pixel & 0xFFu);
1339 }
1340 
1341 uint EncodeShadow(uvec2 pixel) {
1342     return (pixel.x << 8) | pixel.y;
1343 }
1344 
1345 float CompareShadow(uint pixel, uint z) {
1346     uvec2 p = DecodeShadow(pixel);
1347     return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
1348 }
1349 
1350 float SampleShadow2D(ivec2 uv, uint z) {
1351     if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) )))
1352         return 1.0;
1353     return CompareShadow(imageLoad(shadow_texture_px, uv).x, z);
1354 }
1355 
1356 float mix2(vec4 s, vec2 a) {
1357     vec2 t = mix(s.xy, s.zw, a.yy);
1358     return mix(t.x, t.y, a.x);
1359 }
1360 
1361 vec4 shadowTexture(vec2 uv, float w) {
1362 )";
1363     if (!config.state.shadow_texture_orthographic) {
1364         out += "uv /= w;";
1365     }
1366     out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
1367     out += R"(
1368     vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5);
1369     vec2 coord_floor = floor(coord);
1370     vec2 f = coord - coord_floor;
1371     ivec2 i = ivec2(coord_floor);
1372     vec4 s = vec4(
1373         SampleShadow2D(i              , z),
1374         SampleShadow2D(i + ivec2(1, 0), z),
1375         SampleShadow2D(i + ivec2(0, 1), z),
1376         SampleShadow2D(i + ivec2(1, 1), z));
1377     return vec4(mix2(s, f));
1378 }
1379 
1380 vec4 shadowTextureCube(vec2 uv, float w) {
1381     ivec2 size = imageSize(shadow_texture_px);
1382     vec3 c = vec3(uv, w);
1383     vec3 a = abs(c);
1384     if (a.x > a.y && a.x > a.z) {
1385         w = a.x;
1386         uv = -c.zy;
1387         if (c.x < 0.0) uv.x = -uv.x;
1388     } else if (a.y > a.z) {
1389         w = a.y;
1390         uv = c.xz;
1391         if (c.y < 0.0) uv.y = -uv.y;
1392     } else {
1393         w = a.z;
1394         uv = -c.xy;
1395         if (c.z > 0.0) uv.x = -uv.x;
1396     }
1397 )";
1398     out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
1399     out += R"(
1400     vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
1401     vec2 coord_floor = floor(coord);
1402     vec2 f = coord - coord_floor;
1403     ivec2 i00 = ivec2(coord_floor);
1404     ivec2 i10 = i00 + ivec2(1, 0);
1405     ivec2 i01 = i00 + ivec2(0, 1);
1406     ivec2 i11 = i00 + ivec2(1, 1);
1407     ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1);
1408     i00 = clamp(i00, cmin, cmax);
1409     i10 = clamp(i10, cmin, cmax);
1410     i01 = clamp(i01, cmin, cmax);
1411     i11 = clamp(i11, cmin, cmax);
1412     uvec4 pixels;
1413     // This part should have been refactored into functions,
1414     // but many drivers don't like passing uimage2D as parameters
1415     if (a.x > a.y && a.x > a.z) {
1416         if (c.x > 0.0)
1417             pixels = uvec4(
1418                 imageLoad(shadow_texture_px, i00).r,
1419                 imageLoad(shadow_texture_px, i10).r,
1420                 imageLoad(shadow_texture_px, i01).r,
1421                 imageLoad(shadow_texture_px, i11).r);
1422         else
1423             pixels = uvec4(
1424                 imageLoad(shadow_texture_nx, i00).r,
1425                 imageLoad(shadow_texture_nx, i10).r,
1426                 imageLoad(shadow_texture_nx, i01).r,
1427                 imageLoad(shadow_texture_nx, i11).r);
1428     } else if (a.y > a.z) {
1429         if (c.y > 0.0)
1430             pixels = uvec4(
1431                 imageLoad(shadow_texture_py, i00).r,
1432                 imageLoad(shadow_texture_py, i10).r,
1433                 imageLoad(shadow_texture_py, i01).r,
1434                 imageLoad(shadow_texture_py, i11).r);
1435         else
1436             pixels = uvec4(
1437                 imageLoad(shadow_texture_ny, i00).r,
1438                 imageLoad(shadow_texture_ny, i10).r,
1439                 imageLoad(shadow_texture_ny, i01).r,
1440                 imageLoad(shadow_texture_ny, i11).r);
1441     } else {
1442         if (c.z > 0.0)
1443             pixels = uvec4(
1444                 imageLoad(shadow_texture_pz, i00).r,
1445                 imageLoad(shadow_texture_pz, i10).r,
1446                 imageLoad(shadow_texture_pz, i01).r,
1447                 imageLoad(shadow_texture_pz, i11).r);
1448         else
1449             pixels = uvec4(
1450                 imageLoad(shadow_texture_nz, i00).r,
1451                 imageLoad(shadow_texture_nz, i10).r,
1452                 imageLoad(shadow_texture_nz, i01).r,
1453                 imageLoad(shadow_texture_nz, i11).r);
1454     }
1455     vec4 s = vec4(
1456         CompareShadow(pixels.x, z),
1457         CompareShadow(pixels.y, z),
1458         CompareShadow(pixels.z, z),
1459         CompareShadow(pixels.w, z));
1460     return vec4(mix2(s, f));
1461 }
1462 
1463 #else
1464 
1465 vec4 shadowTexture(vec2 uv, float w) {
1466     return vec4(1.0);
1467 }
1468 
1469 vec4 shadowTextureCube(vec2 uv, float w) {
1470     return vec4(1.0);
1471 }
1472 
1473 #endif
1474 )";
1475 
1476     if (config.state.proctex.enable)
1477         AppendProcTexSampler(out, config);
1478 
1479     // We round the interpolated primary color to the nearest 1/255th
1480     // This maintains the PICA's 8 bits of precision
1481     out += R"(
1482 void main() {
1483 vec4 rounded_primary_color = byteround(primary_color);
1484 vec4 primary_fragment_color = vec4(0.0);
1485 vec4 secondary_fragment_color = vec4(0.0);
1486 )";
1487 
1488     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
1489     if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
1490         out += "discard; }";
1491         return {std::move(out)};
1492     }
1493 
1494     // Append the scissor test
1495     if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) {
1496         out += "if (";
1497         // Negate the condition if we have to keep only the pixels outside the scissor box
1498         if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) {
1499             out += '!';
1500         }
1501         out += "(gl_FragCoord.x >= float(scissor_x1) && "
1502                "gl_FragCoord.y >= float(scissor_y1) && "
1503                "gl_FragCoord.x < float(scissor_x2) && "
1504                "gl_FragCoord.y < float(scissor_y2))) discard;\n";
1505     }
1506 
1507     // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
1508     // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
1509     // do our own transformation according to PICA specification.
1510     out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"
1511            "float depth = z_over_w * depth_scale + depth_offset;\n";
1512     if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
1513         out += "depth /= gl_FragCoord.w;\n";
1514     }
1515 
1516     if (state.lighting.enable)
1517         WriteLighting(out, config);
1518 
1519     out += "vec4 combiner_buffer = vec4(0.0);\n"
1520            "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"
1521            "vec4 last_tex_env_out = vec4(0.0);\n";
1522 
1523     for (std::size_t index = 0; index < state.tev_stages.size(); ++index) {
1524         WriteTevStage(out, config, static_cast<u32>(index));
1525     }
1526 
1527     if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
1528         out += "if (";
1529         AppendAlphaTestCondition(out, state.alpha_test_func);
1530         out += ") discard;\n";
1531     }
1532 
1533     // Append fog combiner
1534     if (state.fog_mode == TexturingRegs::FogMode::Fog) {
1535         // Get index into fog LUT
1536         if (state.fog_flip) {
1537             out += "float fog_index = (1.0 - float(depth)) * 128.0;\n";
1538         } else {
1539             out += "float fog_index = depth * 128.0;\n";
1540         }
1541 
1542         // Generate clamped fog factor from LUT for given fog index
1543         out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"
1544                "float fog_f = fog_index - fog_i;\n"
1545                "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + "
1546                "fog_lut_offset).rg;\n"
1547                "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"
1548                "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
1549 
1550         // Blend the fog
1551         out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
1552     } else if (state.fog_mode == TexturingRegs::FogMode::Gas) {
1553         Core::System::GetInstance().TelemetrySession().AddField(
1554             Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true);
1555         LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode");
1556         out += "discard; }";
1557         return {std::move(out)};
1558     }
1559 
1560     if (state.shadow_rendering) {
1561         out += R"(
1562 #if ALLOW_SHADOW
1563 uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
1564 uint s = uint(last_tex_env_out.g * float(0xFF));
1565 ivec2 image_coord = ivec2(gl_FragCoord.xy);
1566 
1567 uint old = imageLoad(shadow_buffer, image_coord).x;
1568 uint new;
1569 uint old2;
1570 do {
1571     old2 = old;
1572 
1573     uvec2 ref = DecodeShadow(old);
1574     if (d < ref.x) {
1575         if (s == 0u) {
1576             ref.x = d;
1577         } else {
1578             s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
1579             ref.y = min(s, ref.y);
1580         }
1581     }
1582     new = EncodeShadow(ref);
1583 
1584 } while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2);
1585 #endif // ALLOW_SHADOW
1586 )";
1587     } else {
1588         out += "gl_FragDepth = depth;\n";
1589         // Round the final fragment color to maintain the PICA's 8 bits of precision
1590         out += "color = byteround(last_tex_env_out);\n";
1591     }
1592 
1593     if (GLES) {
1594         if (!state.alphablend_enable) {
1595             switch (state.logic_op) {
1596             case FramebufferRegs::LogicOp::Clear:
1597                 out += "color = vec4(0);\n";
1598                 break;
1599             case FramebufferRegs::LogicOp::Set:
1600                 out += "color = vec4(1);\n";
1601                 break;
1602             case FramebufferRegs::LogicOp::Copy:
1603                 // Take the color output as-is
1604                 break;
1605             case FramebufferRegs::LogicOp::CopyInverted:
1606                 out += "color = ~color;\n";
1607                 break;
1608             case FramebufferRegs::LogicOp::NoOp:
1609                 // We need to discard the color, but not necessarily the depth. This is not possible
1610                 // with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
1611                 break;
1612             default:
1613                 LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
1614                 UNIMPLEMENTED();
1615             }
1616         }
1617     }
1618 
1619     out += '}';
1620 
1621     return {std::move(out)};
1622 }
1623 
GenerateTrivialVertexShader(bool separable_shader)1624 ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) {
1625     std::string out;
1626     if (separable_shader && !GLES) {
1627         out += "#extension GL_ARB_separate_shader_objects : enable\n";
1628     }
1629 
1630     out +=
1631         fmt::format("layout(location = {}) in vec4 vert_position;\n"
1632                     "layout(location = {}) in vec4 vert_color;\n"
1633                     "layout(location = {}) in vec2 vert_texcoord0;\n"
1634                     "layout(location = {}) in vec2 vert_texcoord1;\n"
1635                     "layout(location = {}) in vec2 vert_texcoord2;\n"
1636                     "layout(location = {}) in float vert_texcoord0_w;\n"
1637                     "layout(location = {}) in vec4 vert_normquat;\n"
1638                     "layout(location = {}) in vec3 vert_view;\n",
1639                     ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1,
1640                     ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW);
1641 
1642     out += GetVertexInterfaceDeclaration(true, separable_shader);
1643 
1644     out += UniformBlockDef;
1645 
1646     out += R"(
1647 
1648 void main() {
1649     primary_color = vert_color;
1650     texcoord0 = vert_texcoord0;
1651     texcoord1 = vert_texcoord1;
1652     texcoord2 = vert_texcoord2;
1653     texcoord0_w = vert_texcoord0_w;
1654     normquat = vert_normquat;
1655     view = vert_view;
1656     gl_Position = vert_position;
1657 #if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
1658     gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
1659     gl_ClipDistance[1] = dot(clip_coef, vert_position);
1660 #endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
1661 }
1662 )";
1663 
1664     return {std::move(out)};
1665 }
1666 
GenerateVertexShader(const Pica::Shader::ShaderSetup & setup,const PicaVSConfig & config,bool separable_shader)1667 std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
1668     const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) {
1669     std::string out;
1670     if (separable_shader && !GLES) {
1671         out += "#extension GL_ARB_separate_shader_objects : enable\n";
1672     }
1673 
1674     out += ShaderDecompiler::GetCommonDeclarations();
1675 
1676     std::array<bool, 16> used_regs{};
1677     const auto get_input_reg = [&used_regs](u32 reg) {
1678         ASSERT(reg < 16);
1679         used_regs[reg] = true;
1680         return fmt::format("vs_in_reg{}", reg);
1681     };
1682 
1683     const auto get_output_reg = [&](u32 reg) -> std::string {
1684         ASSERT(reg < 16);
1685         if (config.state.output_map[reg] < config.state.num_outputs) {
1686             return fmt::format("vs_out_attr{}", config.state.output_map[reg]);
1687         }
1688         return "";
1689     };
1690 
1691     auto program_source_opt = ShaderDecompiler::DecompileProgram(
1692         setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg,
1693         get_output_reg, config.state.sanitize_mul);
1694 
1695     if (!program_source_opt)
1696         return std::nullopt;
1697 
1698     std::string& program_source = program_source_opt->code;
1699 
1700     out += R"(
1701 #define uniforms vs_uniforms
1702 layout (std140) uniform vs_config {
1703     pica_uniforms uniforms;
1704 };
1705 
1706 )";
1707     // input attributes declaration
1708     for (std::size_t i = 0; i < used_regs.size(); ++i) {
1709         if (used_regs[i]) {
1710             out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i);
1711         }
1712     }
1713     out += '\n';
1714 
1715     // output attributes declaration
1716     for (u32 i = 0; i < config.state.num_outputs; ++i) {
1717         out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) +
1718                " out vec4 vs_out_attr" + std::to_string(i) + ";\n";
1719     }
1720 
1721     out += "\nvoid main() {\n";
1722     for (u32 i = 0; i < config.state.num_outputs; ++i) {
1723         out += fmt::format("    vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i);
1724     }
1725     out += "\n    exec_shader();\n}\n\n";
1726 
1727     out += program_source;
1728 
1729     return {{std::move(out)}};
1730 }
1731 
GetGSCommonSource(const PicaGSConfigCommonRaw & config,bool separable_shader)1732 static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) {
1733     std::string out = GetVertexInterfaceDeclaration(true, separable_shader);
1734     out += UniformBlockDef;
1735     out += ShaderDecompiler::GetCommonDeclarations();
1736 
1737     out += '\n';
1738     for (u32 i = 0; i < config.vs_output_attributes; ++i) {
1739         out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) +
1740                " in vec4 vs_out_attr" + std::to_string(i) + "[];\n";
1741     }
1742 
1743     out += R"(
1744 struct Vertex {
1745 )";
1746     out += fmt::format("    vec4 attributes[{}];\n", config.gs_output_attributes);
1747     out += "};\n\n";
1748 
1749     const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string {
1750         const u32 slot = static_cast<u32>(slot_semantic);
1751         const u32 attrib = config.semantic_maps[slot].attribute_index;
1752         const u32 comp = config.semantic_maps[slot].component_index;
1753         if (attrib < config.gs_output_attributes) {
1754             return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]);
1755         }
1756         return "0.0";
1757     };
1758 
1759     out += "vec4 GetVertexQuaternion(Vertex vtx) {\n";
1760     out += "    return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " +
1761            semantic(VSOutputAttributes::QUATERNION_Y) + ", " +
1762            semantic(VSOutputAttributes::QUATERNION_Z) + ", " +
1763            semantic(VSOutputAttributes::QUATERNION_W) + ");\n";
1764     out += "}\n\n";
1765 
1766     out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n";
1767     out += "    vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " +
1768            semantic(VSOutputAttributes::POSITION_Y) + ", " +
1769            semantic(VSOutputAttributes::POSITION_Z) + ", " +
1770            semantic(VSOutputAttributes::POSITION_W) + ");\n";
1771     out += "    gl_Position = vtx_pos;\n";
1772     out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
1773     out += "    gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
1774     out += "    gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
1775     out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n";
1776 
1777     out += "    vec4 vtx_quat = GetVertexQuaternion(vtx);\n";
1778     out += "    normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n";
1779 
1780     out += "    vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " +
1781            semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) +
1782            ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n";
1783     out += "    primary_color = min(abs(vtx_color), vec4(1.0));\n\n";
1784 
1785     out += "    texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " +
1786            semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n";
1787     out += "    texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " +
1788            semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n";
1789 
1790     out += "    texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n";
1791     out += "    view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " +
1792            semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) +
1793            ");\n\n";
1794 
1795     out += "    texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " +
1796            semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n";
1797 
1798     out += "    EmitVertex();\n";
1799     out += "}\n";
1800 
1801     out += R"(
1802 bool AreQuaternionsOpposite(vec4 qa, vec4 qb) {
1803     return (dot(qa, qb) < 0.0);
1804 }
1805 
1806 void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
1807     EmitVtx(vtx0, false);
1808     EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1)));
1809     EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2)));
1810     EndPrimitive();
1811 }
1812 )";
1813 
1814     return out;
1815 };
1816 
GenerateFixedGeometryShader(const PicaFixedGSConfig & config,bool separable_shader)1817 ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
1818                                                             bool separable_shader) {
1819     std::string out;
1820     if (separable_shader && !GLES) {
1821         out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
1822     }
1823 
1824     out += R"(
1825 layout(triangles) in;
1826 layout(triangle_strip, max_vertices = 3) out;
1827 
1828 )";
1829 
1830     out += GetGSCommonSource(config.state, separable_shader);
1831 
1832     out += R"(
1833 void main() {
1834     Vertex prim_buffer[3];
1835 )";
1836     for (u32 vtx = 0; vtx < 3; ++vtx) {
1837         out += fmt::format("    prim_buffer[{}].attributes = vec4[{}](", vtx,
1838                            config.state.gs_output_attributes);
1839         for (u32 i = 0; i < config.state.vs_output_attributes; ++i) {
1840             out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? "" : ", ", i, vtx);
1841         }
1842         out += ");\n";
1843     }
1844     out += "    EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n";
1845     out += "}\n";
1846 
1847     return {std::move(out)};
1848 }
1849 } // namespace OpenGL
1850