1 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
2 
3 //  crt-royale: A full-featured CRT shader, with cheese.
4 //  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
5 //
6 //  This program is free software; you can redistribute it and/or modify it
7 //  under the terms of the GNU General Public License as published by the Free
8 //  Software Foundation; either version 2 of the License, or any later version.
9 //
10 //  This program is distributed in the hope that it will be useful, but WITHOUT
11 //  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 //  more details.
14 //
15 //  You should have received a copy of the GNU General Public License along with
16 //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 //  Place, Suite 330, Boston, MA 02111-1307 USA
18 
//  Push-constant block holding one size vector for this pass's source, the
//  original input, the output, and each earlier pass sampled here.  The code
//  visible in this file reads only .x/.y components of these vectors
//  (presumably width and height; the .zw contents are not used here -- confirm
//  against the slang spec).  Member order determines the block's memory
//  layout, so do not reorder the members.
layout(push_constant) uniform Push
{
	vec4 SourceSize;
	vec4 OriginalSize;
	vec4 OutputSize;
	vec4 VERTICAL_SCANLINESSize;
	vec4 BLOOM_APPROXSize;
	vec4 HALATION_BLURSize;
	vec4 MASK_RESIZESize;
} params;
29 
//  Per-pass aliases.  For each earlier pass FOO sampled by this file:
//      FOOtexture       -> the sampler to read from
//      FOOtexture_size  -> params.FOOSize.xy
//      FOOvideo_size    -> params.FOOSize.xy
//  texture_size and video_size expand to the same expression here, so a
//  video_size/texture_size ratio built from them is 1 for nonzero sizes.
#define VERTICAL_SCANLINEStexture VERTICAL_SCANLINES
#define VERTICAL_SCANLINEStexture_size params.VERTICAL_SCANLINESSize.xy
#define VERTICAL_SCANLINESvideo_size params.VERTICAL_SCANLINESSize.xy
#define BLOOM_APPROXtexture BLOOM_APPROX
#define BLOOM_APPROXtexture_size params.BLOOM_APPROXSize.xy
#define BLOOM_APPROXvideo_size params.BLOOM_APPROXSize.xy
#define HALATION_BLURtexture HALATION_BLUR
#define HALATION_BLURtexture_size params.HALATION_BLURSize.xy
#define HALATION_BLURvideo_size params.HALATION_BLURSize.xy
//  In integrated-graphics mode the resized-mask alias points at Source instead
//  of the MASK_RESIZE sampler.
//  NOTE(review): this test is evaluated before user-settings.h is included
//  further down, so INTEGRATED_GRAPHICS_COMPATIBILITY_MODE presumably has to be
//  defined by whatever includes this file for the Source alias to apply --
//  confirm.
#ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
	#define MASK_RESIZEtexture Source
#else
	#define MASK_RESIZEtexture MASK_RESIZE
#endif
#define MASK_RESIZEtexture_size params.MASK_RESIZESize.xy
#define MASK_RESIZEvideo_size params.MASK_RESIZESize.xy
46 
//  OutputSize.x divided by SourceSize.y.  Not referenced elsewhere in this
//  file; presumably consumed by the headers included below -- confirm.
//  NOTE(review): the divisor is SourceSize.y rather than SourceSize.x, i.e. a
//  mixed-axis ratio -- confirm this is intended.
float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;
//  Constant expression evaluating to 1474560.0.  Not referenced elsewhere in
//  this file; presumably consumed by the headers included below -- confirm.
const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
49 
50 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
51 
52 #include "../../../../include/compat_macros.inc"
53 #include "../user-settings.h"
54 #include "derived-settings-and-constants.h"
55 #include "bind-shader-params.h"
56 
57 
58 ///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
59 
60 #include "scanline-functions.h"
61 #include "phosphor-mask-resizing.h"
62 #include "../../../../include/gamma-management.h"
63 
64 ///////////////////////////////////  HELPERS  //////////////////////////////////
65 
//  Sample a (possibly manually tiled) phosphor mask texture through the
//  *_linearize helpers.  Which helper is used is fixed at compile time:
//      manual resize + ANISOTROPIC_TILING_COMPAT_TEX2DLOD   -> explicit LOD 0
//      manual resize + ANISOTROPIC_TILING_COMPAT_TEX2DBIAS  -> LOD bias -16
//      anything else                                        -> plain lookup
//  The LOD/bias variants exist because anisotropic filtering can get confused
//  by a manually tiled texture; pinning the mip selection works around that.
inline float4 tex2Dtiled_mask_linearize(const sampler2D tex,
    const float2 tex_uv)
{
    #if defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
        //  TODO: Use tex2Dlod_linearize with a calculated mip level.
        return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0));
    #elif defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0));
    #else
        //  Either the mask is not manually resized or no compatibility
        //  workaround was requested: use the ordinary lookup.
        return tex2D_linearize(tex, tex_uv);
    #endif
}
86 
#pragma stage vertex
//  Vertex inputs.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
//  Vertex outputs, all written by the vertex main() below.  The location
//  numbers must stay in sync with the fragment-stage inputs of the same names.
//  video_uv: TexCoord scaled by IN.texture_size/IN.video_size.
layout(location = 0) out vec2 video_uv;
//  Lookup coordinates derived from video_uv for the VERTICAL_SCANLINES,
//  BLOOM_APPROX and HALATION_BLUR textures respectively.
layout(location = 1) out vec2 scanline_tex_uv;
layout(location = 2) out vec2 blur3x3_tex_uv;
layout(location = 3) out vec2 halation_tex_uv;
//  1.0 / VERTICAL_SCANLINEStexture_size, per component.
layout(location = 4) out vec2 scanline_texture_size_inv;
//  Return value of get_mask_sampling_parameters(), and the tile count it
//  writes through its last argument.
layout(location = 5) out vec4 mask_tile_start_uv_and_size;
layout(location = 6) out vec2 mask_tiles_per_screen;
97 
//  Vertex entry point: transforms the vertex and precomputes every texture
//  coordinate and mask-tiling parameter the fragment stage consumes.
void main()
{
    gl_Position = global.MVP * Position;

    //  Each input texture is addressed in its own coordinate space; all of
    //  them are derived from the shared video-space uv.
    video_uv = TexCoord * IN.texture_size/IN.video_size;
    halation_tex_uv = video_uv * HALATION_BLURvideo_size /
        HALATION_BLURtexture_size;
    blur3x3_tex_uv = video_uv * BLOOM_APPROXvideo_size /
        BLOOM_APPROXtexture_size;
    scanline_texture_size_inv =
        float2(1.0, 1.0)/VERTICAL_SCANLINEStexture_size;
    scanline_tex_uv = video_uv * VERTICAL_SCANLINESvideo_size *
        scanline_texture_size_inv;

    //  Pick the size of whichever mask texture will actually be sampled.
    //  Sample mode 0 reads the manually resized mask; every other mode (and
    //  any build without manual resizing) reads the large mask LUT.
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        const bool use_resized_mask = get_mask_sample_mode() < 0.5;
        const float2 mask_resize_texture_size = use_resized_mask ?
            MASK_RESIZEtexture_size : mask_texture_large_size;
        const float2 mask_resize_video_size = use_resized_mask ?
            MASK_RESIZEvideo_size : mask_texture_large_size;
    #else
        const float2 mask_resize_texture_size = mask_texture_large_size;
        const float2 mask_resize_video_size = mask_texture_large_size;
    #endif

    //  Compute the mask tile start uv and size; mask_tiles_per_screen is
    //  filled in through the final argument.
    mask_tile_start_uv_and_size = get_mask_sampling_parameters(
        mask_resize_texture_size, mask_resize_video_size, IN.output_size,
        mask_tiles_per_screen);
}
134 
#pragma stage fragment
//  Fragment inputs: one per vertex-stage output, matched by location number.
layout(location = 0) in vec2 video_uv;
layout(location = 1) in vec2 scanline_tex_uv;
layout(location = 2) in vec2 blur3x3_tex_uv;
layout(location = 3) in vec2 halation_tex_uv;
layout(location = 4) in vec2 scanline_texture_size_inv;
layout(location = 5) in vec4 mask_tile_start_uv_and_size;
layout(location = 6) in vec2 mask_tiles_per_screen;
//  Final color written by the fragment main().
layout(location = 0) out vec4 FragColor;
//  Samplers.  Source is this pass's input; the three *_texture_large samplers
//  are the grille, slot and shadow mask LUTs; the upper-case names are the
//  outputs of earlier passes.
layout(set = 0, binding = 2) uniform sampler2D Source;
layout(set = 0, binding = 3) uniform sampler2D mask_grille_texture_large;
layout(set = 0, binding = 4) uniform sampler2D mask_slot_texture_large;
layout(set = 0, binding = 5) uniform sampler2D mask_shadow_texture_large;
layout(set = 0, binding = 6) uniform sampler2D VERTICAL_SCANLINES;
layout(set = 0, binding = 7) uniform sampler2D BLOOM_APPROX;
layout(set = 0, binding = 8) uniform sampler2D HALATION_BLUR;
//  MASK_RESIZE exists only when the mask is manually resized.  Any code that
//  names it (directly or via the MASK_RESIZEtexture alias when that alias does
//  not map to Source) must be compiled under the same condition.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
layout(set = 0, binding = 9) uniform sampler2D MASK_RESIZE;
#endif
154 
155 //////////////////////////////  FRAGMENT INCLUDES  //////////////////////////////
156 
157 #include "bloom-functions.h"
158 
//  Fragment entry point.  Steps, in order:
//    1. Horizontally sample the vertically interpolated scanlines.
//    2. Sample the phosphor mask, either from one of the large mask LUTs or
//       from the manually resized mask.
//    3. Sample the halation blur, average it to a scalar and blend it with
//       the scanline color by global.halation_weight.
//    4. Multiply by the phosphor mask.
//    5. If PHOSPHOR_BLOOM_FAKE is defined, undo the dimming and blend toward
//       the BLOOM_APPROX blur to fake a bloom; otherwise output the dim result.
//    6. Write encode_output(...) of the result to FragColor with alpha 1.0.
void main()
{
    //  This pass: Sample (misconverged?) scanlines to the final horizontal
    //  resolution, apply halation (bouncing electrons), and apply the phosphor
    //  mask.  Fake a bloom if requested.  Unless we fake a bloom, the output
    //  will be dim from the scanline auto-dim, mask dimming, and low gamma.

    //  Horizontally sample the current row (a vertically interpolated scanline)
    //  and account for horizontal convergence offsets, given in units of texels.
    const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
        VERTICAL_SCANLINEStexture, scanline_tex_uv,
        VERTICAL_SCANLINEStexture_size, scanline_texture_size_inv);
    const float auto_dim_factor = levels_autodim_temp;

    //  Sample the phosphor mask:
    const float2 tile_uv_wrap = video_uv * mask_tiles_per_screen;
    const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
        tile_uv_wrap, mask_tile_start_uv_and_size);
    float3 phosphor_mask_sample;
    //  Sample modes above 0.5 read the original LUTs; without manual resizing
    //  the original LUTs are the only option.
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
    #else
        static const bool sample_orig_luts = true;
    #endif
    if(sample_orig_luts)
    {
        //  If mask_type is static, this branch will be resolved statically.
        if(mask_type < 0.5)
        {
            phosphor_mask_sample = tex2D_linearize(
                mask_grille_texture_large, mask_tex_uv).rgb;
        }
        else if(mask_type < 1.5)
        {
            phosphor_mask_sample = tex2D_linearize(
                mask_slot_texture_large, mask_tex_uv).rgb;
        }
        else
        {
            phosphor_mask_sample = tex2D_linearize(
                mask_shadow_texture_large, mask_tex_uv).rgb;
        }
    }
    //  The resized-mask sampler is only declared when manual resizing is
    //  enabled, so only compile the branch that reads it in that case.  When
    //  manual resizing is disabled sample_orig_luts is always true, so this
    //  branch could never run anyway.
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
    else
    {
        //  Sample the resized mask, and avoid tiling artifacts:
        phosphor_mask_sample = tex2Dtiled_mask_linearize(
            MASK_RESIZEtexture, mask_tex_uv).rgb;
    }
    #endif

    //  Sample the halation texture (auto-dim to match the scanlines), and
    //  account for both horizontal and vertical convergence offsets, given
    //  in units of texels horizontally and same-field scanlines vertically:
    const float3 halation_color = tex2D_linearize(
        HALATION_BLURtexture, halation_tex_uv).rgb;

    //  Apply halation: Halation models electrons flying around under the glass
    //  and hitting the wrong phosphors (of any color).  It desaturates, so
    //  average the halation electrons to a scalar.  Reduce the local scanline
    //  intensity accordingly to conserve energy.
    const float3 halation_intensity_dim =
        float3(dot(halation_color, float3(auto_dim_factor/3.0)));
    const float3 electron_intensity_dim = lerp(scanline_color_dim,
        halation_intensity_dim, global.halation_weight);

    //  Apply the phosphor mask:
    const float3 phosphor_emission_dim = electron_intensity_dim *
        phosphor_mask_sample;

    #ifdef PHOSPHOR_BLOOM_FAKE
        //  The BLOOM_APPROX pass approximates a blurred version of a masked
        //  and scanlined image.  It's usually used to compute the brightpass,
        //  but we can also use it to fake the bloom stage entirely.  Caveats:
        //  1.) A fake bloom is conceptually different, since we're mixing in a
        //      fully blurred low-res image, and the biggest implication are:
        //  2.) If mask_amplify is incorrect, results deteriorate more quickly.
        //  3.) The inaccurate blurring hurts quality in high-contrast areas.
        //  4.) The bloom_underestimate_levels parameter seems less sensitive.
        //  Reverse the auto-dimming and amplify to compensate for mask dimming:
        //  The simple blend is forced on here, so the #else branches below
        //  that depend on this macro are never compiled.
        #define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
            static const float blur_contrast = 1.05;
        #else
            static const float blur_contrast = 1.0;
        #endif
        const float mask_amplify = get_mask_amplify();
        const float undim_factor = 1.0/auto_dim_factor;
        const float3 phosphor_emission =
            phosphor_emission_dim * undim_factor * mask_amplify;
        //  Get a phosphor blur estimate, accounting for convergence offsets:
        const float3 electron_intensity = electron_intensity_dim * undim_factor;
        const float3 phosphor_blur_approx_soft = tex2D_linearize(
            BLOOM_APPROXtexture, blur3x3_tex_uv).rgb;
        const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
            electron_intensity, 0.1) * blur_contrast;
        //  We could blend between phosphor_emission and phosphor_blur_approx,
        //  solving for the minimum blend_ratio that avoids clipping past 1.0:
        //      1.0 >= total_intensity
        //      1.0 >= phosphor_emission * (1.0 - blend_ratio) +
        //              phosphor_blur_approx * blend_ratio
        //      blend_ratio = (phosphor_emission - 1.0)/
        //          (phosphor_emission - phosphor_blur_approx);
        //  However, this blurs far more than necessary, because it aims for
        //  full brightness, not minimal blurring.  To fix it, base blend_ratio
        //  on a max area intensity only so it varies more smoothly:
        const float3 phosphor_blur_underestimate =
            phosphor_blur_approx * bloom_underestimate_levels;
        const float3 area_max_underestimate =
            phosphor_blur_underestimate * mask_amplify;
        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
            const float3 blend_ratio_temp =
                (area_max_underestimate - float3(1.0, 1.0, 1.0)) /
                (area_max_underestimate - phosphor_blur_underestimate);
        #else
            //  Try doing it like an area-based brightpass.  This is nearly
            //  identical, but it's worth toying with the code in case I ever
            //  find a way to make it look more like a real bloom.  (I've had
            //  some promising textures from combining an area-based blend ratio
            //  for the phosphor blur and a more brightpass-like blend-ratio for
            //  the phosphor emission, but I haven't found a way to make the
            //  brightness correct across the whole color range, especially with
            //  different bloom_underestimate_levels values.)
            const float desired_triad_size = lerp(global.mask_triad_size_desired,
                IN.output_size.x/global.mask_num_triads_desired,
                global.mask_specify_num_triads);
            const float bloom_sigma = get_min_sigma_to_blur_triad(
                desired_triad_size, bloom_diff_thresh);
            const float center_weight = get_center_weight(bloom_sigma);
            const float3 max_area_contribution_approx =
                max(float3(0.0, 0.0, 0.0), phosphor_blur_approx -
                center_weight * phosphor_emission);
            const float3 area_contrib_underestimate =
                bloom_underestimate_levels * max_area_contribution_approx;
            const float3 blend_ratio_temp =
                ((float3(1.0, 1.0, 1.0) - area_contrib_underestimate) /
                area_max_underestimate - float3(1.0, 1.0, 1.0)) / (center_weight - 1.0);
        #endif
        //  Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
        //  min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
        //  and this redundant sequence avoids bugs, at least on nVidia cards:
        const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
        //  global.bloom_excess pushes the ratio toward 1.0 (fully blurred).
        const float3 blend_ratio = lerp(blend_ratio_clamped, float3(1.0,1.0,1.0), global.bloom_excess);
        //  Blend the blurred and unblurred images:
        const float3 phosphor_emission_unclipped =
            lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
        //  Simulate refractive diffusion by reusing the halation sample.
        const float3 pixel_color = lerp(phosphor_emission_unclipped,
            halation_color, global.diffusion_weight);
    #else
        //  No fake bloom: pass the dim, masked result through unchanged.
        const float3 pixel_color = phosphor_emission_dim;
    #endif
    //  Encode if necessary, and output.
    FragColor = encode_output(float4(pixel_color, 1.0));
}
313