1 #ifndef VRAM_H
2 #define VRAM_H
3 
// ---- Per-fragment inputs ------------------------------------------------

// Interpolated texture coordinate, in VRAM texel units.
layout(location = 1) in mediump vec2 vUV;

// xy: VRAM position the palette index is added to in sample_vram_atlas()
//     when SHIFT != 0. z is not read by the code in this file (the
//     per-primitive shift it once supplied is now the SHIFT constant).
layout(location = 2) flat in mediump ivec3 vParam;

// VRAM offset added to every texel coordinate before fetching.
layout(location = 3) flat in mediump ivec2 vBaseUV;

// Texture window: xy is AND-ed with the coordinate, zw is then OR-ed in.
layout(location = 4) flat in mediump ivec4 vWindow;

// Clamp rectangle used by clamp_coord(): xy = minimum, zw = maximum.
layout(location = 5) flat in mediump ivec4 vTexLimits;

// ---- VRAM source --------------------------------------------------------
// UNSCALED: integer framebuffer holding raw 16-bit texels.
// otherwise: colour framebuffer upscaled by SCALE, optionally multisampled.
#ifdef UNSCALED
layout(set = 0, binding = 0) uniform mediump usampler2D uFramebuffer;
#else
// Number of framebuffer pixels per VRAM texel along each axis.
layout(constant_id = 3) const int SCALE = 1;
#ifdef MSAA
layout(set = 0, binding = 0) uniform mediump sampler2DMS uFramebufferMS;
#else
layout(set = 0, binding = 0) uniform mediump sampler2D uFramebuffer;
#endif
#endif

// Palette mode selector: 0 = direct colour, otherwise each 16-bit VRAM
// word packs (1 << SHIFT) palette indices of (16 >> SHIFT) bits each.
layout(constant_id = 4) const int SHIFT = 0;
20 
// Restrict a texel coordinate to the rectangle stored in vTexLimits
// (xy = lower bound, zw = upper bound).
vec2 clamp_coord(vec2 coord)
{
	vec2 lower = vec2(vTexLimits.xy);
	vec2 upper = vec2(vTexLimits.zw);
	return clamp(coord.xy, lower, upper);
}
25 
// Nearest neighbor
//
// Fetches one texel from VRAM for texture coordinate `uvv` (texel units).
//
//  * SHIFT == 0: direct colour. The texel at vBaseUV + uvv is returned.
//    The texture window (vWindow) is NOT applied on this path.
//  * SHIFT != 0: paletted. The window is applied to the integer coordinate,
//    the 16-bit word containing the index is fetched, the index is extracted
//    and the colour is read from (vParam.x + index, vParam.y).
//
// All coordinates wrap inside the 1024x512 VRAM.
vec4 sample_vram_atlas(vec2 uvv)
{
    const vec2 FB_SIZE = vec2(1024, 512);
    const ivec2 FB_MASK = ivec2(1023, 511);
    // The shift used to come from (vParam.z & 3); it is now a
    // specialization constant.
    const int shift = SHIFT;

    // Final VRAM position of the colour to return.
#if defined(UNSCALED)
    ivec2 coord;
#else
    vec2 coord;
#endif

    if (shift == 0)
    {
#if defined(UNSCALED)
        coord = vBaseUV + ivec2(uvv);
#else
        coord = vBaseUV + uvv;
#endif
    }
    else
    {
        // Apply the texture window to the integer texel position.
        ivec2 texel_pos = (ivec2(uvv) & vWindow.xy) | vWindow.zw;

        // Each 16-bit word holds (1 << shift) indices of index_bits bits.
        int index_bits = 16 >> shift;
        int slot = texel_pos.x & ((1 << shift) - 1);
        int bit_offset = index_bits * slot;
        texel_pos.x >>= shift;

#if defined(UNSCALED)
        int word = int(texelFetch(uFramebuffer, (vBaseUV + texel_pos) & FB_MASK, 0).x);
#else
        texel_pos = ivec2(mod((vBaseUV + texel_pos), FB_SIZE));
#if defined(MSAA)
        int word = int(pack_abgr1555(texelFetch(uFramebufferMS, texel_pos * SCALE, gl_SampleID)));
#else
        int word = int(pack_abgr1555(texelFetch(uFramebuffer, texel_pos * SCALE, 0)));
#endif
#endif
        int index_mask = (1 << index_bits) - 1;
        int index = (word >> bit_offset) & index_mask;

        // The palette entry sits `index` texels to the right of vParam.xy.
        ivec3 palette = vParam;
        palette.x += index;
        coord = palette.xy;
    }

#if defined(UNSCALED)
    return abgr1555(texelFetch(uFramebuffer, coord & FB_MASK, 0).x);
#elif defined(MSAA)
    return texelFetch(uFramebufferMS, ivec2(mod(coord, FB_SIZE) * SCALE), gl_SampleID);
#else
    return texelFetch(uFramebuffer, ivec2(mod(coord, FB_SIZE) * SCALE), 0);
#endif
}
78 
// Convert a normalized colour into the 16-bit 1555 ABGR integer used
// internally by the Playstation GPU: bit 15 = alpha, bits 14..10 = blue,
// bits 9..5 = green, bits 4..0 = red. Each channel is rounded to nearest.
uint rebuild_psx_color(vec4 color) {
  uvec3 rgb5 = uvec3(floor(color.rgb * 31. + 0.5));
  uint alpha1 = uint(floor(color.a + 0.5));

  uint packed = alpha1 << 15;
  packed |= rgb5.b << 10;
  packed |= rgb5.g << 5;
  packed |= rgb5.r;
  return packed;
}
89 
// On the Playstation GPU the texture colour 0x0000 is special: it marks a
// fully transparent texel, even for opaque draw commands. Opaque black has
// to be encoded as `0x8000` instead.
//
// Returns true when `texel` packs to exactly 0x0000.
bool is_transparent(vec4 texel)
{
	uint packed = rebuild_psx_color(texel);
	return packed == 0U;
}
98 
99 #ifdef FILTERS
// Bilinear filter around vUV.
//
// The texel containing vUV and its three neighbours in the direction of
// the nearest texel edge are fetched. Each tap's alpha is replaced by a
// coverage flag (0 for the special transparent colour, 1 otherwise) before
// blending.
//
// opacity (out): blended coverage in [0, 1].
// Returns: rgb renormalized by the coverage so transparent (black) taps do
//          not darken the result; .w equals `opacity`. When every tap is
//          transparent (opacity == 0) rgb is returned as 0 instead of
//          dividing by zero.
vec4 sample_vram_bilinear(out float opacity)
{
  float x = vUV.x;
  float y = vUV.y;

  // interpolate from centre of texel
  vec2 uv_frac = fract(vec2(x, y)) - vec2(0.5, 0.5);
  vec2 uv_offs = sign(uv_frac);
  uv_frac = abs(uv_frac);

  // sample 4 nearest texels
  vec4 texel_00 = sample_vram_atlas(clamp_coord(vec2(x + 0., y + 0.)));
  vec4 texel_10 = sample_vram_atlas(clamp_coord(vec2(x + uv_offs.x, y + 0.)));
  vec4 texel_01 = sample_vram_atlas(clamp_coord(vec2(x + 0., y + uv_offs.y)));
  vec4 texel_11 = sample_vram_atlas(clamp_coord(vec2(x + uv_offs.x, y + uv_offs.y)));

  // test for fully transparent texel
  texel_00.w = 1. - float(is_transparent(texel_00));
  texel_10.w = 1. - float(is_transparent(texel_10));
  texel_01.w = 1. - float(is_transparent(texel_01));
  texel_11.w = 1. - float(is_transparent(texel_11));

  // average samples
  vec4 texel = texel_00 * (1. - uv_frac.x) * (1. - uv_frac.y)
    + texel_10 * uv_frac.x * (1. - uv_frac.y)
    + texel_01 * (1. - uv_frac.x) * uv_frac.y
    + texel_11 * uv_frac.x * uv_frac.y;

  opacity = texel.w;

  // adjust colour to account for black transparent samples (assume rgb would
  // be average of other pixels). Skip the division when nothing is covered:
  // 1/0 would turn rgb into inf/NaN.
  float inv_opacity = (opacity != 0.0) ? (1. / opacity) : 0.0;
  texel.rgb = texel.rgb * inv_opacity;

  return texel;
}
135 
// ---- xBR filter parameters ----------------------------------------------

// Per-corner classification stored in blendResult by sample_vram_xbr().
const int BLEND_NONE     = 0;
const int BLEND_NORMAL   = 1;
const int BLEND_DOMINANT = 2;

// Scale applied to the luma term inside DistYCbCr().
const float LUMINANCE_WEIGHT = 1.0;
// IsPixEqual() treats two pixels as equal below this DistYCbCr() distance.
const float EQUAL_COLOR_TOLERANCE = 0.1176470588235294;
// Ratio between the two diagonal distances required to pick a
// shallow/steep blend line in sample_vram_xbr().
const float STEEP_DIRECTION_THRESHOLD = 2.2;
// Ratio between the two gradient sums required to flag a corner as
// BLEND_DOMINANT.
const float DOMINANT_DIRECTION_THRESHOLD = 3.6;

// Weights dotted with an RGBA difference in DistYCbCr() to obtain luma
// (the alpha difference contributes with weight 0.5).
// NOTE(review): the rgb values look like the Rec. 2020 luma coefficients - confirm.
const vec4 w = vec4(0.2627, 0.6780, 0.0593, 0.5);
144 
// Distance between two pixels measured on luma / chroma-blue / chroma-red
// differences. The luma term is dot(difference, w), so the .w channel
// difference contributes as well.
float DistYCbCr(vec4 pixA, vec4 pixB)
{
  vec4 delta = pixA - pixB;

  float luma = dot(delta, w);

  const float scaleB = 0.5 / (1.0 - w.b);
  const float scaleR = 0.5 / (1.0 - w.r);
  float chroma_b = scaleB * (delta.b - luma);
  float chroma_r = scaleR * (delta.r - luma);

  float weighted_luma = LUMINANCE_WEIGHT * luma;
  return sqrt((weighted_luma * weighted_luma) + (chroma_b * chroma_b) + (chroma_r * chroma_r));
}
156 
// True when the DistYCbCr() distance between the two pixels is strictly
// below EQUAL_COLOR_TOLERANCE.
bool IsPixEqual(const vec4 pixA, const vec4 pixB)
{
  float dist = DistYCbCr(pixA, pixB);
  return dist < EQUAL_COLOR_TOLERANCE;
}
161 
// Blend weight in [0, 1] derived from the signed, scaled distance of
// `center` to the line through `origin` along `direction`.
// The sign comes from the side of the line (relative to the direction
// rotated by +90 degrees); the distance is smoothed over
// [-sqrt(2)/2, +sqrt(2)/2] instead of using a hard step.
float get_left_ratio(vec2 center, vec2 origin, vec2 direction, vec2 scale)
{
  vec2 rel = center - origin;

  // Component of `rel` perpendicular to the line.
  vec2 along = direction * (dot(rel, direction) / dot(direction, direction));
  vec2 across = rel - along;

  vec2 normal = vec2(-direction.y, direction.x);
  float signed_dist = sign(dot(rel, normal)) * length(across * scale);

  // A hard edge would be step(0, signed_dist).
  return smoothstep(-sqrt(2.0)/2.0, sqrt(2.0)/2.0, signed_dist);
}
174 
// True when all four components of a and b are equal.
bool eq(vec4 a, vec4 b)
{
   return all(equal(a, b));
}
178 
// True when at least one component of a differs from b.
bool neq(vec4 a, vec4 b)
{
   return any(notEqual(a, b));
}
182 
// Fetch the texel at `coord` displaced by (x, y) texels, after clamping
// the displaced position with clamp_coord().
vec4 P(vec2 coord, int x, int y)
{
   vec2 tap = coord + vec2(x, y);
   return sample_vram_atlas(clamp_coord(tap));
}
186 
// Edge-directed (xBR-style) filter around vUV.
//
// Stage 1 classifies each of the four corners of the centre texel E as
// BLEND_NONE / BLEND_NORMAL / BLEND_DOMINANT from colour-distance sums.
// Stage 2 blends E towards the closer of the two neighbours of every
// flagged corner, weighted by get_left_ratio() along a diagonal line that
// is made shallower/steeper when the surrounding pixels call for it.
//
// The 3x3 taps A..I get their .w replaced by a coverage flag (0 for the
// special transparent colour, 1 otherwise).
// NOTE(review): the outer-ring taps fetched inline through P(coord, ..)
// keep the sampled alpha in .w, so DistYCbCr() mixes flag and raw alpha
// for those comparisons - confirm this is intended.
//
// opacity (out): blended coverage flag of the result.
// Returns: rgb renormalized by the coverage, .w equal to `opacity`. When
//          opacity is 0 the rgb is returned as 0 instead of dividing by zero.
vec4 sample_vram_xbr(out float opacity)
{
  //---------------------------------------
  // Input Pixel Mapping:  -|x|x|x|-
  //                       x|A|B|C|x
  //                       x|D|E|F|x
  //                       x|G|H|I|x
  //                       -|x|x|x|-

  vec2 scale = vec2(8.0);
  // Position inside the texel relative to its centre, and the texel centre.
  vec2 pos = fract(vUV.xy) - vec2(0.5, 0.5);
  vec2 coord = vUV.xy - pos;

  vec4 A = P(coord, -1,-1);
  A.w = 1. - float(is_transparent(A));
  vec4 B = P(coord,  0,-1);
  B.w = 1. - float(is_transparent(B));
  vec4 C = P(coord,  1,-1);
  C.w = 1. - float(is_transparent(C));
  vec4 D = P(coord, -1, 0);
  D.w = 1. - float(is_transparent(D));
  vec4 E = P(coord, 0, 0);
  E.w = 1. - float(is_transparent(E));
  vec4 F = P(coord,  1, 0);
  F.w = 1. - float(is_transparent(F));
  vec4 G = P(coord, -1, 1);
  G.w = 1. - float(is_transparent(G));
  vec4 H = P(coord,  0, 1);
  H.w = 1. - float(is_transparent(H));
  vec4 I = P(coord,  1, 1);
  I.w = 1. - float(is_transparent(I));

  // blendResult Mapping: x|y|
  //                      w|z|
  ivec4 blendResult = ivec4(BLEND_NONE,BLEND_NONE,BLEND_NONE,BLEND_NONE);

  // Preprocess corners
  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|-|B|C|-
  //                    -|D|E|F|x
  //                    -|G|H|I|x
  //                    -|-|x|x|-
  if (!((eq(E,F) && eq(H,I)) || (eq(E,H) && eq(F,I))))
  {
    float dist_H_F = DistYCbCr(G, E) + DistYCbCr(E, C) + DistYCbCr(P(coord, 0,2), I) + DistYCbCr(I, P(coord, 2,0)) + (4.0 * DistYCbCr(H, F));
    float dist_E_I = DistYCbCr(D, H) + DistYCbCr(H, P(coord, 1,2)) + DistYCbCr(B, F) + DistYCbCr(F, P(coord, 2,1)) + (4.0 * DistYCbCr(E, I));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_H_F) < dist_E_I;
    blendResult.z = ((dist_H_F < dist_E_I) && neq(E,F) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }


  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|A|B|-|-
  //                    x|D|E|F|-
  //                    x|G|H|I|-
  //                    -|x|x|-|-
  if (!((eq(D,E) && eq(G,H)) || (eq(D,G) && eq(E,H))))
  {
    float dist_G_E = DistYCbCr(P(coord, -2,1)  , D) + DistYCbCr(D, B) + DistYCbCr(P(coord, -1,2), H) + DistYCbCr(H, F) + (4.0 * DistYCbCr(G, E));
    float dist_D_H = DistYCbCr(P(coord, -2,0)  , G) + DistYCbCr(G, P(coord, 0,2)) + DistYCbCr(A, E) + DistYCbCr(E, I) + (4.0 * DistYCbCr(D, H));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_H) < dist_G_E;
    blendResult.w = ((dist_G_E > dist_D_H) && neq(E,D) && neq(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  // Pixel Tap Mapping: -|-|x|x|-
  //                    -|A|B|C|x
  //                    -|D|E|F|x
  //                    -|-|H|I|-
  //                    -|-|-|-|-
  if (!((eq(B,C) && eq(E,F)) || (eq(B,E) && eq(C,F))))
  {
    float dist_E_C = DistYCbCr(D, B) + DistYCbCr(B, P(coord, 1,-2)) + DistYCbCr(H, F) + DistYCbCr(F, P(coord, 2,-1)) + (4.0 * DistYCbCr(E, C));
    float dist_B_F = DistYCbCr(A, E) + DistYCbCr(E, I) + DistYCbCr(P(coord, 0,-2), C) + DistYCbCr(C, P(coord, 2,0)) + (4.0 * DistYCbCr(B, F));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_B_F) < dist_E_C;
    blendResult.y = ((dist_E_C > dist_B_F) && neq(E,B) && neq(E,F)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  // Pixel Tap Mapping: -|x|x|-|-
  //                    x|A|B|C|-
  //                    x|D|E|F|-
  //                    -|G|H|-|-
  //                    -|-|-|-|-
  if (!((eq(A,B) && eq(D,E)) || (eq(A,D) && eq(B,E))))
  {
    float dist_D_B = DistYCbCr(P(coord, -2,0), A) + DistYCbCr(A, P(coord, 0,-2)) + DistYCbCr(G, E) + DistYCbCr(E, C) + (4.0 * DistYCbCr(D, B));
    float dist_A_E = DistYCbCr(P(coord, -2,-1), D) + DistYCbCr(D, H) + DistYCbCr(P(coord, -1,-2), B) + DistYCbCr(B, F) + (4.0 * DistYCbCr(A, E));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_B) < dist_A_E;
    blendResult.x = ((dist_D_B < dist_A_E) && neq(E,D) && neq(E,B)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  vec4 res = E;

  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|-|B|C|-
  //                    -|D|E|F|x
  //                    -|G|H|I|x
  //                    -|-|x|x|-
  if(blendResult.z != BLEND_NONE)
  {
    float dist_F_G = DistYCbCr(F, G);
    float dist_H_C = DistYCbCr(H, C);
    bool doLineBlend = (blendResult.z == BLEND_DOMINANT ||
                !((blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || (blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) ||
                  (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I))));

    vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
    vec2 direction = vec2(1.0, -1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && neq(E,G) && neq(D,G);
      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && neq(E,C) && neq(B,C);
      origin = haveShallowLine? vec2(0.0, 0.25) : vec2(0.0, 0.5);
      direction.x += haveShallowLine? 1.0: 0.0;
      direction.y -= haveSteepLine? 1.0: 0.0;
    }

    vec4 blendPix = mix(H,F, step(DistYCbCr(E, F), DistYCbCr(E, H)));
    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|A|B|-|-
  //                    x|D|E|F|-
  //                    x|G|H|I|-
  //                    -|x|x|-|-
  if(blendResult.w != BLEND_NONE)
  {
    float dist_H_A = DistYCbCr(H, A);
    float dist_D_I = DistYCbCr(D, I);
    bool doLineBlend = (blendResult.w == BLEND_DOMINANT ||
                !((blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || (blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) ||
                  (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G))));

    vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
    vec2 direction = vec2(1.0, 1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && neq(E,A) && neq(B,A);
      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && neq(E,I) && neq(F,I);
      origin = haveShallowLine? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
      direction.y += haveShallowLine? 1.0: 0.0;
      direction.x += haveSteepLine? 1.0: 0.0;
    }

    vec4 blendPix = mix(H,D, step(DistYCbCr(E, D), DistYCbCr(E, H)));
    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|-|x|x|-
  //                    -|A|B|C|x
  //                    -|D|E|F|x
  //                    -|-|H|I|-
  //                    -|-|-|-|-
  if(blendResult.y != BLEND_NONE)
  {
    float dist_B_I = DistYCbCr(B, I);
    float dist_F_A = DistYCbCr(F, A);
    bool doLineBlend = (blendResult.y == BLEND_DOMINANT ||
                !((blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || (blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) ||
                  (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C))));

    vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
    vec2 direction = vec2(-1.0, -1.0);

    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && neq(E,I) && neq(H,I);
      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && neq(E,A) && neq(D,A);
      origin = haveShallowLine? vec2(0.25, 0.0) : vec2(0.5, 0.0);
      direction.y -= haveShallowLine? 1.0: 0.0;
      direction.x -= haveSteepLine? 1.0: 0.0;
    }

    vec4 blendPix = mix(F,B, step(DistYCbCr(E, B), DistYCbCr(E, F)));
    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|x|x|-|-
  //                    x|A|B|C|-
  //                    x|D|E|F|-
  //                    -|G|H|-|-
  //                    -|-|-|-|-
  if(blendResult.x != BLEND_NONE)
  {
    float dist_D_C = DistYCbCr(D, C);
    float dist_B_G = DistYCbCr(B, G);
    bool doLineBlend = (blendResult.x == BLEND_DOMINANT ||
                !((blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || (blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) ||
                  (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && IsPixEqual(D, G) && !IsPixEqual(E, A))));

    vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
    vec2 direction = vec2(-1.0, 1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && neq(E,C) && neq(F,C);
      bool haveSteepLine  = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && neq(E,G) && neq(H,G);
      origin = haveShallowLine? vec2(0.0, -0.25) : vec2(0.0, -0.5);
      direction.x -= haveShallowLine? 1.0: 0.0;
      direction.y += haveSteepLine? 1.0: 0.0;
    }

    vec4 blendPix = mix(D,B, step(DistYCbCr(E, B), DistYCbCr(E, D)));
    res = mix(res, blendPix, get_left_ratio(pos, origin, direction, scale));
  }

  opacity = res.w;

  // Renormalize rgb by the coverage; skip the division when nothing is
  // covered, since 1/0 would turn rgb into inf/NaN.
  float inv_opacity = (opacity != 0.0) ? (1. / opacity) : 0.0;
  res.xyz = res.xyz * inv_opacity;
  return res;
}
398 
// constants and functions for sabr

// Line equations used for the per-corner blend masks in sample_vram_sabr():
// each mask is smoothstep(C - M, C + M, Ai * fp.y + B * fp.x), where the
// four vector components are evaluated side by side.
const vec4 Ai  = vec4( 1.0, -1.0, -1.0,  1.0);
const vec4 B45 = vec4( 1.0,  1.0, -1.0, -1.0);
const vec4 C45 = vec4( 1.5,  0.5, -0.5,  0.5);
const vec4 B30 = vec4( 0.5,  2.0, -0.5, -2.0);
const vec4 C30 = vec4( 1.0,  1.0, -0.5,  0.0);
const vec4 B60 = vec4( 2.0,  0.5, -2.0, -0.5);
const vec4 C60 = vec4( 2.0,  0.0, -1.0,  0.5);

// Half-widths of the smoothstep transition of each mask.
const vec4 M45    = vec4(0.4);
const vec4 M30    = vec4(0.2, 0.4, 0.2, 0.4);
const vec4 M60    = M30.yxwz;
const vec4 Mshift = vec4(0.2);

// Luminance difference below which lum_eq() reports "equal".
const vec4 threshold = vec4(0.32);

// Weights dotted with an RGBA value by lum_to(); .w is weighted by 1.0.
const vec4 lum = vec4(0.21, 0.72, 0.07, 1.0);
416 
// Pack the `lum`-weighted sums of four pixels into one vec4
// (x from v0, y from v1, z from v2, w from v3).
vec4 lum_to(vec4 v0, vec4 v1, vec4 v2, vec4 v3)
{
	float l0 = dot(lum, v0);
	float l1 = dot(lum, v1);
	float l2 = dot(lum, v2);
	float l3 = dot(lum, v3);
	return vec4(l0, l1, l2, l3);
}
420 
// Component-wise absolute difference.
vec4 lum_df(vec4 A, vec4 B)
{
	vec4 delta = A - B;
	return abs(delta);
}
424 
// Component-wise test: true where |A - B| is strictly below `threshold`.
bvec4 lum_eq(vec4 A, vec4 B)
{
	vec4 delta = lum_df(A, B);
	return lessThan(delta, threshold);
}
428 
// Weighted sum of absolute differences:
// |a-b| + |a-c| + |d-e| + |d-f| + 4*|g-h|, evaluated component-wise.
vec4 lum_wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h)
{
	vec4 total = lum_df(a, b) + lum_df(a, c);
	total = total + lum_df(d, e);
	total = total + lum_df(d, f);
	total = total + 4.0 * lum_df(g, h);
	return total;
}
432 
// Sum of the absolute r, g and b differences of two colours
// (the .w channel is ignored).
float c_df(vec4 c1, vec4 c2)
{
	vec3 delta = abs(c1.rgb - c2.rgb);
	return delta.r + delta.g + delta.b;
}
437 
// Fetch the texel at `coord` after clamping it with clamp_coord().
vec4 P(vec2 coord)
{
   vec2 clamped = clamp_coord(coord);
   return sample_vram_atlas(clamped);
}
441 
//sabr
//
// Blends the centre texel towards its edge neighbours using the four
// 45-degree corner masks.
//
// Only the taps that influence the result are fetched:
//
//          P7
//     P11  P12  P13
//          P17
//
// The previous body also fetched the remaining texels of the 5x5
// neighbourhood and computed edge-strength terms and 30/60-degree masks.
// None of those affected the output: the edge terms were never read and
// the 30/60/"rn" masks were multiplied by constant zero weights, so the
// blend mask was exactly the 45-degree mask. They were removed; the
// returned value is unchanged.
//
// Each tap's .w is replaced by a coverage flag (0 for the special
// transparent colour, 1 otherwise).
//
// opacity (out): blended coverage flag of the result.
// Returns: blended colour with .w equal to `opacity`. Unlike the other
//          filters in this file, rgb is NOT renormalized by the coverage.
vec4 sample_vram_sabr(out float opacity)
{
	vec2 tc = vUV.xy;

	vec4 P7  = P(tc + vec2( 0.0, -1.0));
	P7.w = 1. - float(is_transparent(P7));

	vec4 P11 = P(tc + vec2(-1.0,  0.0));
	P11.w = 1. - float(is_transparent(P11));
	vec4 P12 = P(tc);
	P12.w = 1. - float(is_transparent(P12));
	vec4 P13 = P(tc + vec2( 1.0,  0.0));
	P13.w = 1. - float(is_transparent(P13));

	vec4 P17 = P(tc + vec2( 0.0,  1.0));
	P17.w = 1. - float(is_transparent(P17));

	// Luminance of each tap, rotated so that component i of every vector
	// belongs to corner i.
	vec4 p7  = lum_to(P7,  P11, P17, P13);
	vec4 p12 = lum_to(P12, P12, P12, P12);
	vec4 p13 = p7.wxyz;                      // P13, P7,  P11, P17
	vec4 p17 = p7.zwxy;                      // P17, P13, P7,  P11

	vec2 fp = fract(tc);

	// 45-degree coverage mask of the four corners.
	vec4 mac = smoothstep(C45 - M45, C45 + M45, Ai * fp.y + B45 * fp.x);

	// Per corner: 0 or 1 selector between its two neighbours, from their
	// luminance distances to the centre.
	vec4 px = step(lum_df(p12, p17), lum_df(p12, p13));

	// Apply the four corner blends in both orders ...
	vec4 res1 = P12;
	res1 = mix(res1, mix(P13, P17, px.x), mac.x);
	res1 = mix(res1, mix(P7 , P13, px.y), mac.y);
	res1 = mix(res1, mix(P11, P7 , px.z), mac.z);
	res1 = mix(res1, mix(P17, P11, px.w), mac.w);

	vec4 res2 = P12;
	res2 = mix(res2, mix(P17, P11, px.w), mac.w);
	res2 = mix(res2, mix(P11, P7 , px.z), mac.z);
	res2 = mix(res2, mix(P7 , P13, px.y), mac.y);
	res2 = mix(res2, mix(P13, P17, px.x), mac.x);

	// ... and keep the result that differs more from the centre colour
	// (res2 is chosen when the two differences tie).
	vec4 texel = mix(res1, res2, step(c_df(P12, res1), c_df(P12, res2)));
	opacity = texel.w;

	return texel;
}
557 
// ---- jinc2 filter parameters --------------------------------------------

// NOTE(review): JINC2_WINDOW_SINC, JINC2_SINC, halfpi and pi are not
// referenced by the jinc2 code in this file; wa and wb hold literal values
// rather than being derived from them.
const float JINC2_WINDOW_SINC = 0.44;
const float JINC2_SINC = 0.82;
// Blend factor between the raw and the anti-ringing-clamped result
// in sample_vram_jinc2().
const float JINC2_AR_STRENGTH = 0.8;

const float halfpi = 1.5707963267948966192313216916398;
const float pi     = 3.1415926535897932384626433832795;
// Frequencies of the two sine factors evaluated by resampler().
const float wa     = 1.382300768;
const float wb     = 2.576105976;
566 
// Euclidean distance between two points.
float d(vec2 pt1, vec2 pt2)
{
  vec2 delta = pt2 - pt1;
  float squared = dot(delta, delta);
  return sqrt(squared);
}
573 
// Component-wise minimum of four vectors.
vec4 min4(vec4 a, vec4 b, vec4 c, vec4 d)
{
    vec4 lowest = min(c, d);
    lowest = min(b, lowest);
    lowest = min(a, lowest);
    return lowest;
}
578 
// Component-wise maximum of four vectors.
vec4 max4(vec4 a, vec4 b, vec4 c, vec4 d)
{
    vec4 highest = max(c, d);
    highest = max(b, highest);
    highest = max(a, highest);
    return highest;
}
583 
// Filter weight for four distances at once:
// sin(x*wa) * sin(x*wb) / x^2, with the value wa*wb substituted where
// x is exactly 0 (where the quotient would be 0/0).
vec4 resampler(vec4 x)
{
   vec4 kernel = sin(x*wa)*sin(x*wb)/(x*x);

   // The selection must be component-wise, hence mix() with a bvec4
   // rather than a scalar condition.
   bvec4 at_zero = equal(x, vec4(0.0));
   return mix(kernel, vec4(wa*wb), at_zero);
}
593 
// 4x4-tap filter around vUV with weights from resampler().
//
// The taps cover columns -1..+2 and rows -1..+2 relative to `tc`, the
// centre of the texel to the upper-left of vUV. Each tap is weighted by
// resampler(distance from vUV to the tap), the weighted sum is divided by
// the sum of all weights, and the result is pulled 80% (JINC2_AR_STRENGTH)
// of the way towards the min/max range of the central 2x2 taps to limit
// ringing.
//
// Each tap's .w is replaced by a coverage flag (0 for the special
// transparent colour, 1 otherwise) before filtering.
//
// opacity (out): filtered coverage.
// Returns: rgb renormalized by the coverage, .w equal to `opacity`. When
//          opacity is 0 the rgb is returned as 0 instead of dividing by zero.
vec4 sample_vram_jinc2(out float opacity)
{
    vec4 weights[4];

    vec2 dx = vec2(1.0, 0.0);
    vec2 dy = vec2(0.0, 1.0);

    vec2 pc = vUV.xy;

    vec2 tc = (floor(pc-vec2(0.5,0.5))+vec2(0.5,0.5));

    // One vec4 of weights per row; components are the four columns.
    weights[0] = resampler(vec4(d(pc, tc    -dx    -dy), d(pc, tc           -dy), d(pc, tc    +dx    -dy), d(pc, tc+2.0*dx    -dy)));
    weights[1] = resampler(vec4(d(pc, tc    -dx       ), d(pc, tc              ), d(pc, tc    +dx       ), d(pc, tc+2.0*dx       )));
    weights[2] = resampler(vec4(d(pc, tc    -dx    +dy), d(pc, tc           +dy), d(pc, tc    +dx    +dy), d(pc, tc+2.0*dx    +dy)));
    weights[3] = resampler(vec4(d(pc, tc    -dx+2.0*dy), d(pc, tc       +2.0*dy), d(pc, tc    +dx+2.0*dy), d(pc, tc+2.0*dx+2.0*dy)));

    // cXY = tap in column X, row Y.
    vec4 c00 = sample_vram_atlas(clamp_coord(tc    -dx    -dy));
    c00.w = 1. - float(is_transparent(c00));
    vec4 c10 = sample_vram_atlas(clamp_coord(tc           -dy));
    c10.w = 1. - float(is_transparent(c10));
    vec4 c20 = sample_vram_atlas(clamp_coord(tc    +dx    -dy));
    c20.w = 1. - float(is_transparent(c20));
    vec4 c30 = sample_vram_atlas(clamp_coord(tc+2.0*dx    -dy));
    c30.w = 1. - float(is_transparent(c30));
    vec4 c01 = sample_vram_atlas(clamp_coord(tc    -dx       ));
    c01.w = 1. - float(is_transparent(c01));
    vec4 c11 = sample_vram_atlas(clamp_coord(tc              ));
    c11.w = 1. - float(is_transparent(c11));
    vec4 c21 = sample_vram_atlas(clamp_coord(tc    +dx       ));
    c21.w = 1. - float(is_transparent(c21));
    vec4 c31 = sample_vram_atlas(clamp_coord(tc+2.0*dx       ));
    c31.w = 1. - float(is_transparent(c31));
    vec4 c02 = sample_vram_atlas(clamp_coord(tc    -dx    +dy));
    c02.w = 1. - float(is_transparent(c02));
    vec4 c12 = sample_vram_atlas(clamp_coord(tc           +dy));
    c12.w = 1. - float(is_transparent(c12));
    vec4 c22 = sample_vram_atlas(clamp_coord(tc    +dx    +dy));
    c22.w = 1. - float(is_transparent(c22));
    vec4 c32 = sample_vram_atlas(clamp_coord(tc+2.0*dx    +dy));
    c32.w = 1. - float(is_transparent(c32));
    vec4 c03 = sample_vram_atlas(clamp_coord(tc    -dx+2.0*dy));
    c03.w = 1. - float(is_transparent(c03));
    vec4 c13 = sample_vram_atlas(clamp_coord(tc       +2.0*dy));
    c13.w = 1. - float(is_transparent(c13));
    vec4 c23 = sample_vram_atlas(clamp_coord(tc    +dx+2.0*dy));
    c23.w = 1. - float(is_transparent(c23));
    vec4 c33 = sample_vram_atlas(clamp_coord(tc+2.0*dx+2.0*dy));
    c33.w = 1. - float(is_transparent(c33));

    //  Get min/max samples
    vec4 min_sample = min4(c11, c21, c12, c22);
    vec4 max_sample = max4(c11, c21, c12, c22);

    // Weighted sum, one row at a time and one channel per dot product.
    vec4 color = vec4(dot(weights[0], vec4(c00.x, c10.x, c20.x, c30.x)), dot(weights[0], vec4(c00.y, c10.y, c20.y, c30.y)), dot(weights[0], vec4(c00.z, c10.z, c20.z, c30.z)), dot(weights[0], vec4(c00.w, c10.w, c20.w, c30.w)));
    color+= vec4(dot(weights[1], vec4(c01.x, c11.x, c21.x, c31.x)), dot(weights[1], vec4(c01.y, c11.y, c21.y, c31.y)), dot(weights[1], vec4(c01.z, c11.z, c21.z, c31.z)), dot(weights[1], vec4(c01.w, c11.w, c21.w, c31.w)));
    color+= vec4(dot(weights[2], vec4(c02.x, c12.x, c22.x, c32.x)), dot(weights[2], vec4(c02.y, c12.y, c22.y, c32.y)), dot(weights[2], vec4(c02.z, c12.z, c22.z, c32.z)), dot(weights[2], vec4(c02.w, c12.w, c22.w, c32.w)));
    color+= vec4(dot(weights[3], vec4(c03.x, c13.x, c23.x, c33.x)), dot(weights[3], vec4(c03.y, c13.y, c23.y, c33.y)), dot(weights[3], vec4(c03.z, c13.z, c23.z, c33.z)), dot(weights[3], vec4(c03.w, c13.w, c23.w, c33.w)));

    // Weight normalization
    color = color/(dot(weights[0], vec4(1,1,1,1)) + dot(weights[1], vec4(1,1,1,1)) + dot(weights[2], vec4(1,1,1,1)) + dot(weights[3], vec4(1,1,1,1)));

    // Anti-ringing
    vec4 aux = color;
    color = clamp(color, min_sample, max_sample);
    color = mix(aux, color, JINC2_AR_STRENGTH);

    vec4 texel = color;
    opacity = texel.w;

    // Renormalize rgb by the coverage; skip the division when nothing is
    // covered, since 1/0 would turn rgb into inf/NaN.
    float inv_opacity = (opacity != 0.0) ? (1. / opacity) : 0.0;
    texel.rgb = texel.rgb * inv_opacity;
    return texel;
}
671 
// Three-tap (triangle) filter around vUV.
//
// Interpolates linearly between a corner texel and its horizontal and
// vertical neighbours in the direction of the nearest texel edges.
// Each tap's alpha is replaced by a coverage flag (0 for the special
// transparent colour, 1 otherwise) before blending.
//
// opacity (out): blended coverage.
// Returns: rgb renormalized by the coverage, .w equal to `opacity`. When
//          opacity is 0 the rgb is returned as 0 instead of dividing by zero.
vec4 sample_vram_3point(out float opacity)
{
  float x = vUV.x;
  float y = vUV.y;

  // interpolate from centre of texel
  vec2 uv_frac = fract(vec2(x, y)) - vec2(0.5, 0.5);
  vec2 uv_offs = sign(uv_frac);
  uv_frac = abs(uv_frac);

  vec4 texel_00;

  // NOTE(review): after abs() both components are at most 0.5, so the sum
  // reaches 1.0 only when both fractional parts of vUV are exactly 0; the
  // else branch is therefore almost never taken.
  if (uv_frac.x + uv_frac.y < 1.0) {
    // Use bottom-left
    texel_00 = sample_vram_atlas(vUV.xy);
  } else {
    // Use top-right
    texel_00 = sample_vram_atlas(clamp_coord(vec2(x + uv_offs.x, y + uv_offs.y)));

    // Mirror the weights so they are measured from the opposite corner.
    float tmp = 1. - uv_frac.y;
    uv_frac.y = 1. - uv_frac.x;
    uv_frac.x = tmp;
  }

  vec4 texel_10 = sample_vram_atlas(clamp_coord(vec2(x + uv_offs.x, y)));
  vec4 texel_01 = sample_vram_atlas(clamp_coord(vec2(x, y + uv_offs.y)));

  texel_00.w = 1. - float(is_transparent(texel_00));
  texel_10.w = 1. - float(is_transparent(texel_10));
  texel_01.w = 1. - float(is_transparent(texel_01));

  vec4 texel = texel_00
    + uv_frac.x * (texel_10 - texel_00)
    + uv_frac.y * (texel_01 - texel_00);

  opacity = texel.w;

  // adjust colour to account for black transparent samples (assume rgb would
  // be average of other pixels). Skip the division when nothing is covered:
  // 1/0 would turn rgb into inf/NaN.
  float inv_opacity = (opacity != 0.0) ? (1. / opacity) : 0.0;
  texel.rgb = texel.rgb * inv_opacity;

  return texel;
}
713 #endif
714 
715 #endif
716