1#version 450
2
3/* Ported by Hyllian and hunterk - 2015 / 2017 */
4
5// Copyright (c) 2015-2017, bacondither
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions
10// are met:
11// 1. Redistributions of source code must retain the above copyright
12//    notice, this list of conditions and the following disclaimer
13//    in this position and unchanged.
14// 2. Redistributions in binary form must reproduce the above copyright
15//    notice, this list of conditions and the following disclaimer in the
16//    documentation and/or other materials provided with the distribution.
17//
18// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
19// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29// Second pass, MUST BE PLACED IMMEDIATELY AFTER THE FIRST PASS IN THE CHAIN
30
31// Adaptive sharpen - version 2017-04-11 - (requires ps >= 3.0)
32// Tuned for use post-resize, EXPECTS FULL RANGE GAMMA LIGHT
33
// Push-constant block for this pass, followed by the two user-tunable
// parameters that are exposed through the #pragma lines below.
layout(push_constant) uniform Push
{
    vec4  SourceSize;       // used by the fragment stage to derive the texel step
    vec4  OriginalSize;
    vec4  OutputSize;
    uint  FrameCount;
    float CURVE_HEIGHT;     // aliased as curve_height in the settings section
    float VIDEO_LEVEL_OUT;  // aliased as video_level_out in the settings section
} params;

// Format: identifier, menu label, default, minimum, maximum, step
#pragma parameter CURVE_HEIGHT "AS Curve Height" 1.0 0.3 2.0 0.1
#pragma parameter VIDEO_LEVEL_OUT "AS Video Lvl Out" 0.0 0.0 1.0 1.0
46
// HLSL compatibility helpers.
// mul(a,b) multiplies with the operands swapped (b*a). Both arguments are
// parenthesised so that expression arguments keep their own precedence.
#define mul(a,b) ((b)*(a))
// saturate(c) clamps c to the [0.0, 1.0] range.
#define saturate(c) clamp((c), 0.0, 1.0)
49
// Uniform buffer at set 0 / binding 0 holding the matrix applied to the
// incoming vertex position.
layout(std140, set = 0, binding = 0) uniform UBO
{
    mat4 MVP;   // multiplied with Position in the vertex stage
} global;
54
55//--------------------------------------- Settings ------------------------------------------------
56
// Main sharpening strength (runtime parameter), POSITIVE VALUES ONLY!
// 0.3 <-> 2.0 is a reasonable range of values
#define curve_height    params.CURVE_HEIGHT

// Runtime parameter: 1.0 preserves BTB & WTW (minor summation error),
// normally it should be left at 0.0
#define video_level_out params.VIDEO_LEVEL_OUT

//-------------------------------------------------------------------------------------------------
// Defined values under this row are "optimal" DO NOT CHANGE IF YOU DO NOT KNOW WHAT YOU ARE DOING!

// Sharpening curve slope, high edge values
#define curveslope      0.4

// Max light overshoot before compression [>0.001]
#define L_overshoot     0.003
// Light compression, default (0.169=~9x)
#define L_compr_low     0.169
// Light compression, surrounded by edges (0.337=~4x)
#define L_compr_high    0.337

// Max dark overshoot before compression [>0.001]
#define D_overshoot     0.009
// Dark compression, default (0.253=~6x)
#define D_compr_low     0.253
// Dark compression, surrounded by edges (0.504=~2.5x)
#define D_compr_high    0.504

// Abs max change before compression [>0.01]
#define scale_lim       0.1
// Compression slope above scale_lim
#define scale_cs        0.056

// Centre edge value at which the kernel starts interpolating from W1 towards W2
#define dW_lothr        0.3
// Centre edge value at which the kernel is equal to W2
#define dW_hithr        0.8

// Edge value for max lowthr weight [>0.01]
#define lowthr_mxw      0.11

// Power mean p-value [>0-1.0]
#define pm_p            0.75

// MPDN requires the alpha channel output to be 1.0
#define alpha_out       1.0

//-------------------------------------------------------------------------------------------------
// Edge channel offset, must be the same in all passes
#define w_offset        1.0
// If edge data is outside bounds, make pixels green
#define bounds_check    true
//-------------------------------------------------------------------------------------------------
92
// NOTE: several helpers below read names from the scope they are expanded in:
//   soft_if -> maxedge;  get -> Source, coord, px, py;  mdiff -> luma[].
// Every macro parameter is parenthesised so that expression arguments
// (e.g. `a + b`) expand with the intended precedence.

// Soft if, fast approx
#define soft_if(a,b,c) ( saturate(((a) + (b) + (c) - 3.0*(w_offset) + 0.06)/(abs(maxedge) + 0.03) - 0.85) )

// Soft limit, modified tanh: tanh(|v|/s)*s with |v| capped at 24*s before exponentiation
#define soft_lim(v,s)  ( ((exp(2.0*min(abs(v), (s)*24.0)/(s)) - 1.0)/(exp(2.0*min(abs(v), (s)*24.0)/(s)) + 1.0))*(s) )

// Weighted power mean of |a| and |b| with weights w and |1-w|, exponent pm_p
#define wpmean(a,b,w)  ( pow(((w)*pow(abs(a), pm_p) + abs(1.0-(w))*pow(abs(b), pm_p)), (1.0/pm_p)) )

// Get destination pixel values: sample Source at an offset of (x, y) texels from coord
#define get(x,y)       ( texture(Source, coord + vec2((x)*(px), (y)*(py))) )
// Clamp the colour channels to [0, 1] and pass the w channel through unchanged
#define sat(inp)       ( vec4(saturate((inp).xyz), (inp).w) )

// Maximum of four values
#define max4(a,b,c,d)  ( max(max((a), (b)), max((c), (d))) )

// Colour to luma, fast approx gamma, avg of rec. 709 & 601 luma coeffs
#define CtL(RGB)       ( sqrt(dot(vec3(0.2558, 0.6511, 0.0931), saturate((RGB)*abs(RGB)).rgb)) )

// Center pixel diff: summed absolute luma difference between luma[g] and
// luma[a..d] (full weight) plus luma[e], luma[f] (half weight)
#define mdiff(a,b,c,d,e,f,g) ( abs(luma[g]-luma[a]) + abs(luma[g]-luma[b])			 \
                             + abs(luma[g]-luma[c]) + abs(luma[g]-luma[d])			 \
                             + 0.5*(abs(luma[g]-luma[e]) + abs(luma[g]-luma[f])) )
116
#pragma stage vertex
layout(location = 0) in  vec4 Position;
layout(location = 1) in  vec2 TexCoord;
layout(location = 0) out vec2 vTexCoord;

// Vertex stage: hand the texture coordinate through unchanged and transform
// the vertex position by the MVP matrix.
void main()
{
    vTexCoord   = TexCoord;
    gl_Position = global.MVP * Position;
}
127
#pragma stage fragment
// Texture coordinate written by the vertex stage
layout(location = 0) in  vec2 vTexCoord;
// Colour produced by the fragment stage
layout(location = 0) out vec4 FragColor;
// Texture read by the get() macro and by main()
layout(set = 0, binding = 2) uniform sampler2D Source;
132
// Sharpens one pixel using a 25-tap neighbourhood of Source.
//   orig   - texel at the current position; its w channel holds the edge value plus w_offset
//   coord  - texture coordinate of the current pixel
//   c_edge - centre edge value with w_offset already removed
//   px, py - texture-coordinate step of one texel in x and y
// Returns the sharpened colour with alpha set to alpha_out, or solid green when
// bounds_check is enabled and c_edge lies outside [-0.5, 24].
//
// The helper macros capture locals by name: soft_if() reads `maxedge`, mdiff()
// reads `luma`, get() reads `coord`, `px` and `py`. Do not rename those.
vec4 frag_op(vec4 orig, vec2 coord, float c_edge, float px, float py)
{
	// Edge data outside the expected range: paint the pixel green
	if (bounds_check && (c_edge > 24. || c_edge < -0.5))
	{
		return vec4( 0., 1.0, 0., alpha_out );
	}

	// Fetch the neighbourhood; only the centre tap has its colour clipped to [0, 1].
	// Tap indices by position:
	// [                 22                ]
	// [            24,  9,  23            ]
	// [       21,  1,   2,  3,  18        ]
	// [  19,  10,  4,   0,  5,  11,  16   ]
	// [       20,  6,   7,  8,  17        ]
	// [            15,  12, 14            ]
	// [                 13                ]
	vec4 tap[25] = { sat( orig), get(-1,-1), get( 0,-1), get( 1,-1), get(-1, 0),
	                 get( 1, 0), get(-1, 1), get( 0, 1), get( 1, 1), get( 0,-2),
	                 get(-2, 0), get( 2, 0), get( 0, 2), get( 0, 3), get( 1, 2),
	                 get(-1, 2), get( 3, 0), get( 2, 1), get( 2,-1), get(-3, 0),
	                 get(-2, 1), get(-2,-1), get( 0,-3), get( 1,-2), get(-1,-2) };

	// Largest edge value among taps 0..12, used to allow a higher overshoot when
	// the current edge pixel is surrounded by similar edge pixels
	float edge_1_4  = max4(tap[1].w, tap[2].w,  tap[3].w,  tap[4].w);
	float edge_5_8  = max4(tap[5].w, tap[6].w,  tap[7].w,  tap[8].w);
	float edge_9_12 = max4(tap[9].w, tap[10].w, tap[11].w, tap[12].w);
	float maxedge   = max4(edge_1_4, edge_5_8, edge_9_12, tap[0].w) - w_offset;

	// Soft test of the three taps on either side of the centre, per direction:
	// [          x          ]
	// [       z, x, w       ]
	// [    z, z, x, w, w    ]
	// [ y, y, y, 0, y, y, y ]
	// [    w, w, x, z, z    ]
	// [       w, x, z       ]
	// [          x          ]
	float dir_x = soft_if(tap[2].w, tap[9].w,  tap[22].w)*soft_if(tap[7].w, tap[12].w, tap[13].w);
	float dir_y = soft_if(tap[4].w, tap[10].w, tap[19].w)*soft_if(tap[5].w, tap[11].w, tap[16].w);
	float dir_z = soft_if(tap[1].w, tap[24].w, tap[21].w)*soft_if(tap[8].w, tap[14].w, tap[17].w);
	float dir_w = soft_if(tap[3].w, tap[23].w, tap[18].w)*soft_if(tap[6].w, tap[20].w, tap[15].w);
	float surround = dir_x + dir_y + dir_z + dir_w;

	// Blend from the default to the "surrounded by edges" compression (x = light, y = dark)
	vec2 compr_default    = vec2(L_compr_low,  D_compr_low);
	vec2 compr_surrounded = vec2(L_compr_high, D_compr_high);
	vec2 compr = mix( compr_default, compr_surrounded, smoothstep(2.0, 3.1, surround) );

	// RGB to luma for every tap
	float center_Y = CtL(tap[0]);

	float luma[25] = { center_Y, CtL(tap[1]), CtL(tap[2]), CtL(tap[3]), CtL(tap[4]), CtL(tap[5]),
	                   CtL(tap[6]),  CtL(tap[7]),  CtL(tap[8]),  CtL(tap[9]),  CtL(tap[10]),
	                   CtL(tap[11]), CtL(tap[12]), CtL(tap[13]), CtL(tap[14]), CtL(tap[15]),
	                   CtL(tap[16]), CtL(tap[17]), CtL(tap[18]), CtL(tap[19]), CtL(tap[20]),
	                   CtL(tap[21]), CtL(tap[22]), CtL(tap[23]), CtL(tap[24]) };

	// Square roots of the default kernel weights; they are squared again below
	const vec3 W_default = vec3(0.5,           1.0, 1.41421356237); // 0.25, 1.0, 2.0
	const vec3 W_concave = vec3(0.86602540378, 1.0, 0.5477225575);  // 0.75, 1.0, 0.3

	// Transition to a concave kernel if the center edge val is above thr
	float kernel_mix = smoothstep(dW_lothr, dW_hithr, c_edge);
	vec3  kernel_sq  = pow(mix( W_default, W_concave, kernel_mix ), vec3(2.0));

	// Luma activity around the centre: direct neighbours at full weight, diagonals at 1/4
	float direct_diff = abs(luma[0]-luma[2]) + abs(luma[0]-luma[4])
	                  + abs(luma[0]-luma[5]) + abs(luma[0]-luma[7]);
	float diag_diff   = abs(luma[0]-luma[1]) + abs(luma[0]-luma[3])
	                  + abs(luma[0]-luma[6]) + abs(luma[0]-luma[8]);
	float center_diff = 0.02 + 3.0*( direct_diff + 0.25*diag_diff );

	// Use lower weights for pixels in a more active area relative to center pixel area
	// This results in narrower and less visible overshoots around sharp edges
	float tap_weight[12] = { ( min(center_diff/mdiff(24, 21, 2,  4,  9,  10, 1),  kernel_sq.y) ),   // tap 1
	                         ( kernel_sq.x ),                                                       // tap 2
	                         ( min(center_diff/mdiff(23, 18, 5,  2,  9,  11, 3),  kernel_sq.y) ),   // tap 3
	                         ( kernel_sq.x ),                                                       // tap 4
	                         ( kernel_sq.x ),                                                       // tap 5
	                         ( min(center_diff/mdiff(4,  20, 15, 7,  10, 12, 6),  kernel_sq.y) ),   // tap 6
	                         ( kernel_sq.x ),                                                       // tap 7
	                         ( min(center_diff/mdiff(5,  7,  17, 14, 12, 11, 8),  kernel_sq.y) ),   // tap 8
	                         ( min(center_diff/mdiff(2,  24, 23, 22, 1,  3,  9),  kernel_sq.z) ),   // tap 9
	                         ( min(center_diff/mdiff(20, 19, 21, 4,  1,  6,  10), kernel_sq.z) ),   // tap 10
	                         ( min(center_diff/mdiff(17, 5,  18, 16, 3,  8,  11), kernel_sq.z) ),   // tap 11
	                         ( min(center_diff/mdiff(13, 15, 7,  14, 6,  8,  12), kernel_sq.z) ) }; // tap 12

	// Raise each diagonal weight towards a floor built from its two adjacent
	// outer taps (and 0.25), then average that with the weight itself
	tap_weight[0] = (max(max((tap_weight[8]  + tap_weight[9])/4.0,  tap_weight[0]), 0.25) + tap_weight[0])/2.0;
	tap_weight[2] = (max(max((tap_weight[8]  + tap_weight[10])/4.0, tap_weight[2]), 0.25) + tap_weight[2])/2.0;
	tap_weight[5] = (max(max((tap_weight[9]  + tap_weight[11])/4.0, tap_weight[5]), 0.25) + tap_weight[5])/2.0;
	tap_weight[7] = (max(max((tap_weight[10] + tap_weight[11])/4.0, tap_weight[7]), 0.25) + tap_weight[7])/2.0;

	// Calculate the negative part of the laplace kernel and the low threshold weight
	float lowthr_avg = 0.;
	float weight_acc = 0.;
	float blur_acc   = 0.;

	for (int k = 0; k < 12; ++k)
	{
		float t      = saturate((tap[k + 1].w - w_offset - 0.01)/(lowthr_mxw - 0.01));
		float lowthr = t*t*(2.97 - 1.98*t) + 0.01; // t*t((3.0-c*3.) - (2.0-c*2.)*t) + c
		float scaled = tap_weight[k]*lowthr;

		blur_acc   += pow(luma[k + 1] + 0.06, 2.4)*scaled;
		weight_acc += scaled;
		lowthr_avg += lowthr/12.;
	}

	float neg_laplace = pow(abs(blur_acc/weight_acc), (1.0/2.4)) - 0.06;

	// Compute sharpening magnitude function
	float strength = curve_height/(curve_height*curveslope*pow(abs(c_edge), 3.5) + 0.5);

	// Calculate sharpening diff and scale
	float sharpdiff = (center_Y - neg_laplace)*(lowthr_avg*strength*0.8 + 0.01);

	// Calculate local near min & max, partial sort: after three rounds the three
	// smallest lumas sit in luma[0..2] and the three largest in luma[22..24]
	for (int rnd = 0; rnd < 3; ++rnd)
	{
		int last = 24 - rnd;

		// Order adjacent pairs
		for (int lo_i = rnd; lo_i < last; lo_i += 2)
		{
			int   hi_i    = lo_i + 1;
			float pair_lo = min(luma[lo_i], luma[hi_i]);
			float pair_hi = max(luma[lo_i], luma[hi_i]);
			luma[lo_i] = pair_lo;
			luma[hi_i] = pair_hi;
		}

		// Sweep the pairs: minimum to luma[rnd], maximum to luma[last].
		// The two exchanges must stay in this order because they can share luma[last].
		for (int up = last; up > rnd; up -= 2)
		{
			float new_min = min(luma[rnd], luma[up]);
			float new_up  = max(luma[rnd], luma[up]);
			luma[rnd] = new_min;
			luma[up]  = new_up;

			int   down     = up - 1;
			float new_max  = max(luma[last], luma[down]);
			float new_down = min(luma[last], luma[down]);
			luma[last] = new_max;
			luma[down] = new_down;
		}
	}

	float nmax = (max(luma[22] + luma[23]*2., center_Y*3.) + luma[24])/4.;
	float nmin = (min(luma[2]  + luma[1]*2.,  center_Y*3.) + luma[0])/4.;

	// Calculate tanh scale factor, pos/neg
	float nmax_scale = nmax - center_Y + min(L_overshoot, 1.0001 - nmax);
	float nmin_scale = center_Y - nmin + min(D_overshoot, 0.0001 + nmin);

	nmax_scale = min(nmax_scale, scale_lim*(1. - scale_cs) + nmax_scale*scale_cs);
	nmin_scale = min(nmin_scale, scale_lim*(1. - scale_cs) + nmin_scale*scale_cs);

	// Soft limited anti-ringing with tanh, wpmean to control compression slope
	float brighten = max(sharpdiff, 0.);
	float darken   = min(sharpdiff, 0.);
	sharpdiff = wpmean( brighten, soft_lim( brighten, nmax_scale ), compr.x )
	          - wpmean( darken,   soft_lim( darken,   nmin_scale ), compr.y );

	// Compensate for saturation loss/gain while making pixels brighter/darker
	float sharpdiff_lim = saturate(center_Y + sharpdiff) - center_Y;
	float satmul = (center_Y + sharpdiff_lim + 0.03)/(center_Y + 0.03);
	vec3 res = center_Y + (sharpdiff_lim*3.0 + sharpdiff)/4.0 + (tap[0].rgb - center_Y)*satmul;

	// With video_level_out set, add the change to the unclipped input colour
	vec3 rgb_out = res;
	if (video_level_out == 1.0)
	{
		rgb_out = orig.rgb + (res - tap[0].rgb);
	}

	return vec4( rgb_out, alpha_out );
}
283
// Fragment entry point: fetch the centre texel, remove the edge-channel offset
// from its w channel and run the sharpening pass on it.
void main()
{
	// SourceSize.zw already holds (1/width, 1/height) in the slang format, so the
	// size of one texel does not need a per-fragment division.
	vec2 texel = params.SourceSize.zw;

	vec4 orig = texture(Source, vTexCoord);

	// The w channel carries the edge value shifted by w_offset
	float c_edge = orig.w - w_offset;

	FragColor = frag_op(orig, vTexCoord, c_edge, texel.x, texel.y);
}