/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

// NOTE(review): tone_param, sdr_avg, target_peak, desat_param,
// scene_threshold, DETECTION_FRAMES and TONE_FUNC are used below but are not
// defined in this file; they are presumably supplied by the host when the
// program is built -- confirm against the caller.

// Fixed-point scale applied when a float signal is accumulated into the
// uint statistics buffers, and divided out again when reading them back.
#define REFERENCE_WHITE 100.0f

// Colour-space helpers implemented elsewhere.
extern float3 lrgb2yuv(float3);
extern float  lrgb2y(float3);
extern float3 yuv2lrgb(float3);
extern float3 lrgb2lrgb(float3);
extern float  get_luma_src(float3);
extern float  get_luma_dst(float3);
extern float3 ootf(float3 c, float peak);
extern float3 inverse_ootf(float3 c, float peak);
extern float3 get_chroma_sample(float3, float3, float3, float3);

// Peak/average pair returned by detect_peak_avg().
struct detection_result {
    float peak;
    float average;
};

// Rational curve with fixed coefficients; building block of hable().
float hable_f(float in) {
    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
}

// All tone curves below share the (signal, peak) signature so that any of
// them can be selected through TONE_FUNC; some ignore `peak`.

// Identity curve: returns the signal unchanged.
float direct(float s, float peak) {
    return s;
}

// Linear scaling of the signal by tone_param / peak.
float linear(float s, float peak) {
    return s * tone_param / peak;
}

// Power curve with exponent 1 / tone_param on s / peak. At or below the
// 0.05 cutoff the curve is replaced by a straight line through the origin
// that meets the power curve at s == 0.05.
float gamma(float s, float peak) {
    float p = s > 0.05f ? s / peak : 0.05f / peak;
    float v = powr(p, 1.0f / tone_param);
    return s > 0.05f ? v : (s * v / 0.05f);
}

// Scales the signal by tone_param and clamps the result to [0, 1].
float clip(float s, float peak) {
    return clamp(s * tone_param, 0.0f, 1.0f);
}

// s / (s + tone_param), rescaled so that s == peak maps to 1.0.
float reinhard(float s, float peak) {
    return s / (s + tone_param) * (peak + tone_param) / peak;
}

// hable_f() normalised so that s == peak maps to 1.0.
float hable(float s, float peak) {
    return hable_f(s) / hable_f(peak);
}

// Identity up to the knee j = tone_param; above the knee the signal follows
// a rational function of the form scale * (s + a) / (s + b).
float mobius(float s, float peak) {
    float j = tone_param;
    float a, b;

    if (s <= j)
        return s;

    a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
    // the max() keeps the divisor away from zero when peak is close to 1.0
    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);

    return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
}

// detect peak/average signal of a frame, the algorithm was ported from:
// libplacebo (https://github.com/haasn/libplacebo)
//
// util_buf: statistics buffer in global memory, laid out as described below
// sum_wg:   work-group local accumulator; reset and used inside this function
// signal:   the calling work-item's signal level
// peak:     peak value returned when no frame statistics are available yet
//
// Returns the peak/average derived from the running totals stored in
// util_buf (floored at 1.0 and 0.25 respectively), or {peak, sdr_avg} while
// the stored frame count is still zero.
//
// The function contains work-group barriers, so every work-item of a
// work-group has to call it.
struct detection_result
detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
                float signal, float peak) {
// layout of the util buffer
//
// Name:             : Size (units of 4-bytes)
// average buffer    : detection_frames + 1
// peak buffer       : detection_frames + 1
// workgroup counter : 1
// total of peak     : 1
// total of average  : 1
// frame index       : 1
// frame number      : 1
    global uint *avg_buf = util_buf;
    global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
    global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
    global uint *max_total_p = counter_wg_p + 1;
    global uint *avg_total_p = max_total_p + 1;
    global uint *frame_idx_p = avg_total_p + 1;
    global uint *scene_frame_num_p = frame_idx_p + 1;

    uint frame_idx = *frame_idx_p;
    uint scene_frame_num = *scene_frame_num_p;

    size_t lidx = get_local_id(0);
    size_t lidy = get_local_id(1);
    size_t lsizex = get_local_size(0);
    size_t lsizey = get_local_size(1);
    uint num_wg = get_num_groups(0) * get_num_groups(1);
    size_t group_idx = get_group_id(0);
    size_t group_idy = get_group_id(1);
    struct detection_result r = {peak, sdr_avg};

    // the first work-item of the group resets the local accumulator
    if (lidx == 0 && lidy == 0)
        *sum_wg = 0;
    barrier(CLK_LOCAL_MEM_FENCE);

    // update workgroup sum
    atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
    barrier(CLK_LOCAL_MEM_FENCE);

    // update frame peak/avg using work-group-average.
    if (lidx == 0 && lidy == 0) {
        uint avg_wg = *sum_wg / (lsizex * lsizey);
        atomic_max(&peak_buf[frame_idx], avg_wg);
        atomic_add(&avg_buf[frame_idx], avg_wg);
    }

    // derive the result from the running totals of the stored frames
    if (scene_frame_num > 0) {
        float scene_peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
        float scene_avg  = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
        r.peak    = max(1.0f, scene_peak);
        r.average = max(0.25f, scene_avg);
    }

    // the work-group whose first work-item increments the counter last
    // finalizes the statistics of the current frame
    if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
        *counter_wg_p = 0;
        avg_buf[frame_idx] /= num_wg;

        if (scene_threshold > 0.0f) {
            uint cur_max = peak_buf[frame_idx];
            uint cur_avg = avg_buf[frame_idx];
            int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;

            // the average moved by more than the threshold: drop the
            // history and keep only the statistics of the current frame
            if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
                    avg_buf[i] = 0;
                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
                    peak_buf[i] = 0;
                *avg_total_p = *max_total_p = 0;
                *scene_frame_num_p = 0;
                avg_buf[frame_idx] = cur_avg;
                peak_buf[frame_idx] = cur_max;
            }
        }
        uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
        // add current frame, subtract next frame
        *max_total_p += peak_buf[frame_idx] - peak_buf[next];
        *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
        // reset next frame
        peak_buf[next] = avg_buf[next] = 0;
        *frame_idx_p = next;
        *scene_frame_num_p = min(*scene_frame_num_p + 1,
                                 (uint)DETECTION_FRAMES);
    }
    return r;
}

// Tone-map one RGB pixel.
//
// rgb:     pixel to map
// peak:    signal peak passed on to the tone curve
// average: average signal level used for the brightness compensation
//
// Returns rgb scaled by the ratio between the tone-mapped and the original
// signal level, where the signal level is the largest RGB component.
float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
    // the lower bound keeps the sig / sig_old division below well-defined
    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);

    // Rescale the variables in order to bring it into a representation where
    // 1.0 represents the dst_peak. This is because all of the tone mapping
    // algorithms are defined in such a way that they map to the range [0.0, 1.0].
    if (target_peak > 1.0f) {
        sig *= 1.0f / target_peak;
        peak *= 1.0f / target_peak;
    }

    float sig_old = sig;

    // Scale the signal to compensate for differences in the average brightness
    float slope = min(1.0f, sdr_avg / average);
    sig *= slope;
    peak *= slope;

    // Desaturate the color using a coefficient dependent on the signal level
    if (desat_param > 0.0f) {
        float luma = get_luma_dst(rgb);
        float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
        coeff = native_powr(coeff, 10.0f / desat_param);
        rgb = mix(rgb, (float3)luma, (float3)coeff);
        sig = mix(sig, luma * slope, coeff);
    }

    sig = TONE_FUNC(sig, peak);

    sig = min(sig, 1.0f);
    rgb *= (sig / sig_old);
    return rgb;
}

// map from source space YUV to destination space RGB
float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
    float3 c = yuv2lrgb(yuv);
    c = ootf(c, peak);
    c = lrgb2lrgb(c);
    return c;
}

// Tone-mapping kernel.
//
// dst1/src1: first output/input plane; its .x channel is read and written
//            as the Y sample, at coordinates (2 * xi [+ 1], 2 * yi [+ 1])
// dst2/src2: second output/input plane; its .xy channels carry the two
//            chroma samples, at coordinates (xi, yi)
// util_buf:  statistics buffer, see detect_peak_avg()
// peak:      source peak handed to ootf() and detect_peak_avg()
__kernel void tonemap(__write_only image2d_t dst1,
                      __read_only  image2d_t src1,
                      __write_only image2d_t dst2,
                      __read_only  image2d_t src2,
                      global uint *util_buf,
                      float peak
                      )
{
    __local uint sum_wg;
    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
                               CLK_ADDRESS_CLAMP_TO_EDGE   |
                               CLK_FILTER_NEAREST);
    int xi = get_global_id(0);
    int yi = get_global_id(1);
    // each work item processes four pixels
    int x = 2 * xi;
    int y = 2 * yi;

    float y0 = read_imagef(src1, sampler, (int2)(x,     y)).x;
    float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
    float y2 = read_imagef(src1, sampler, (int2)(x,     y + 1)).x;
    float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
    float2 uv = read_imagef(src2, sampler, (int2)(xi,     yi)).xy;

    float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak);
    float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak);
    float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak);
    float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak);

    // signal level of the 2x2 group: its largest RGB component
    float sig0 = max(c0.x, max(c0.y, c0.z));
    float sig1 = max(c1.x, max(c1.y, c1.z));
    float sig2 = max(c2.x, max(c2.y, c2.z));
    float sig3 = max(c3.x, max(c3.y, c3.z));
    float sig = max(sig0, max(sig1, max(sig2, sig3)));

    // called before the bounds check below because detect_peak_avg()
    // contains barriers that every work-item of the group has to reach
    struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);

    c0 = map_one_pixel_rgb(c0, r.peak, r.average);
    c1 = map_one_pixel_rgb(c1, r.peak, r.average);
    c2 = map_one_pixel_rgb(c2, r.peak, r.average);
    c3 = map_one_pixel_rgb(c3, r.peak, r.average);

    c0 = inverse_ootf(c0, target_peak);
    c1 = inverse_ootf(c1, target_peak);
    c2 = inverse_ootf(c2, target_peak);
    c3 = inverse_ootf(c3, target_peak);

    y0 = lrgb2y(c0);
    y1 = lrgb2y(c1);
    y2 = lrgb2y(c2);
    y3 = lrgb2y(c3);
    float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
    float3 chroma = lrgb2yuv(chroma_c);

    // only work-items that fall inside the second plane write output
    if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) {
        write_imagef(dst1, (int2)(x,     y),     (float4)(y0, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x + 1, y),     (float4)(y1, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x,     y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f));
        write_imagef(dst2, (int2)(xi, yi),
                     (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
    }
}