1 /*
2 * This file is part of libplacebo.
3 *
4 * libplacebo is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * libplacebo is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <math.h>
19 #include "shaders.h"
20
pl_shader_decode_color(pl_shader sh,struct pl_color_repr * repr,const struct pl_color_adjustment * params)21 void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr,
22 const struct pl_color_adjustment *params)
23 {
24 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
25 return;
26
27 sh_describe(sh, "color decoding");
28 GLSL("// pl_shader_decode_color \n"
29 "{ \n");
30
31 // For the non-linear color systems we need some special input handling
32 // to make sure we don't accidentally screw everything up because of the
33 // alpha multiplication, which only commutes with linear operations.
34 bool is_nonlinear = !pl_color_system_is_linear(repr->sys);
35 if (is_nonlinear && repr->alpha == PL_ALPHA_PREMULTIPLIED) {
36 GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n");
37 repr->alpha = PL_ALPHA_INDEPENDENT;
38 }
39
40 // XYZ needs special handling due to the input gamma logic
41 if (repr->sys == PL_COLOR_SYSTEM_XYZ) {
42 ident_t scale = SH_FLOAT(pl_color_repr_normalize(repr));
43 GLSL("color.rgb = pow(vec3(%s) * color.rgb, vec3(2.6));\n", scale);
44 }
45
46 enum pl_color_system orig_sys = repr->sys;
47 struct pl_transform3x3 tr = pl_color_repr_decode(repr, params);
48
49 if (memcmp(&tr, &pl_transform3x3_identity, sizeof(tr))) {
50 ident_t cmat = sh_var(sh, (struct pl_shader_var) {
51 .var = pl_var_mat3("cmat"),
52 .data = PL_TRANSPOSE_3X3(tr.mat.m),
53 });
54
55 ident_t cmat_c = sh_var(sh, (struct pl_shader_var) {
56 .var = pl_var_vec3("cmat_c"),
57 .data = tr.c,
58 });
59
60 GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c);
61 }
62
63 switch (orig_sys) {
64 case PL_COLOR_SYSTEM_BT_2020_C:
65 // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
66 // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0
67 // = (B'-Y'c) / 1.5816 | C'bc > 0
68 //
69 // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0
70 // = (R'-Y'c) / 0.9936 | C'rc > 0
71 //
72 // as per the BT.2020 specification, table 4. This is a non-linear
73 // transformation because (constant) luminance receives non-equal
74 // contributions from the three different channels.
75 GLSL("// constant luminance conversion \n"
76 "color.br = color.br * mix(vec2(1.5816, 0.9936), \n"
77 " vec2(1.9404, 1.7184), \n"
78 " %s(lessThanEqual(color.br, vec2(0.0)))) \n"
79 " + color.gg; \n",
80 sh_bvec(sh, 2));
81 // Expand channels to camera-linear light. This shader currently just
82 // assumes everything uses the BT.2020 12-bit gamma function, since the
83 // difference between 10 and 12-bit is negligible for anything other
84 // than 12-bit content.
85 GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n"
86 " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
87 " vec3(1.0/0.45)), \n"
88 " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n",
89 sh_bvec(sh, 3));
90 // Calculate the green channel from the expanded RYcB, and recompress to G'
91 // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
92 GLSL("color.g = (lin.g - 0.2627*lin.r - 0.0593*lin.b)*1.0/0.6780; \n"
93 "color.g = mix(color.g * 4.5, \n"
94 " 1.0993 * pow(color.g, 0.45) - 0.0993, \n"
95 " %s(0.0181 <= color.g)); \n",
96 sh_bvec(sh, 1));
97 break;
98
99 case PL_COLOR_SYSTEM_BT_2100_PQ:
100 case PL_COLOR_SYSTEM_BT_2100_HLG: {
101 // Conversion process from the spec:
102 //
103 // 1. L'M'S' = cmat * ICtCp
104 // 2. LMS = linearize(L'M'S') (EOTF for PQ, inverse OETF for HLG)
105 // 3. RGB = lms2rgb * LMS
106 //
107 // After this we need to invert step 2 to arrive at non-linear RGB.
108 // (It's important we keep the transfer function conversion separate
109 // from the color system decoding, so we have to partially undo our
110 // work here even though we will end up linearizing later on anyway)
111 struct pl_color_space csp = {
112 .transfer = repr->sys == PL_COLOR_SYSTEM_BT_2100_PQ
113 ? PL_COLOR_TRC_PQ
114 : PL_COLOR_TRC_HLG,
115 };
116
117 // Inverted from the matrix in the spec, transposed to column major
118 static const char *bt2100_lms2rgb = "mat3("
119 " 3.43661, -0.79133, -0.0259499, "
120 " -2.50645, 1.9836, -0.0989137, "
121 "0.0698454, -0.192271, 1.12486) ";
122
123 pl_shader_linearize(sh, csp);
124 GLSL("color.rgb = %s * color.rgb; \n", bt2100_lms2rgb);
125 pl_shader_delinearize(sh, csp);
126 break;
127 }
128
129 case PL_COLOR_SYSTEM_UNKNOWN:
130 case PL_COLOR_SYSTEM_RGB:
131 case PL_COLOR_SYSTEM_XYZ:
132 case PL_COLOR_SYSTEM_BT_601:
133 case PL_COLOR_SYSTEM_BT_709:
134 case PL_COLOR_SYSTEM_SMPTE_240M:
135 case PL_COLOR_SYSTEM_BT_2020_NC:
136 case PL_COLOR_SYSTEM_YCGCO:
137 break; // no special post-processing needed
138
139 case PL_COLOR_SYSTEM_COUNT:
140 pl_unreachable();
141 }
142
143 if (repr->alpha == PL_ALPHA_INDEPENDENT) {
144 GLSL("color.rgb *= vec3(color.a);\n");
145 repr->alpha = PL_ALPHA_PREMULTIPLIED;
146 }
147
148 // Gamma adjustment. Doing this here (in non-linear light) is technically
149 // somewhat wrong, but this is just an aesthetic parameter and not really
150 // meant for colorimetric precision, so we don't care too much.
151 if (params && params->gamma != 1.0) {
152 ident_t gamma = sh_var(sh, (struct pl_shader_var) {
153 .var = pl_var_float("gamma"),
154 .data = ¶ms->gamma,
155 });
156 GLSL("color.rgb = pow(color.rgb, vec3(%s)); \n", gamma);
157 }
158
159 GLSL("}\n");
160 }
161
pl_shader_encode_color(pl_shader sh,const struct pl_color_repr * repr)162 void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr)
163 {
164 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
165 return;
166
167 sh_describe(sh, "color encoding");
168 GLSL("// pl_shader_encode_color \n"
169 "{ \n");
170
171 switch (repr->sys) {
172 case PL_COLOR_SYSTEM_BT_2020_C:
173 // Expand R'G'B' to RGB
174 GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n"
175 " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
176 " vec3(1.0/0.45)), \n"
177 " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n",
178 sh_bvec(sh, 3));
179
180 // Compute Yc from RGB and compress to R'Y'cB'
181 GLSL("color.g = dot(vec3(0.2627, 0.6780, 0.0593), lin); \n"
182 "color.g = mix(color.g * 4.5, \n"
183 " 1.0993 * pow(color.g, 0.45) - 0.0993, \n"
184 " %s(0.0181 <= color.g)); \n",
185 sh_bvec(sh, 1));
186
187 // Compute C'bc and C'rc into color.br
188 GLSL("color.br = color.br - color.gg; \n"
189 "color.br *= mix(vec2(1.0/1.5816, 1.0/0.9936), \n"
190 " vec2(1.0/1.9404, 1.0/1.7184), \n"
191 " %s(lessThanEqual(color.br, vec2(0.0)))); \n",
192 sh_bvec(sh, 2));
193 break;
194
195 case PL_COLOR_SYSTEM_BT_2100_PQ:
196 case PL_COLOR_SYSTEM_BT_2100_HLG: {
197 struct pl_color_space csp = {
198 .transfer = repr->sys == PL_COLOR_SYSTEM_BT_2100_PQ
199 ? PL_COLOR_TRC_PQ
200 : PL_COLOR_TRC_HLG,
201 };
202
203 // Inverse of the matrix above
204 static const char *bt2100_rgb2lms = "mat3("
205 "0.412109, 0.166748, 0.024170, "
206 "0.523925, 0.720459, 0.075440, "
207 "0.063965, 0.112793, 0.900394) ";
208
209 pl_shader_linearize(sh, csp);
210 GLSL("color.rgb = %s * color.rgb; \n", bt2100_rgb2lms);
211 pl_shader_delinearize(sh, csp);
212 break;
213 }
214
215 case PL_COLOR_SYSTEM_UNKNOWN:
216 case PL_COLOR_SYSTEM_RGB:
217 case PL_COLOR_SYSTEM_XYZ:
218 case PL_COLOR_SYSTEM_BT_601:
219 case PL_COLOR_SYSTEM_BT_709:
220 case PL_COLOR_SYSTEM_SMPTE_240M:
221 case PL_COLOR_SYSTEM_BT_2020_NC:
222 case PL_COLOR_SYSTEM_YCGCO:
223 break; // no special pre-processing needed
224
225 case PL_COLOR_SYSTEM_COUNT:
226 pl_unreachable();
227 }
228
229 // Since this is a relatively rare operation, bypass it as much as possible
230 bool skip = true;
231 skip &= PL_DEF(repr->sys, PL_COLOR_SYSTEM_RGB) == PL_COLOR_SYSTEM_RGB;
232 skip &= PL_DEF(repr->levels, PL_COLOR_LEVELS_FULL) == PL_COLOR_LEVELS_FULL;
233 skip &= !repr->bits.sample_depth || !repr->bits.color_depth ||
234 repr->bits.sample_depth == repr->bits.color_depth;
235 skip &= !repr->bits.bit_shift;
236
237 if (!skip) {
238 struct pl_color_repr copy = *repr;
239 ident_t xyzscale = NULL;
240 if (repr->sys == PL_COLOR_SYSTEM_XYZ)
241 xyzscale = SH_FLOAT(1.0 / pl_color_repr_normalize(©));
242
243 struct pl_transform3x3 tr = pl_color_repr_decode(©, NULL);
244 pl_transform3x3_invert(&tr);
245
246 ident_t cmat = sh_var(sh, (struct pl_shader_var) {
247 .var = pl_var_mat3("cmat"),
248 .data = PL_TRANSPOSE_3X3(tr.mat.m),
249 });
250
251 ident_t cmat_c = sh_var(sh, (struct pl_shader_var) {
252 .var = pl_var_vec3("cmat_c"),
253 .data = tr.c,
254 });
255
256 GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c);
257
258 if (xyzscale)
259 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6)) * vec3(%s); \n", xyzscale);
260 }
261
262 if (repr->alpha == PL_ALPHA_INDEPENDENT)
263 GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n");
264
265 GLSL("}\n");
266 }
267
268 // Common constants for SMPTE ST.2084 (PQ)
269 static const float PQ_M1 = 2610./4096 * 1./4,
270 PQ_M2 = 2523./4096 * 128,
271 PQ_C1 = 3424./4096,
272 PQ_C2 = 2413./4096 * 32,
273 PQ_C3 = 2392./4096 * 32;
274
275 // Common constants for ARIB STD-B67 (HLG)
276 static const float HLG_A = 0.17883277,
277 HLG_B = 0.28466892,
278 HLG_C = 0.55991073;
279
280 // Common constants for Panasonic V-Log
281 static const float VLOG_B = 0.00873,
282 VLOG_C = 0.241514,
283 VLOG_D = 0.598206;
284
285 // Common constants for Sony S-Log
286 static const float SLOG_A = 0.432699,
287 SLOG_B = 0.037584,
288 SLOG_C = 0.616596 + 0.03,
289 SLOG_P = 3.538813,
290 SLOG_Q = 0.030001,
291 SLOG_K2 = 155.0 / 219.0;
292
pl_shader_linearize(pl_shader sh,struct pl_color_space csp)293 void pl_shader_linearize(pl_shader sh, struct pl_color_space csp)
294 {
295 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
296 return;
297
298 if (csp.transfer == PL_COLOR_TRC_LINEAR)
299 return;
300
301 // Note that this clamp may technically violate the definition of
302 // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
303 // displayed on the display where such would be possible. That said, the
304 // problem is that not all gamma curves are well-defined on the values
305 // outside this range, so we ignore it and just clamp anyway for sanity.
306 GLSL("// pl_shader_linearize \n"
307 "color.rgb = max(color.rgb, 0.0); \n");
308
309 switch (csp.transfer) {
310 case PL_COLOR_TRC_SRGB:
311 GLSL("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n"
312 " pow((color.rgb + vec3(0.055))/vec3(1.055), \n"
313 " vec3(2.4)), \n"
314 " %s(lessThan(vec3(0.04045), color.rgb))); \n",
315 sh_bvec(sh, 3));
316 return;
317 case PL_COLOR_TRC_BT_1886: {
318 float binv = powf(PL_CLAMP(csp.sig_floor, 0.0, 1.0), 1.0/2.4);
319 float a = powf(1.0 - binv, 2.4);
320 float b = binv / (1.0 - binv);
321 GLSL("color.rgb = vec3(%s) * pow(color.rgb + vec3(%s), vec3(2.4));\n",
322 SH_FLOAT(a), SH_FLOAT(b));
323 return;
324 }
325 case PL_COLOR_TRC_GAMMA18:
326 GLSL("color.rgb = pow(color.rgb, vec3(1.8));\n");
327 return;
328 case PL_COLOR_TRC_GAMMA20:
329 GLSL("color.rgb = pow(color.rgb, vec3(2.0));\n");
330 return;
331 case PL_COLOR_TRC_UNKNOWN:
332 case PL_COLOR_TRC_GAMMA22:
333 GLSL("color.rgb = pow(color.rgb, vec3(2.2));\n");
334 return;
335 case PL_COLOR_TRC_GAMMA24:
336 GLSL("color.rgb = pow(color.rgb, vec3(2.4));\n");
337 return;
338 case PL_COLOR_TRC_GAMMA26:
339 GLSL("color.rgb = pow(color.rgb, vec3(2.6));\n");
340 return;
341 case PL_COLOR_TRC_GAMMA28:
342 GLSL("color.rgb = pow(color.rgb, vec3(2.8));\n");
343 return;
344 case PL_COLOR_TRC_PRO_PHOTO:
345 GLSL("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n"
346 " pow(color.rgb, vec3(1.8)), \n"
347 " %s(lessThan(vec3(0.03125), color.rgb))); \n",
348 sh_bvec(sh, 3));
349 return;
350 case PL_COLOR_TRC_PQ:
351 GLSL("color.rgb = pow(color.rgb, vec3(1.0/%f)); \n"
352 "color.rgb = max(color.rgb - vec3(%f), 0.0) \n"
353 " / (vec3(%f) - vec3(%f) * color.rgb); \n"
354 "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n"
355 // PQ's output range is 0-10000, but we need it to be relative to
356 // to PL_COLOR_SDR_WHITE instead, so rescale
357 "color.rgb *= vec3(%f); \n",
358 PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE);
359 return;
360 case PL_COLOR_TRC_HLG:
361 GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n"
362 " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
363 " + vec3(%f), \n"
364 " %s(lessThan(vec3(0.5), color.rgb))); \n"
365 // Rescale from 0-12 to be relative to PL_COLOR_SDR_WHITE_HLG
366 "color.rgb *= vec3(1.0/%f); \n",
367 HLG_C, HLG_A, HLG_B, sh_bvec(sh, 3), PL_COLOR_SDR_WHITE_HLG);
368 return;
369 case PL_COLOR_TRC_V_LOG:
370 GLSL("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
371 " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
372 " - vec3(%f), \n"
373 " %s(lessThanEqual(vec3(0.181), color.rgb))); \n",
374 VLOG_D, VLOG_C, VLOG_B, sh_bvec(sh, 3));
375 return;
376 case PL_COLOR_TRC_S_LOG1:
377 GLSL("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
378 " - vec3(%f); \n",
379 SLOG_C, SLOG_A, SLOG_B);
380 return;
381 case PL_COLOR_TRC_S_LOG2:
382 GLSL("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n"
383 " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
384 " - vec3(%f)) * vec3(1.0/%f), \n"
385 " %s(lessThanEqual(vec3(%f), color.rgb))); \n",
386 SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, sh_bvec(sh, 3),
387 SLOG_Q);
388 return;
389 case PL_COLOR_TRC_LINEAR:
390 case PL_COLOR_TRC_COUNT:
391 break;
392 }
393
394 pl_unreachable();
395 }
396
pl_shader_delinearize(pl_shader sh,struct pl_color_space csp)397 void pl_shader_delinearize(pl_shader sh, struct pl_color_space csp)
398 {
399 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
400 return;
401
402 if (csp.transfer == PL_COLOR_TRC_LINEAR)
403 return;
404
405 GLSL("// pl_shader_delinearize \n"
406 "color.rgb = max(color.rgb, 0.0); \n");
407
408 switch (csp.transfer) {
409 case PL_COLOR_TRC_SRGB:
410 GLSL("color.rgb = mix(color.rgb * vec3(12.92), \n"
411 " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n"
412 " - vec3(0.055), \n"
413 " %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
414 sh_bvec(sh, 3));
415 return;
416 case PL_COLOR_TRC_BT_1886: {
417 float binv = powf(PL_CLAMP(csp.sig_floor, 0.0, 1.0), 1.0/2.4);
418 float a = powf(1.0 - binv, 2.4);
419 float b = binv / (1.0 - binv);
420 GLSL("color.rgb = pow(vec3(%s) * color.rgb, vec3(1.0/2.4)) - vec3(%s);\n",
421 SH_FLOAT(1.0 / a), SH_FLOAT(b));
422 return;
423 }
424 case PL_COLOR_TRC_GAMMA18:
425 GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.8));\n");
426 return;
427 case PL_COLOR_TRC_GAMMA20:
428 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.0));\n");
429 return;
430 case PL_COLOR_TRC_UNKNOWN:
431 case PL_COLOR_TRC_GAMMA22:
432 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.2));\n");
433 return;
434 case PL_COLOR_TRC_GAMMA24:
435 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4));\n");
436 return;
437 case PL_COLOR_TRC_GAMMA26:
438 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6));\n");
439 return;
440 case PL_COLOR_TRC_GAMMA28:
441 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.8));\n");
442 return;
443 case PL_COLOR_TRC_PRO_PHOTO:
444 GLSL("color.rgb = mix(color.rgb * vec3(16.0), \n"
445 " pow(color.rgb, vec3(1.0/1.8)), \n"
446 " %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
447 sh_bvec(sh, 3));
448 return;
449 case PL_COLOR_TRC_PQ:
450 GLSL("color.rgb *= vec3(1.0/%f); \n"
451 "color.rgb = pow(color.rgb, vec3(%f)); \n"
452 "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
453 " / (vec3(1.0) + vec3(%f) * color.rgb); \n"
454 "color.rgb = pow(color.rgb, vec3(%f)); \n",
455 10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
456 return;
457 case PL_COLOR_TRC_HLG:
458 GLSL("color.rgb *= vec3(%f); \n"
459 "color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n"
460 " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n"
461 " %s(lessThan(vec3(1.0), color.rgb))); \n",
462 PL_COLOR_SDR_WHITE_HLG, HLG_A, HLG_B, HLG_C, sh_bvec(sh, 3));
463 return;
464 case PL_COLOR_TRC_V_LOG:
465 GLSL("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n"
466 " vec3(%f) * log(color.rgb + vec3(%f)) \n"
467 " + vec3(%f), \n"
468 " %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
469 VLOG_C / M_LN10, VLOG_B, VLOG_D, sh_bvec(sh, 3));
470 return;
471 case PL_COLOR_TRC_S_LOG1:
472 GLSL("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
473 SLOG_A / M_LN10, SLOG_B, SLOG_C);
474 return;
475 case PL_COLOR_TRC_S_LOG2:
476 GLSL("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n"
477 " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
478 " + vec3(%f), \n"
479 " %s(lessThanEqual(vec3(0.0), color.rgb))); \n",
480 SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C,
481 sh_bvec(sh, 3));
482 return;
483 case PL_COLOR_TRC_LINEAR:
484 case PL_COLOR_TRC_COUNT:
485 break;
486 }
487
488 pl_unreachable();
489 }
490
491 const struct pl_sigmoid_params pl_sigmoid_default_params = {
492 .center = 0.75,
493 .slope = 6.50,
494 };
495
pl_shader_sigmoidize(pl_shader sh,const struct pl_sigmoid_params * params)496 void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params)
497 {
498 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
499 return;
500
501 params = PL_DEF(params, &pl_sigmoid_default_params);
502 float center = PL_DEF(params->center, 0.75);
503 float slope = PL_DEF(params->slope, 6.5);
504
505 // This function needs to go through (0,0) and (1,1), so we compute the
506 // values at 1 and 0, and then scale/shift them, respectively.
507 float offset = 1.0 / (1 + expf(slope * center));
508 float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset;
509
510 GLSL("// pl_shader_sigmoidize \n"
511 "color = clamp(color, 0.0, 1.0); \n"
512 "color = vec4(%s) - log(vec4(1.0) / (color * vec4(%s) + vec4(%s)) \n"
513 " - vec4(1.0)) * vec4(%s); \n",
514 SH_FLOAT(center), SH_FLOAT(scale), SH_FLOAT(offset), SH_FLOAT(1.0 / slope));
515 }
516
pl_shader_unsigmoidize(pl_shader sh,const struct pl_sigmoid_params * params)517 void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params)
518 {
519 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
520 return;
521
522 // See: pl_shader_sigmoidize
523 params = PL_DEF(params, &pl_sigmoid_default_params);
524 float center = PL_DEF(params->center, 0.75);
525 float slope = PL_DEF(params->slope, 6.5);
526 float offset = 1.0 / (1 + expf(slope * center));
527 float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset;
528
529 GLSL("// pl_shader_unsigmoidize \n"
530 "color = clamp(color, 0.0, 1.0); \n"
531 "color = vec4(%s) / (vec4(1.0) + exp(vec4(%s) * (vec4(%s) - color))) \n"
532 " - vec4(%s); \n",
533 SH_FLOAT(1.0 / scale), SH_FLOAT(slope), SH_FLOAT(center), SH_FLOAT(offset / scale));
534 }
535
sh_luma_coeffs(pl_shader sh,enum pl_color_primaries prim)536 static ident_t sh_luma_coeffs(pl_shader sh, enum pl_color_primaries prim)
537 {
538 struct pl_matrix3x3 rgb2xyz;
539 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(prim));
540
541 // FIXME: Cannot use `const vec3` due to glslang bug #2025
542 ident_t coeffs = sh_fresh(sh, "luma_coeffs");
543 GLSLH("#define %s vec3(%s, %s, %s) \n", coeffs,
544 SH_FLOAT(rgb2xyz.m[1][0]), // RGB->Y vector
545 SH_FLOAT(rgb2xyz.m[1][1]),
546 SH_FLOAT(rgb2xyz.m[1][2]));
547 return coeffs;
548 }
549
550 // Applies the OOTF / inverse OOTF - including the sig_scale adaptation
pl_shader_ootf(pl_shader sh,struct pl_color_space csp)551 static void pl_shader_ootf(pl_shader sh, struct pl_color_space csp)
552 {
553 if (csp.sig_scale != 1.0)
554 GLSL("color.rgb *= vec3(%s); \n", SH_FLOAT(csp.sig_scale));
555
556 if (!csp.light || csp.light == PL_COLOR_LIGHT_DISPLAY)
557 return;
558
559 GLSL("// pl_shader_ootf \n"
560 "color.rgb = max(color.rgb, 0.0); \n");
561
562 switch (csp.light)
563 {
564 case PL_COLOR_LIGHT_SCENE_HLG: {
565 // HLG OOTF from BT.2100, tuned to the indicated peak
566 float peak = csp.sig_peak * PL_COLOR_SDR_WHITE;
567 float gamma = 1.2 + 0.42 * log10(peak / 1000.0);
568 gamma = PL_MAX(gamma, 1.0);
569 GLSL("color.rgb *= vec3(%s * pow(dot(%s, color.rgb), %s));\n",
570 SH_FLOAT(csp.sig_peak / pow(12.0 / PL_COLOR_SDR_WHITE_HLG, gamma)),
571 sh_luma_coeffs(sh, csp.primaries),
572 SH_FLOAT(gamma - 1.0));
573 return;
574 }
575 case PL_COLOR_LIGHT_SCENE_709_1886:
576 // This OOTF is defined by encoding the result as 709 and then decoding
577 // it as 1886; although this is called 709_1886 we actually use the
578 // more precise (by one decimal) values from BT.2020 instead
579 GLSL("color.rgb = mix(color.rgb * vec3(4.5), \n"
580 " vec3(1.0993) * pow(color.rgb, vec3(0.45)) \n"
581 " - vec3(0.0993), \n"
582 " %s(lessThan(vec3(0.0181), color.rgb))); \n"
583 "color.rgb = pow(color.rgb, vec3(2.4)); \n",
584 sh_bvec(sh, 3));
585 return;
586 case PL_COLOR_LIGHT_SCENE_1_2:
587 GLSL("color.rgb = pow(color.rgb, vec3(1.2));\n");
588 return;
589 case PL_COLOR_LIGHT_UNKNOWN:
590 case PL_COLOR_LIGHT_DISPLAY:
591 case PL_COLOR_LIGHT_COUNT:
592 break;
593 }
594
595 pl_unreachable();
596 }
597
pl_shader_inverse_ootf(pl_shader sh,struct pl_color_space csp)598 static void pl_shader_inverse_ootf(pl_shader sh, struct pl_color_space csp)
599 {
600 if (!csp.light || csp.light == PL_COLOR_LIGHT_DISPLAY)
601 goto done;
602
603 GLSL("// pl_shader_inverse_ootf \n"
604 "color.rgb = max(color.rgb, 0.0); \n");
605
606 switch (csp.light)
607 {
608 case PL_COLOR_LIGHT_SCENE_HLG: {
609 float peak = csp.sig_peak * PL_COLOR_SDR_WHITE;
610 float gamma = 1.2 + 0.42 * log10(peak / 1000.0);
611 gamma = PL_MAX(gamma, 1.0);
612 GLSL("color.rgb *= vec3(1.0/%s); \n"
613 "color.rgb /= vec3(max(1e-6, pow(dot(%s, color.rgb), \n"
614 " %s))); \n",
615 SH_FLOAT(csp.sig_peak / pow(12.0 / PL_COLOR_SDR_WHITE_HLG, gamma)),
616 sh_luma_coeffs(sh, csp.primaries),
617 SH_FLOAT((gamma - 1.0) / gamma));
618 goto done;
619 }
620 case PL_COLOR_LIGHT_SCENE_709_1886:
621 GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4)); \n"
622 "color.rgb = mix(color.rgb * vec3(1.0/4.5), \n"
623 " pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n"
624 " vec3(1/0.45)), \n"
625 " %s(lessThan(vec3(0.08145), color.rgb))); \n",
626 sh_bvec(sh, 3));
627 goto done;
628 case PL_COLOR_LIGHT_SCENE_1_2:
629 GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.2));\n");
630 goto done;
631 case PL_COLOR_LIGHT_UNKNOWN:
632 case PL_COLOR_LIGHT_DISPLAY:
633 case PL_COLOR_LIGHT_COUNT:
634 break;
635 }
636
637 pl_unreachable();
638
639 done:
640 if (csp.sig_scale != 1.0)
641 GLSL("color.rgb *= vec3(1.0 / %s); \n", SH_FLOAT(csp.sig_scale));
642 }
643
644 const struct pl_peak_detect_params pl_peak_detect_default_params = {
645 .smoothing_period = 100.0,
646 .scene_threshold_low = 5.5,
647 .scene_threshold_high = 10.0,
648 .overshoot_margin = 0.05,
649 .minimum_peak = 1.0,
650 };
651
652 struct sh_peak_obj {
653 pl_buf buf;
654 struct pl_shader_desc desc;
655 float margin;
656 };
657
sh_peak_uninit(pl_gpu gpu,void * ptr)658 static void sh_peak_uninit(pl_gpu gpu, void *ptr)
659 {
660 struct sh_peak_obj *obj = ptr;
661 pl_buf_destroy(gpu, &obj->buf);
662 *obj = (struct sh_peak_obj) {0};
663 }
664
iir_coeff(float rate)665 static inline float iir_coeff(float rate)
666 {
667 float a = 1.0 - cos(1.0 / rate);
668 return sqrt(a*a + 2*a) - a;
669 }
670
pl_shader_detect_peak(pl_shader sh,struct pl_color_space csp,pl_shader_obj * state,const struct pl_peak_detect_params * params)671 bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp,
672 pl_shader_obj *state,
673 const struct pl_peak_detect_params *params)
674 {
675 params = PL_DEF(params, &pl_peak_detect_default_params);
676 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
677 return false;
678
679 if (!sh_try_compute(sh, 8, 8, true, 2 * sizeof(int32_t))) {
680 PL_ERR(sh, "HDR peak detection requires compute shaders!");
681 return false;
682 }
683
684 if (sh_glsl(sh).version < 130) {
685 // uint was added in GLSL 130
686 PL_ERR(sh, "HDR peak detection requires GLSL >= 130!");
687 return false;
688 }
689
690 struct sh_peak_obj *obj;
691 obj = SH_OBJ(sh, state, PL_SHADER_OBJ_PEAK_DETECT, struct sh_peak_obj,
692 sh_peak_uninit);
693 if (!obj)
694 return false;
695
696 pl_gpu gpu = SH_GPU(sh);
697 obj->margin = params->overshoot_margin;
698
699 if (!obj->buf) {
700 obj->desc = (struct pl_shader_desc) {
701 .desc = {
702 .name = "PeakDetect",
703 .type = PL_DESC_BUF_STORAGE,
704 },
705 };
706
707 // Note: Don't change this order, `vec2 average` being the first
708 // element is hard-coded in `pl_get_detected_peak`
709 bool ok = true;
710 ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_vec2("average"));
711 ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_sum"));
712 ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_max"));
713 ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_uint("counter"));
714
715 if (!ok) {
716 PL_ERR(sh, "HDR peak detection exhausts device limits!");
717 return false;
718 }
719
720 // Create the SSBO
721 size_t size = sh_buf_desc_size(&obj->desc);
722 static const uint8_t zero[32] = {0};
723 pl_assert(sizeof(zero) >= size);
724 struct pl_buf_params buf_params = {
725 .size = size,
726 .host_readable = true,
727 .memory_type = PL_BUF_MEM_DEVICE,
728 .storable = true,
729 .initial_data = zero,
730 };
731
732 // Attempt creating host-readable SSBO first, suppress errors
733 pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
734 obj->buf = pl_buf_create(gpu, &buf_params);
735 pl_log_level_cap(gpu->log, PL_LOG_NONE);
736
737 if (!obj->buf) {
738 // Fall back to non-host-readable SSBO
739 buf_params.host_readable = false;
740 obj->buf = pl_buf_create(gpu, &buf_params);
741 }
742
743 obj->desc.binding.object = obj->buf;
744 }
745
746 if (!obj->buf) {
747 SH_FAIL(sh, "Failed creating peak detection SSBO!");
748 return false;
749 }
750
751 // Attach the SSBO and perform the peak detection logic
752 obj->desc.desc.access = PL_DESC_ACCESS_READWRITE;
753 obj->desc.memory = PL_MEMORY_COHERENT;
754 sh_desc(sh, obj->desc);
755
756 sh_describe(sh, "peak detection");
757 GLSL("// pl_shader_detect_peak \n"
758 "{ \n"
759 "vec4 color_orig = color; \n");
760
761 // Decode the color into linear light absolute scale representation
762 pl_color_space_infer(&csp);
763 pl_shader_linearize(sh, csp);
764 pl_shader_ootf(sh, csp);
765
766 // For performance, we want to do as few atomic operations on global
767 // memory as possible, so use an atomic in shmem for the work group.
768 ident_t wg_sum = sh_fresh(sh, "wg_sum"), wg_max = sh_fresh(sh, "wg_max");
769 GLSLH("shared int %s; \n", wg_sum);
770 GLSLH("shared int %s; \n", wg_max);
771 GLSL("%s = 0; %s = 0; \n"
772 "barrier(); \n",
773 wg_sum, wg_max);
774
775 // Chosen to avoid overflowing on an 8K buffer
776 const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
777
778 GLSL("float sig_max = max(max(color.r, color.g), color.b); \n"
779 "float sig_log = log(max(sig_max, %f)); \n"
780 "int isig_max = int(sig_max * %f); \n"
781 "int isig_log = int(sig_log * %f); \n",
782 log_min, sig_scale, log_scale);
783
784 // Update the work group's shared atomics
785 if (sh_glsl(sh).subgroup_size) {
786 GLSL("int group_max = subgroupMax(isig_max); \n"
787 "int group_sum = subgroupAdd(isig_log); \n"
788 "if (subgroupElect()) { \n"
789 " atomicMax(%s, group_max); \n"
790 " atomicAdd(%s, group_sum); \n"
791 "} \n"
792 "barrier(); \n",
793 wg_max, wg_sum);
794 } else {
795 GLSL("atomicMax(%s, isig_max); \n"
796 "atomicAdd(%s, isig_log); \n"
797 "barrier(); \n",
798 wg_max, wg_sum);
799 }
800
801 GLSL("color = color_orig; \n"
802 "} \n");
803
804 // Have one thread per work group update the global atomics. Do this
805 // at the end of the shader to avoid clobbering `average`, in case the
806 // state object will be used by the same pass.
807 GLSLF("// pl_shader_detect_peak \n"
808 "if (gl_LocalInvocationIndex == 0u) { \n"
809 " int wg_avg = %s / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y); \n"
810 " atomicAdd(frame_sum, wg_avg); \n"
811 " atomicMax(frame_max, %s); \n"
812 " memoryBarrierBuffer(); \n"
813 "} \n"
814 "barrier(); \n",
815 wg_sum, wg_max);
816
817 // Finally, to update the global state per dispatch, we increment a counter
818 GLSLF("if (gl_LocalInvocationIndex == 0u) { \n"
819 " uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y; \n"
820 " if (atomicAdd(counter, 1u) == num_wg - 1u) { \n"
821 " vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);\n"
822 " cur *= vec2(1.0 / %f, 1.0 / %f); \n"
823 " cur.x = exp(cur.x); \n"
824 " cur.y = max(cur.y, %s); \n",
825 log_scale, sig_scale, SH_FLOAT(PL_DEF(params->minimum_peak, 1.0)));
826
827 // Set the initial value accordingly if it contains no data
828 GLSLF(" if (average.y == 0.0) \n"
829 " average = cur; \n");
830
831 // Use an IIR low-pass filter to smooth out the detected values
832 GLSLF(" average += %s * (cur - average); \n",
833 SH_FLOAT(iir_coeff(PL_DEF(params->smoothing_period, 100.0))));
834
835 // Scene change hysteresis
836 float log_db = 10.0 / log(10.0);
837 if (params->scene_threshold_low > 0 && params->scene_threshold_high > 0) {
838 GLSLF(" float delta = abs(log(cur.x / average.x)); \n"
839 " average = mix(average, cur, smoothstep(%s, %s, delta)); \n",
840 SH_FLOAT(params->scene_threshold_low / log_db),
841 SH_FLOAT(params->scene_threshold_high / log_db));
842 }
843
844 // Reset SSBO state for the next frame
845 GLSLF(" frame_sum = 0; \n"
846 " frame_max = 0; \n"
847 " counter = 0u; \n"
848 " memoryBarrierBuffer(); \n"
849 " } \n"
850 "} \n");
851
852 return true;
853 }
854
pl_get_detected_peak(const pl_shader_obj state,float * out_peak,float * out_avg)855 bool pl_get_detected_peak(const pl_shader_obj state,
856 float *out_peak, float *out_avg)
857 {
858 if (!state || state->type != PL_SHADER_OBJ_PEAK_DETECT)
859 return false;
860
861 struct sh_peak_obj *obj = state->priv;
862 pl_gpu gpu = state->gpu;
863 pl_buf buf = obj->buf;
864 if (!buf)
865 return false;
866
867 float average[2] = {0};
868 pl_assert(obj->buf->params.size >= sizeof(average));
869
870 if (buf->params.host_readable) {
871
872 // We can read directly from the SSBO
873 if (!pl_buf_read(gpu, buf, 0, average, sizeof(average))) {
874 PL_ERR(gpu, "Failed reading from peak detect state buffer");
875 return false;
876 }
877
878 } else {
879
880 // We can't read directly from the SSBO, go via an intermediary
881 pl_buf tmp = pl_buf_create(gpu, &(struct pl_buf_params) {
882 .size = sizeof(average),
883 .host_readable = true,
884 });
885
886 if (!tmp) {
887 PL_ERR(gpu, "Failed creating buffer for SSBO read-back");
888 return false;
889 }
890
891 pl_buf_copy(gpu, tmp, 0, buf, 0, sizeof(average));
892 if (!pl_buf_read(gpu, tmp, 0, average, sizeof(average))) {
893 PL_ERR(gpu, "Failed reading from SSBO read-back buffer");
894 pl_buf_destroy(gpu, &tmp);
895 return false;
896 }
897 pl_buf_destroy(gpu, &tmp);
898
899 }
900
901 *out_avg = average[0];
902 *out_peak = average[1];
903
904 if (obj->margin > 0.0) {
905 *out_peak *= 1.0 + obj->margin;
906 *out_peak = PL_MIN(*out_peak, 10000 / PL_COLOR_SDR_WHITE);
907 }
908
909 return true;
910 }
911
pq_delinearize(float x)912 static inline float pq_delinearize(float x)
913 {
914 x *= PL_COLOR_SDR_WHITE / 10000.0;
915 x = powf(x, PQ_M1);
916 x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x);
917 x = pow(x, PQ_M2);
918 return x;
919 }
920
921 const struct pl_color_map_params pl_color_map_default_params = {
922 .intent = PL_INTENT_RELATIVE_COLORIMETRIC,
923 .tone_mapping_algo = PL_TONE_MAPPING_BT_2390,
924 .desaturation_strength = 0.90,
925 .desaturation_exponent = 0.20,
926 .desaturation_base = 0.18,
927 .gamut_clipping = true,
928 };
929
pl_shader_tone_map(pl_shader sh,struct pl_color_space src,struct pl_color_space dst,bool need_peak,bool need_black,pl_shader_obj * peak_detect_state,const struct pl_color_map_params * params)930 static void pl_shader_tone_map(pl_shader sh, struct pl_color_space src,
931 struct pl_color_space dst,
932 bool need_peak, bool need_black,
933 pl_shader_obj *peak_detect_state,
934 const struct pl_color_map_params *params)
935 {
936 sh_describe(sh, "tone mapping");
937 GLSL("// pl_shader_tone_map \n"
938 "{ \n"
939 "float sig_peak = %s; \n"
940 "float sig_avg = %s; \n",
941 SH_FLOAT(src.sig_peak * src.sig_scale),
942 SH_FLOAT(src.sig_avg * src.sig_scale));
943
944 if (need_peak) {
945 // To prevent discoloration due to out-of-bounds clipping, we need to
946 // make sure to reduce the value range as far as necessary to keep the
947 // entire signal in range, so tone map based on the brightest
948 // component.
949 GLSL("int sig_idx = 0; \n"
950 "if (color[1] > color[sig_idx]) sig_idx = 1; \n"
951 "if (color[2] > color[sig_idx]) sig_idx = 2; \n");
952
953 // Update the variables based on values from the peak detection buffer
954 if (peak_detect_state) {
955 struct sh_peak_obj *obj;
956 obj = SH_OBJ(sh, peak_detect_state, PL_SHADER_OBJ_PEAK_DETECT,
957 struct sh_peak_obj, sh_peak_uninit);
958 if (obj && obj->buf) {
959 obj->desc.desc.access = PL_DESC_ACCESS_READONLY;
960 obj->desc.memory = 0;
961 sh_desc(sh, obj->desc);
962 GLSL("sig_avg = average.x; \n"
963 "sig_peak = average.y; \n");
964
965 // Allow a tiny bit of extra overshoot for the smoothed peak
966 // values, clamped to the maximum reasonable range.
967 if (obj->margin > 0.0) {
968 GLSL("sig_peak = min(sig_peak * %s, %f); \n",
969 SH_FLOAT(1.0 + obj->margin),
970 10000 / PL_COLOR_SDR_WHITE);
971 }
972 }
973 }
974 }
975
976 // Rescale the input in order to bring it into a representation where the
977 // valid output range is [0.0, 1.0]. This is because (almost) all of the
978 // tone mapping algorithms are defined to map to this range.
979 bool need_norm = params->tone_mapping_algo != PL_TONE_MAPPING_BT_2390 &&
980 params->tone_mapping_algo != PL_TONE_MAPPING_CLIP;
981 float dst_range = (dst.sig_peak - dst.sig_floor) * dst.sig_scale;
982 dst_range = PL_MAX(dst_range, 1.0);
983
984 ident_t dst_range_c = sh_const_float(sh, "dst_range", dst_range);
985 ident_t src_floor_c = sh_const_float(sh, "src_floor", src.sig_floor * src.sig_scale);
986 ident_t dst_peak_c = sh_const_float(sh, "dst_peak", dst.sig_peak * dst.sig_scale);
987 ident_t dst_floor_c = sh_const_float(sh, "dst_floor", dst.sig_floor * dst.sig_scale);
988
989 if (need_norm) {
990 GLSL("color.rgb = vec3(1.0/%s) * (color.rgb - vec3(%s)); \n"
991 "sig_peak = (sig_peak - %s) / %s; \n",
992 dst_range_c, src_floor_c, src_floor_c, dst_range_c);
993 }
994
995 // Rename `color.rgb` to something shorter for conciseness, and also
996 // apply clipping to prevent the tone mapping functions from exploding
997 // for input values exceeding sig_peak
998 GLSL("vec3 sig = clamp(color.rgb, 0.0, sig_peak); \n"
999 "vec3 sig_orig = color.rgb; \n");
1000
1001 if (need_peak) {
1002 // Scale the signal to compensate for differences in the avg brightness
1003 GLSL("float slope = min(%s, %s / sig_avg); \n"
1004 "sig *= slope; \n"
1005 "sig_peak *= slope; \n",
1006 SH_FLOAT(PL_DEF(params->max_boost, 1.0)),
1007 sh_const_float(sh, "dst_avg", dst.sig_avg * dst.sig_scale));
1008 }
1009
1010 float param = params->tone_mapping_param;
1011 switch (params->tone_mapping_algo) {
1012 case PL_TONE_MAPPING_CLIP:
1013 GLSL("sig = clamp(sig, %s, %s); \n", dst_floor_c, dst_peak_c);
1014 break;
1015
1016 case PL_TONE_MAPPING_MOBIUS:
1017 // Mobius isn't well-defined for sig_peak <= 1.0, but the limit of
1018 // mobius as sig_peak -> 1.0 is a linear function, so we can just skip
1019 // tone-mapping in this case
1020 GLSL("if (sig_peak > 1.0 + 1e-6) { \n"
1021 " float j = %s; \n"
1022 // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
1023 // where M(x) = scale * (x+a)/(x+b)
1024 " float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak); \n"
1025 " float b = (j*j - 2.0*j*sig_peak + sig_peak) / \n"
1026 " max(1e-6, sig_peak - 1.0); \n"
1027 " float scale = (b*b + 2.0*b*j + j*j) / (b-a); \n"
1028 " sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)), \n"
1029 " %s(greaterThan(sig, vec3(j)))); \n"
1030 "} \n",
1031 SH_FLOAT(PL_DEF(param, 0.3)),
1032 sh_bvec(sh, 3));
1033 break;
1034
1035 case PL_TONE_MAPPING_REINHARD: {
1036 float contrast = PL_DEF(param, 0.5),
1037 offset = (1.0 - contrast) / contrast;
1038 ident_t offset_c = sh_const_float(sh, "offset", offset);
1039 GLSL("sig = sig / (sig + vec3(%s)); \n"
1040 "float scale = (sig_peak + %s) / sig_peak; \n"
1041 "sig *= scale; \n",
1042 offset_c, offset_c);
1043 break;
1044 }
1045
1046 case PL_TONE_MAPPING_HABLE: {
1047 float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
1048 ident_t hable = sh_fresh(sh, "hable");
1049 GLSLH("vec3 %s(vec3 x) { \n"
1050 " return (x * (%f*x + vec3(%f)) + vec3(%f)) / \n"
1051 " (x * (%f*x + vec3(%f)) + vec3(%f)) \n"
1052 " - vec3(%f); \n"
1053 "} \n",
1054 hable, A, C*B, D*E, A, B, D*F, E/F);
1055 GLSL("sig = %s(sig) / %s(vec3(sig_peak)).x;\n", hable, hable);
1056 break;
1057 }
1058
1059 case PL_TONE_MAPPING_GAMMA:
1060 GLSL("const float cutoff = 0.05; \n"
1061 "float gamma = 1.0/%s; \n"
1062 "float scale = pow(cutoff / sig_peak, gamma) / cutoff; \n"
1063 "sig = mix(scale * sig, \n"
1064 " pow(sig / sig_peak, vec3(gamma)), \n"
1065 " %s(greaterThan(sig, vec3(cutoff)))); \n",
1066 SH_FLOAT(PL_DEF(param, 1.8)),
1067 sh_bvec(sh, 3));
1068 break;
1069
1070 case PL_TONE_MAPPING_LINEAR:
1071 GLSL("sig *= min(%s / sig_peak, 1.0);\n", SH_FLOAT(PL_DEF(param, 1.0)));
1072 break;
1073
1074 case PL_TONE_MAPPING_BT_2390: {
1075 // We first need to encode both sig and sig_peak into PQ space
1076 GLSL("vec4 sig_pq = vec4(sig.rgb, sig_peak); \n"
1077 "sig_pq *= vec4(1.0/%f); \n"
1078 "sig_pq = pow(max(sig_pq, 0.0), vec4(%f)); \n"
1079 "sig_pq = (vec4(%f) + vec4(%f) * sig_pq) \n"
1080 " / (vec4(1.0) + vec4(%f) * sig_pq); \n"
1081 "sig_pq = pow(sig_pq, vec4(%f)); \n",
1082 10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
1083
1084 // Normalize to be relative to the source brightness range
1085 float pqlb = pq_delinearize(src.sig_floor * src.sig_scale);
1086 ident_t pqlb_c = SH_FLOAT(pqlb);
1087 GLSL("float scale = 1.0 / (sig_pq.a - %s); \n"
1088 "sig = clamp(vec3(scale) * (sig_pq.rgb - vec3(%s)), 0.0, 1.0); \n",
1089 pqlb_c, pqlb_c);
1090 if (need_peak) {
1091 // Apply piece-wise hermite spline
1092 GLSL("float maxLum = %s * scale; \n"
1093 "float ks = 1.5 * maxLum - 0.5; \n"
1094 "vec3 tb = (sig - vec3(ks)) / vec3(1.0 - ks); \n"
1095 "vec3 tb2 = tb * tb; \n"
1096 "vec3 tb3 = tb2 * tb; \n"
1097 "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) + \n"
1098 " (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) + \n"
1099 " (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum); \n"
1100 "sig = mix(sig, pb, %s(greaterThan(sig, vec3(ks)))); \n",
1101 SH_FLOAT(pq_delinearize(dst.sig_peak * dst.sig_scale) - pqlb),
1102 sh_bvec(sh, 3));
1103 }
1104 if (need_black) {
1105 // Apply black point adaptation
1106 GLSL("float minLum = %s * scale; \n"
1107 "float p = 4.0; \n"
1108 "if (minLum >= 0.0) \n"
1109 " p = min(1.0 / minLum, 4.0); \n"
1110 "vec3 boost = vec3(minLum) * pow(vec3(1.0) - sig, vec3(p)); \n"
1111 "vec3 sig_lift = sig + boost; \n",
1112 SH_FLOAT(pq_delinearize(dst.sig_floor * dst.sig_scale) - pqlb));
1113 if (need_peak) {
1114 GLSL("if (maxLum < 1.0) { \n"
1115 "sig_lift -= vec3(minLum); \n"
1116 "sig_lift /= 1.0 + minLum / maxLum * pow(1.0 - maxLum, p);\n"
1117 "sig_lift += vec3(minLum); \n"
1118 "} \n");
1119 }
1120 GLSL("sig = mix(sig, sig_lift, %s(lessThan(sig, vec3(1.0)))); \n",
1121 sh_bvec(sh, 3));
1122 }
1123 // Convert back from normalized PQ space to linear light
1124 GLSL("sig = vec3(sig_pq.a - %s) * sig + vec3(%s); \n"
1125 "sig = pow(max(sig, 0.0), vec3(1.0/%f)); \n"
1126 "sig = max(sig - vec3(%f), 0.0) / \n"
1127 " (vec3(%f) - vec3(%f) * sig); \n"
1128 "sig = pow(sig, vec3(1.0/%f)); \n"
1129 "sig *= vec3(%f); \n",
1130 pqlb_c, pqlb_c,
1131 PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE);
1132 break;
1133 }
1134
1135 case PL_TONE_MAPPING_ALGORITHM_COUNT:
1136 pl_unreachable();
1137 }
1138
1139 if (need_peak) {
1140 GLSL("float orig = max(sig_orig[sig_idx], 1e-10); \n"
1141 "vec3 sig_lin = sig_orig * sig[sig_idx] / orig; \n");
1142
1143 // Mix between the per-channel tone mapped `sig` and the linear tone
1144 // mapped `sig_lin` based on the desaturation strength
1145 if (params->desaturation_strength > 0.0) {
1146 float range = need_norm ? 1.0 : dst_range;
1147 GLSL("float coeff = max(sig[sig_idx] - %s, 1e-6) / \n"
1148 " max(sig[sig_idx], 1.0); \n"
1149 "coeff = %s * pow(coeff / %s, %s); \n"
1150 "color.rgb = mix(sig_lin, sig, coeff); \n",
1151 SH_FLOAT(params->desaturation_base / range),
1152 SH_FLOAT(params->desaturation_strength), SH_FLOAT(range),
1153 SH_FLOAT(params->desaturation_exponent));
1154 } else {
1155 GLSL("color.rgb = sig_lin; \n");
1156 }
1157 } else {
1158 // Always use the per-channel tone mapping for black point adaptation
1159 GLSL("color.rgb = sig; \n");
1160 }
1161
1162 // Undo the normalization by `dst_peak`
1163 if (need_norm) {
1164 GLSL("color.rgb = vec3(%s) * color.rgb + vec3(%s); \n",
1165 dst_range_c, dst_floor_c);
1166 }
1167
1168 GLSL("} \n");
1169 }
1170
pl_shader_color_map(pl_shader sh,const struct pl_color_map_params * params,struct pl_color_space src,struct pl_color_space dst,pl_shader_obj * peak_detect_state,bool prelinearized)1171 void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params,
1172 struct pl_color_space src, struct pl_color_space dst,
1173 pl_shader_obj *peak_detect_state,
1174 bool prelinearized)
1175 {
1176 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1177 return;
1178
1179 GLSL("// pl_shader_color_map\n");
1180 GLSL("{\n");
1181 params = PL_DEF(params, &pl_color_map_default_params);
1182
1183 pl_color_space_infer(&src);
1184 pl_color_space_infer_ref(&dst, &src);
1185
1186 if (!pl_color_space_equal(&src, &dst))
1187 sh_describe(sh, "colorspace conversion");
1188
1189 // All operations from here on require linear light as a starting point,
1190 // so we linearize even if src.transfer == dst.transfer when one of the other
1191 // operations needs it
1192 bool need_linear = src.transfer != dst.transfer ||
1193 src.primaries != dst.primaries ||
1194 src.sig_peak > dst.sig_peak ||
1195 src.sig_avg != dst.sig_avg ||
1196 src.sig_floor != dst.sig_floor ||
1197 src.sig_scale != dst.sig_scale ||
1198 src.light != dst.light;
1199 bool need_gamut_warn = false;
1200 bool is_linear = prelinearized;
1201 if (need_linear && !is_linear) {
1202 pl_shader_linearize(sh, src);
1203 is_linear = true;
1204 }
1205
1206 if (need_linear)
1207 pl_shader_ootf(sh, src);
1208
1209 // Tone map to rescale the signal average/peak/black if needed
1210 bool need_peak = src.sig_peak * src.sig_scale >
1211 dst.sig_peak * dst.sig_scale + 1e-6;
1212 bool need_black = fabs(src.sig_floor * src.sig_scale -
1213 dst.sig_floor * dst.sig_scale) > 1e-6;
1214
1215
1216 if (need_peak || need_black) {
1217 pl_shader_tone_map(sh, src, dst, need_peak, need_black,
1218 peak_detect_state, params);
1219 need_gamut_warn = true;
1220 }
1221
1222 // Adapt to the right colorspace (primaries) if necessary
1223 if (src.primaries != dst.primaries) {
1224 const struct pl_raw_primaries *csp_src, *csp_dst;
1225 csp_src = pl_raw_primaries_get(src.primaries),
1226 csp_dst = pl_raw_primaries_get(dst.primaries);
1227 struct pl_matrix3x3 cms_mat;
1228 cms_mat = pl_get_color_mapping_matrix(csp_src, csp_dst, params->intent);
1229
1230 GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) {
1231 .var = pl_var_mat3("cms_matrix"),
1232 .data = PL_TRANSPOSE_3X3(cms_mat.m),
1233 }));
1234
1235 if (!pl_primaries_superset(csp_dst, csp_src)) {
1236 if (params->gamut_clipping) {
1237 ident_t dst_min = SH_FLOAT(dst.sig_floor * dst.sig_scale);
1238 ident_t dst_max = SH_FLOAT(dst.sig_peak * dst.sig_scale);
1239
1240 GLSL("float cmin = min(min(color.r, color.g), color.b); \n"
1241 "if (cmin < %s) { \n"
1242 " float luma = dot(%s, color.rgb); \n"
1243 " float coeff = (%s - cmin) / (luma - cmin); \n"
1244 " coeff = clamp(coeff, 0.0, 1.0); \n"
1245 " color.rgb = mix(color.rgb, vec3(luma), coeff); \n"
1246 "} \n"
1247 "float cmax = 1.0/%s * max(max(color.r, color.g), color.b);\n"
1248 "if (cmax > 1.0) \n"
1249 " color.rgb /= cmax; \n",
1250 dst_min,
1251 sh_luma_coeffs(sh, dst.primaries),
1252 dst_min,
1253 dst_max);
1254
1255 } else {
1256 need_gamut_warn = true;
1257 }
1258 }
1259 }
1260
1261 // Warn for remaining out-of-gamut colors if enabled
1262 if (params->gamut_warning && need_gamut_warn) {
1263 ident_t dst_min = SH_FLOAT(dst.sig_floor * dst.sig_scale);
1264 ident_t dst_max = SH_FLOAT(dst.sig_peak * dst.sig_scale);
1265
1266 GLSL("if (any(greaterThan(color.rgb, vec3(%s + 0.005))) ||\n"
1267 " any(lessThan(color.rgb, vec3(%s - 0.005))))\n"
1268 " color.rgb = vec3(1.0, 0.0, 1.0); // magenta\n",
1269 dst_max, dst_min);
1270 }
1271
1272 if (need_linear)
1273 pl_shader_inverse_ootf(sh, dst);
1274
1275 if (is_linear)
1276 pl_shader_delinearize(sh, dst);
1277
1278 GLSL("}\n");
1279 }
1280
pl_shader_cone_distort(pl_shader sh,struct pl_color_space csp,const struct pl_cone_params * params)1281 void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp,
1282 const struct pl_cone_params *params)
1283 {
1284 if (!params || !params->cones)
1285 return;
1286
1287 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1288 return;
1289
1290 sh_describe(sh, "cone distortion");
1291 GLSL("// pl_shader_cone_distort\n");
1292 GLSL("{\n");
1293
1294 pl_color_space_infer(&csp);
1295 pl_shader_linearize(sh, csp);
1296
1297 struct pl_matrix3x3 cone_mat;
1298 cone_mat = pl_get_cone_matrix(params, pl_raw_primaries_get(csp.primaries));
1299 GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) {
1300 .var = pl_var_mat3("cone_mat"),
1301 .data = PL_TRANSPOSE_3X3(cone_mat.m),
1302 }));
1303
1304 pl_shader_delinearize(sh, csp);
1305 GLSL("}\n");
1306 }
1307
1308 struct sh_dither_obj {
1309 enum pl_dither_method method;
1310 pl_shader_obj lut;
1311 };
1312
sh_dither_uninit(pl_gpu gpu,void * ptr)1313 static void sh_dither_uninit(pl_gpu gpu, void *ptr)
1314 {
1315 struct sh_dither_obj *obj = ptr;
1316 pl_shader_obj_destroy(&obj->lut);
1317 *obj = (struct sh_dither_obj) {0};
1318 }
1319
fill_dither_matrix(void * data,const struct sh_lut_params * params)1320 static void fill_dither_matrix(void *data, const struct sh_lut_params *params)
1321 {
1322 pl_assert(params->width > 0 && params->height > 0 && params->comps == 1);
1323
1324 const struct sh_dither_obj *obj = params->priv;
1325 switch (obj->method) {
1326 case PL_DITHER_ORDERED_LUT:
1327 pl_assert(params->width == params->height);
1328 pl_generate_bayer_matrix(data, params->width);
1329 return;
1330
1331 case PL_DITHER_BLUE_NOISE:
1332 pl_assert(params->width == params->height);
1333 pl_generate_blue_noise(data, params->width);
1334 return;
1335
1336 case PL_DITHER_ORDERED_FIXED:
1337 case PL_DITHER_WHITE_NOISE:
1338 case PL_DITHER_METHOD_COUNT:
1339 return;
1340 }
1341
1342 pl_unreachable();
1343 }
1344
dither_method_is_lut(enum pl_dither_method method)1345 static bool dither_method_is_lut(enum pl_dither_method method)
1346 {
1347 switch (method) {
1348 case PL_DITHER_BLUE_NOISE:
1349 case PL_DITHER_ORDERED_LUT:
1350 return true;
1351 case PL_DITHER_ORDERED_FIXED:
1352 case PL_DITHER_WHITE_NOISE:
1353 return false;
1354 case PL_DITHER_METHOD_COUNT:
1355 break;
1356 }
1357
1358 pl_unreachable();
1359 }
1360
pl_shader_dither(pl_shader sh,int new_depth,pl_shader_obj * dither_state,const struct pl_dither_params * params)1361 void pl_shader_dither(pl_shader sh, int new_depth,
1362 pl_shader_obj *dither_state,
1363 const struct pl_dither_params *params)
1364 {
1365 if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1366 return;
1367
1368 if (new_depth <= 0 || new_depth > 256) {
1369 PL_WARN(sh, "Invalid dither depth: %d.. ignoring", new_depth);
1370 return;
1371 }
1372
1373 sh_describe(sh, "dithering");
1374 GLSL("// pl_shader_dither \n"
1375 "{ \n"
1376 "float bias; \n");
1377
1378 params = PL_DEF(params, &pl_dither_default_params);
1379 if (params->lut_size < 0 || params->lut_size > 8) {
1380 SH_FAIL(sh, "Invalid `lut_size` specified: %d", params->lut_size);
1381 return;
1382 }
1383
1384 enum pl_dither_method method = params->method;
1385 bool can_fixed = sh_glsl(sh).version >= 130;
1386 ident_t lut = NULL;
1387 int lut_size = 0;
1388
1389 if (method == PL_DITHER_ORDERED_FIXED && !can_fixed) {
1390 PL_WARN(sh, "PL_DITHER_ORDERED_FIXED requires glsl version >= 130.."
1391 " falling back.");
1392 goto fallback;
1393 }
1394
1395 if (dither_method_is_lut(method)) {
1396 if (!dither_state) {
1397 PL_WARN(sh, "LUT-based dither method specified but no dither state "
1398 "object given, falling back to non-LUT based methods.");
1399 goto fallback;
1400 }
1401
1402 struct sh_dither_obj *obj;
1403 obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER,
1404 struct sh_dither_obj, sh_dither_uninit);
1405 if (!obj)
1406 goto fallback;
1407
1408 bool changed = obj->method != method;
1409 obj->method = method;
1410
1411 lut_size = 1 << PL_DEF(params->lut_size, 6);
1412 lut = sh_lut(sh, &(struct sh_lut_params) {
1413 .object = &obj->lut,
1414 .type = PL_VAR_FLOAT,
1415 .width = lut_size,
1416 .height = lut_size,
1417 .comps = 1,
1418 .update = changed,
1419 .fill = fill_dither_matrix,
1420 .priv = obj,
1421 });
1422 if (!lut)
1423 goto fallback;
1424 }
1425
1426 goto done;
1427
1428 fallback:
1429 method = can_fixed ? PL_DITHER_ORDERED_FIXED : PL_DITHER_WHITE_NOISE;
1430 // fall through
1431
1432 done: ;
1433
1434 int size = 0;
1435 if (lut) {
1436 size = lut_size;
1437 } else if (method == PL_DITHER_ORDERED_FIXED) {
1438 size = 16; // hard-coded size
1439 }
1440
1441 if (size) {
1442 // Transform the screen position to the cyclic range [0,1)
1443 GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/%s);\n", SH_INT(size));
1444
1445 if (params->temporal) {
1446 int phase = SH_PARAMS(sh).index % 8;
1447 float r = phase * (M_PI / 2); // rotate
1448 float m = phase < 4 ? 1 : -1; // mirror
1449 float mat[2][2] = {
1450 {cos(r), -sin(r) },
1451 {sin(r) * m, cos(r) * m},
1452 };
1453
1454 ident_t rot = sh_var(sh, (struct pl_shader_var) {
1455 .var = pl_var_mat2("dither_rot"),
1456 .data = &mat[0][0],
1457 .dynamic = true,
1458 });
1459 GLSL("pos = fract(%s * pos + vec2(1.0));\n", rot);
1460 }
1461 }
1462
1463 switch (method) {
1464 case PL_DITHER_WHITE_NOISE: {
1465 ident_t prng = sh_prng(sh, params->temporal, NULL);
1466 GLSL("bias = %s;\n", prng);
1467 break;
1468 }
1469
1470 case PL_DITHER_ORDERED_FIXED:
1471 // Bitwise ordered dither using only 32-bit uints
1472 GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u; \n"
1473 // Bitwise merge (morton number)
1474 "xy.x = xy.x ^ xy.y; \n"
1475 "xy = (xy | xy << 2) & uvec2(0x33333333); \n"
1476 "xy = (xy | xy << 1) & uvec2(0x55555555); \n"
1477 // Bitwise inversion
1478 "uint b = xy.x + (xy.y << 1); \n"
1479 "b = (b * 0x0802u & 0x22110u) | \n"
1480 " (b * 0x8020u & 0x88440u); \n"
1481 "b = 0x10101u * b; \n"
1482 "b = (b >> 16) & 0xFFu; \n"
1483 // Generate bias value
1484 "bias = float(b) * 1.0/256.0; \n");
1485 break;
1486
1487 case PL_DITHER_BLUE_NOISE:
1488 case PL_DITHER_ORDERED_LUT:
1489 pl_assert(lut);
1490 GLSL("bias = %s(ivec2(pos * %s));\n", lut, SH_FLOAT(lut_size));
1491 break;
1492
1493 case PL_DITHER_METHOD_COUNT:
1494 pl_unreachable();
1495 }
1496
1497 uint64_t scale = (1LLU << new_depth) - 1;
1498 GLSL("color = vec4(%llu.0) * color + vec4(bias); \n"
1499 "color = floor(color) * vec4(1.0 / %llu.0); \n"
1500 "} \n",
1501 (long long unsigned) scale, (long long unsigned) scale);
1502 }
1503
1504 const struct pl_dither_params pl_dither_default_params = {
1505 .method = PL_DITHER_BLUE_NOISE,
1506 .lut_size = 6,
1507 .temporal = false, // commonly flickers on LCDs
1508 };
1509