1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <math.h>
19 #include "shaders.h"
20 
pl_shader_decode_color(pl_shader sh,struct pl_color_repr * repr,const struct pl_color_adjustment * params)21 void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr,
22                             const struct pl_color_adjustment *params)
23 {
24     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
25         return;
26 
27     sh_describe(sh, "color decoding");
28     GLSL("// pl_shader_decode_color \n"
29          "{ \n");
30 
31     // For the non-linear color systems we need some special input handling
32     // to make sure we don't accidentally screw everything up because of the
33     // alpha multiplication, which only commutes with linear operations.
34     bool is_nonlinear = !pl_color_system_is_linear(repr->sys);
35     if (is_nonlinear && repr->alpha == PL_ALPHA_PREMULTIPLIED) {
36         GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n");
37         repr->alpha = PL_ALPHA_INDEPENDENT;
38     }
39 
40     // XYZ needs special handling due to the input gamma logic
41     if (repr->sys == PL_COLOR_SYSTEM_XYZ) {
42         ident_t scale = SH_FLOAT(pl_color_repr_normalize(repr));
43         GLSL("color.rgb = pow(vec3(%s) * color.rgb, vec3(2.6));\n", scale);
44     }
45 
46     enum pl_color_system orig_sys = repr->sys;
47     struct pl_transform3x3 tr = pl_color_repr_decode(repr, params);
48 
49     if (memcmp(&tr, &pl_transform3x3_identity, sizeof(tr))) {
50         ident_t cmat = sh_var(sh, (struct pl_shader_var) {
51             .var  = pl_var_mat3("cmat"),
52             .data = PL_TRANSPOSE_3X3(tr.mat.m),
53         });
54 
55         ident_t cmat_c = sh_var(sh, (struct pl_shader_var) {
56             .var  = pl_var_vec3("cmat_c"),
57             .data = tr.c,
58         });
59 
60         GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c);
61     }
62 
63     switch (orig_sys) {
64     case PL_COLOR_SYSTEM_BT_2020_C:
65         // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
66         // C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
67         //      = (B'-Y'c) / 1.5816  | C'bc >  0
68         //
69         // C'rc = (R'-Y'c) / 1.7184  | C'rc <= 0
70         //      = (R'-Y'c) / 0.9936  | C'rc >  0
71         //
72         // as per the BT.2020 specification, table 4. This is a non-linear
73         // transformation because (constant) luminance receives non-equal
74         // contributions from the three different channels.
75         GLSL("// constant luminance conversion                                  \n"
76              "color.br = color.br * mix(vec2(1.5816, 0.9936),                   \n"
77              "                          vec2(1.9404, 1.7184),                   \n"
78              "                          %s(lessThanEqual(color.br, vec2(0.0)))) \n"
79              "           + color.gg;                                            \n",
80              sh_bvec(sh, 2));
81         // Expand channels to camera-linear light. This shader currently just
82         // assumes everything uses the BT.2020 12-bit gamma function, since the
83         // difference between 10 and 12-bit is negligible for anything other
84         // than 12-bit content.
85         GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5),                        \n"
86              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
87              "                    vec3(1.0/0.45)),                             \n"
88              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
89              sh_bvec(sh, 3));
90         // Calculate the green channel from the expanded RYcB, and recompress to G'
91         // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
92         GLSL("color.g = (lin.g - 0.2627*lin.r - 0.0593*lin.b)*1.0/0.6780;   \n"
93              "color.g = mix(color.g * 4.5,                                  \n"
94              "              1.0993 * pow(color.g, 0.45) - 0.0993,           \n"
95              "              %s(0.0181 <= color.g));                         \n",
96              sh_bvec(sh, 1));
97         break;
98 
99     case PL_COLOR_SYSTEM_BT_2100_PQ:
100     case PL_COLOR_SYSTEM_BT_2100_HLG: {
101         // Conversion process from the spec:
102         //
103         // 1. L'M'S' = cmat * ICtCp
104         // 2. LMS = linearize(L'M'S')  (EOTF for PQ, inverse OETF for HLG)
105         // 3. RGB = lms2rgb * LMS
106         //
107         // After this we need to invert step 2 to arrive at non-linear RGB.
108         // (It's important we keep the transfer function conversion separate
109         // from the color system decoding, so we have to partially undo our
110         // work here even though we will end up linearizing later on anyway)
111         struct pl_color_space csp = {
112             .transfer = repr->sys == PL_COLOR_SYSTEM_BT_2100_PQ
113                             ? PL_COLOR_TRC_PQ
114                             : PL_COLOR_TRC_HLG,
115         };
116 
117         // Inverted from the matrix in the spec, transposed to column major
118         static const char *bt2100_lms2rgb = "mat3("
119             "  3.43661,  -0.79133, -0.0259499, "
120             " -2.50645,    1.9836, -0.0989137, "
121             "0.0698454, -0.192271,    1.12486) ";
122 
123         pl_shader_linearize(sh, csp);
124         GLSL("color.rgb = %s * color.rgb; \n", bt2100_lms2rgb);
125         pl_shader_delinearize(sh, csp);
126         break;
127     }
128 
129     case PL_COLOR_SYSTEM_UNKNOWN:
130     case PL_COLOR_SYSTEM_RGB:
131     case PL_COLOR_SYSTEM_XYZ:
132     case PL_COLOR_SYSTEM_BT_601:
133     case PL_COLOR_SYSTEM_BT_709:
134     case PL_COLOR_SYSTEM_SMPTE_240M:
135     case PL_COLOR_SYSTEM_BT_2020_NC:
136     case PL_COLOR_SYSTEM_YCGCO:
137         break; // no special post-processing needed
138 
139     case PL_COLOR_SYSTEM_COUNT:
140         pl_unreachable();
141     }
142 
143     if (repr->alpha == PL_ALPHA_INDEPENDENT) {
144         GLSL("color.rgb *= vec3(color.a);\n");
145         repr->alpha = PL_ALPHA_PREMULTIPLIED;
146     }
147 
148     // Gamma adjustment. Doing this here (in non-linear light) is technically
149     // somewhat wrong, but this is just an aesthetic parameter and not really
150     // meant for colorimetric precision, so we don't care too much.
151     if (params && params->gamma != 1.0) {
152         ident_t gamma = sh_var(sh, (struct pl_shader_var) {
153             .var = pl_var_float("gamma"),
154             .data = &params->gamma,
155         });
156         GLSL("color.rgb = pow(color.rgb, vec3(%s)); \n", gamma);
157     }
158 
159     GLSL("}\n");
160 }
161 
pl_shader_encode_color(pl_shader sh,const struct pl_color_repr * repr)162 void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr)
163 {
164     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
165         return;
166 
167     sh_describe(sh, "color encoding");
168     GLSL("// pl_shader_encode_color \n"
169          "{ \n");
170 
171     switch (repr->sys) {
172     case PL_COLOR_SYSTEM_BT_2020_C:
173         // Expand R'G'B' to RGB
174         GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5),                        \n"
175              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
176              "                    vec3(1.0/0.45)),                             \n"
177              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
178              sh_bvec(sh, 3));
179 
180         // Compute Yc from RGB and compress to R'Y'cB'
181         GLSL("color.g = dot(vec3(0.2627, 0.6780, 0.0593), lin);     \n"
182              "color.g = mix(color.g * 4.5,                          \n"
183              "              1.0993 * pow(color.g, 0.45) - 0.0993,   \n"
184              "              %s(0.0181 <= color.g));                 \n",
185              sh_bvec(sh, 1));
186 
187         // Compute C'bc and C'rc into color.br
188         GLSL("color.br = color.br - color.gg;                           \n"
189              "color.br *= mix(vec2(1.0/1.5816, 1.0/0.9936),             \n"
190              "                vec2(1.0/1.9404, 1.0/1.7184),             \n"
191              "                %s(lessThanEqual(color.br, vec2(0.0))));  \n",
192              sh_bvec(sh, 2));
193         break;
194 
195     case PL_COLOR_SYSTEM_BT_2100_PQ:
196     case PL_COLOR_SYSTEM_BT_2100_HLG: {
197         struct pl_color_space csp = {
198             .transfer = repr->sys == PL_COLOR_SYSTEM_BT_2100_PQ
199                             ? PL_COLOR_TRC_PQ
200                             : PL_COLOR_TRC_HLG,
201         };
202 
203         // Inverse of the matrix above
204         static const char *bt2100_rgb2lms = "mat3("
205             "0.412109, 0.166748, 0.024170, "
206             "0.523925, 0.720459, 0.075440, "
207             "0.063965, 0.112793, 0.900394) ";
208 
209         pl_shader_linearize(sh, csp);
210         GLSL("color.rgb = %s * color.rgb; \n", bt2100_rgb2lms);
211         pl_shader_delinearize(sh, csp);
212         break;
213     }
214 
215     case PL_COLOR_SYSTEM_UNKNOWN:
216     case PL_COLOR_SYSTEM_RGB:
217     case PL_COLOR_SYSTEM_XYZ:
218     case PL_COLOR_SYSTEM_BT_601:
219     case PL_COLOR_SYSTEM_BT_709:
220     case PL_COLOR_SYSTEM_SMPTE_240M:
221     case PL_COLOR_SYSTEM_BT_2020_NC:
222     case PL_COLOR_SYSTEM_YCGCO:
223         break; // no special pre-processing needed
224 
225     case PL_COLOR_SYSTEM_COUNT:
226         pl_unreachable();
227     }
228 
229     // Since this is a relatively rare operation, bypass it as much as possible
230     bool skip = true;
231     skip &= PL_DEF(repr->sys, PL_COLOR_SYSTEM_RGB) == PL_COLOR_SYSTEM_RGB;
232     skip &= PL_DEF(repr->levels, PL_COLOR_LEVELS_FULL) == PL_COLOR_LEVELS_FULL;
233     skip &= !repr->bits.sample_depth || !repr->bits.color_depth ||
234              repr->bits.sample_depth == repr->bits.color_depth;
235     skip &= !repr->bits.bit_shift;
236 
237     if (!skip) {
238         struct pl_color_repr copy = *repr;
239         ident_t xyzscale = NULL;
240         if (repr->sys == PL_COLOR_SYSTEM_XYZ)
241             xyzscale = SH_FLOAT(1.0 / pl_color_repr_normalize(&copy));
242 
243         struct pl_transform3x3 tr = pl_color_repr_decode(&copy, NULL);
244         pl_transform3x3_invert(&tr);
245 
246         ident_t cmat = sh_var(sh, (struct pl_shader_var) {
247             .var  = pl_var_mat3("cmat"),
248             .data = PL_TRANSPOSE_3X3(tr.mat.m),
249         });
250 
251         ident_t cmat_c = sh_var(sh, (struct pl_shader_var) {
252             .var  = pl_var_vec3("cmat_c"),
253             .data = tr.c,
254         });
255 
256         GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c);
257 
258         if (xyzscale)
259             GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6)) * vec3(%s); \n", xyzscale);
260     }
261 
262     if (repr->alpha == PL_ALPHA_INDEPENDENT)
263         GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n");
264 
265     GLSL("}\n");
266 }
267 
268 // Common constants for SMPTE ST.2084 (PQ)
269 static const float PQ_M1 = 2610./4096 * 1./4,
270                    PQ_M2 = 2523./4096 * 128,
271                    PQ_C1 = 3424./4096,
272                    PQ_C2 = 2413./4096 * 32,
273                    PQ_C3 = 2392./4096 * 32;
274 
275 // Common constants for ARIB STD-B67 (HLG)
276 static const float HLG_A = 0.17883277,
277                    HLG_B = 0.28466892,
278                    HLG_C = 0.55991073;
279 
280 // Common constants for Panasonic V-Log
281 static const float VLOG_B = 0.00873,
282                    VLOG_C = 0.241514,
283                    VLOG_D = 0.598206;
284 
285 // Common constants for Sony S-Log
286 static const float SLOG_A = 0.432699,
287                    SLOG_B = 0.037584,
288                    SLOG_C = 0.616596 + 0.03,
289                    SLOG_P = 3.538813,
290                    SLOG_Q = 0.030001,
291                    SLOG_K2 = 155.0 / 219.0;
292 
pl_shader_linearize(pl_shader sh,struct pl_color_space csp)293 void pl_shader_linearize(pl_shader sh, struct pl_color_space csp)
294 {
295     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
296         return;
297 
298     if (csp.transfer == PL_COLOR_TRC_LINEAR)
299         return;
300 
301     // Note that this clamp may technically violate the definition of
302     // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
303     // displayed on the display where such would be possible. That said, the
304     // problem is that not all gamma curves are well-defined on the values
305     // outside this range, so we ignore it and just clamp anyway for sanity.
306     GLSL("// pl_shader_linearize           \n"
307          "color.rgb = max(color.rgb, 0.0); \n");
308 
309     switch (csp.transfer) {
310     case PL_COLOR_TRC_SRGB:
311         GLSL("color.rgb = mix(color.rgb * vec3(1.0/12.92),               \n"
312              "                pow((color.rgb + vec3(0.055))/vec3(1.055), \n"
313              "                    vec3(2.4)),                            \n"
314              "                %s(lessThan(vec3(0.04045), color.rgb)));   \n",
315              sh_bvec(sh, 3));
316         return;
317     case PL_COLOR_TRC_BT_1886: {
318         float binv = powf(PL_CLAMP(csp.sig_floor, 0.0, 1.0), 1.0/2.4);
319         float a = powf(1.0 - binv, 2.4);
320         float b = binv / (1.0 - binv);
321         GLSL("color.rgb = vec3(%s) * pow(color.rgb + vec3(%s), vec3(2.4));\n",
322              SH_FLOAT(a), SH_FLOAT(b));
323         return;
324     }
325     case PL_COLOR_TRC_GAMMA18:
326         GLSL("color.rgb = pow(color.rgb, vec3(1.8));\n");
327         return;
328     case PL_COLOR_TRC_GAMMA20:
329         GLSL("color.rgb = pow(color.rgb, vec3(2.0));\n");
330         return;
331     case PL_COLOR_TRC_UNKNOWN:
332     case PL_COLOR_TRC_GAMMA22:
333         GLSL("color.rgb = pow(color.rgb, vec3(2.2));\n");
334         return;
335     case PL_COLOR_TRC_GAMMA24:
336         GLSL("color.rgb = pow(color.rgb, vec3(2.4));\n");
337         return;
338     case PL_COLOR_TRC_GAMMA26:
339         GLSL("color.rgb = pow(color.rgb, vec3(2.6));\n");
340         return;
341     case PL_COLOR_TRC_GAMMA28:
342         GLSL("color.rgb = pow(color.rgb, vec3(2.8));\n");
343         return;
344     case PL_COLOR_TRC_PRO_PHOTO:
345         GLSL("color.rgb = mix(color.rgb * vec3(1.0/16.0),              \n"
346              "                pow(color.rgb, vec3(1.8)),               \n"
347              "                %s(lessThan(vec3(0.03125), color.rgb))); \n",
348              sh_bvec(sh, 3));
349         return;
350     case PL_COLOR_TRC_PQ:
351         GLSL("color.rgb = pow(color.rgb, vec3(1.0/%f));         \n"
352              "color.rgb = max(color.rgb - vec3(%f), 0.0)        \n"
353              "             / (vec3(%f) - vec3(%f) * color.rgb); \n"
354              "color.rgb = pow(color.rgb, vec3(1.0/%f));         \n"
355              // PQ's output range is 0-10000, but we need it to be relative to
356              // to PL_COLOR_SDR_WHITE instead, so rescale
357              "color.rgb *= vec3(%f);                            \n",
358              PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE);
359         return;
360     case PL_COLOR_TRC_HLG:
361         GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,         \n"
362              "                exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
363              "                    + vec3(%f),                            \n"
364              "                %s(lessThan(vec3(0.5), color.rgb)));       \n"
365              // Rescale from 0-12 to be relative to PL_COLOR_SDR_WHITE_HLG
366              "color.rgb *= vec3(1.0/%f);                                 \n",
367              HLG_C, HLG_A, HLG_B, sh_bvec(sh, 3), PL_COLOR_SDR_WHITE_HLG);
368         return;
369     case PL_COLOR_TRC_V_LOG:
370         GLSL("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
371              "    pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
372              "              - vec3(%f),                                  \n"
373              "    %s(lessThanEqual(vec3(0.181), color.rgb)));            \n",
374              VLOG_D, VLOG_C, VLOG_B, sh_bvec(sh, 3));
375         return;
376     case PL_COLOR_TRC_S_LOG1:
377         GLSL("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
378              "            - vec3(%f);                                            \n",
379              SLOG_C, SLOG_A, SLOG_B);
380         return;
381     case PL_COLOR_TRC_S_LOG2:
382         GLSL("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f),      \n"
383              "    (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
384              "              - vec3(%f)) * vec3(1.0/%f),                   \n"
385              "    %s(lessThanEqual(vec3(%f), color.rgb)));                \n",
386              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, sh_bvec(sh, 3),
387              SLOG_Q);
388         return;
389     case PL_COLOR_TRC_LINEAR:
390     case PL_COLOR_TRC_COUNT:
391         break;
392     }
393 
394     pl_unreachable();
395 }
396 
pl_shader_delinearize(pl_shader sh,struct pl_color_space csp)397 void pl_shader_delinearize(pl_shader sh, struct pl_color_space csp)
398 {
399     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
400         return;
401 
402     if (csp.transfer == PL_COLOR_TRC_LINEAR)
403         return;
404 
405     GLSL("// pl_shader_delinearize         \n"
406          "color.rgb = max(color.rgb, 0.0); \n");
407 
408     switch (csp.transfer) {
409     case PL_COLOR_TRC_SRGB:
410         GLSL("color.rgb = mix(color.rgb * vec3(12.92),                        \n"
411              "                vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))     \n"
412              "                    - vec3(0.055),                              \n"
413              "                %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
414              sh_bvec(sh, 3));
415         return;
416     case PL_COLOR_TRC_BT_1886: {
417         float binv = powf(PL_CLAMP(csp.sig_floor, 0.0, 1.0), 1.0/2.4);
418         float a = powf(1.0 - binv, 2.4);
419         float b = binv / (1.0 - binv);
420         GLSL("color.rgb = pow(vec3(%s) * color.rgb, vec3(1.0/2.4)) - vec3(%s);\n",
421              SH_FLOAT(1.0 / a), SH_FLOAT(b));
422         return;
423     }
424     case PL_COLOR_TRC_GAMMA18:
425         GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.8));\n");
426         return;
427     case PL_COLOR_TRC_GAMMA20:
428         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.0));\n");
429         return;
430     case PL_COLOR_TRC_UNKNOWN:
431     case PL_COLOR_TRC_GAMMA22:
432         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.2));\n");
433         return;
434     case PL_COLOR_TRC_GAMMA24:
435         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4));\n");
436         return;
437     case PL_COLOR_TRC_GAMMA26:
438         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6));\n");
439         return;
440     case PL_COLOR_TRC_GAMMA28:
441         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.8));\n");
442         return;
443     case PL_COLOR_TRC_PRO_PHOTO:
444         GLSL("color.rgb = mix(color.rgb * vec3(16.0),                        \n"
445              "                pow(color.rgb, vec3(1.0/1.8)),                 \n"
446              "                %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
447              sh_bvec(sh, 3));
448         return;
449     case PL_COLOR_TRC_PQ:
450         GLSL("color.rgb *= vec3(1.0/%f);                         \n"
451              "color.rgb = pow(color.rgb, vec3(%f));              \n"
452              "color.rgb = (vec3(%f) + vec3(%f) * color.rgb)      \n"
453              "             / (vec3(1.0) + vec3(%f) * color.rgb); \n"
454              "color.rgb = pow(color.rgb, vec3(%f));              \n",
455              10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
456         return;
457     case PL_COLOR_TRC_HLG:
458         GLSL("color.rgb *= vec3(%f);                                           \n"
459              "color.rgb = mix(vec3(0.5) * sqrt(color.rgb),                     \n"
460              "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n"
461              "                %s(lessThan(vec3(1.0), color.rgb)));             \n",
462              PL_COLOR_SDR_WHITE_HLG, HLG_A, HLG_B, HLG_C, sh_bvec(sh, 3));
463         return;
464     case PL_COLOR_TRC_V_LOG:
465         GLSL("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),       \n"
466              "                vec3(%f) * log(color.rgb + vec3(%f))       \n"
467              "                    + vec3(%f),                            \n"
468              "                %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
469              VLOG_C / M_LN10, VLOG_B, VLOG_D, sh_bvec(sh, 3));
470         return;
471     case PL_COLOR_TRC_S_LOG1:
472         GLSL("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
473              SLOG_A / M_LN10, SLOG_B, SLOG_C);
474         return;
475     case PL_COLOR_TRC_S_LOG2:
476         GLSL("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f),                \n"
477              "                vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
478              "                    + vec3(%f),                                 \n"
479              "                %s(lessThanEqual(vec3(0.0), color.rgb)));       \n",
480              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C,
481              sh_bvec(sh, 3));
482         return;
483     case PL_COLOR_TRC_LINEAR:
484     case PL_COLOR_TRC_COUNT:
485         break;
486     }
487 
488     pl_unreachable();
489 }
490 
491 const struct pl_sigmoid_params pl_sigmoid_default_params = {
492     .center = 0.75,
493     .slope  = 6.50,
494 };
495 
pl_shader_sigmoidize(pl_shader sh,const struct pl_sigmoid_params * params)496 void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params)
497 {
498     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
499         return;
500 
501     params = PL_DEF(params, &pl_sigmoid_default_params);
502     float center = PL_DEF(params->center, 0.75);
503     float slope  = PL_DEF(params->slope, 6.5);
504 
505     // This function needs to go through (0,0) and (1,1), so we compute the
506     // values at 1 and 0, and then scale/shift them, respectively.
507     float offset = 1.0 / (1 + expf(slope * center));
508     float scale  = 1.0 / (1 + expf(slope * (center - 1))) - offset;
509 
510     GLSL("// pl_shader_sigmoidize                                          \n"
511          "color = clamp(color, 0.0, 1.0);                                  \n"
512          "color = vec4(%s) - log(vec4(1.0) / (color * vec4(%s) + vec4(%s)) \n"
513          "                         - vec4(1.0)) * vec4(%s);                \n",
514          SH_FLOAT(center), SH_FLOAT(scale), SH_FLOAT(offset), SH_FLOAT(1.0 / slope));
515 }
516 
pl_shader_unsigmoidize(pl_shader sh,const struct pl_sigmoid_params * params)517 void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params)
518 {
519     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
520         return;
521 
522     // See: pl_shader_sigmoidize
523     params = PL_DEF(params, &pl_sigmoid_default_params);
524     float center = PL_DEF(params->center, 0.75);
525     float slope  = PL_DEF(params->slope, 6.5);
526     float offset = 1.0 / (1 + expf(slope * center));
527     float scale  = 1.0 / (1 + expf(slope * (center - 1))) - offset;
528 
529     GLSL("// pl_shader_unsigmoidize                                           \n"
530          "color = clamp(color, 0.0, 1.0);                                     \n"
531          "color = vec4(%s) / (vec4(1.0) + exp(vec4(%s) * (vec4(%s) - color))) \n"
532          "           - vec4(%s);                                              \n",
533          SH_FLOAT(1.0 / scale), SH_FLOAT(slope), SH_FLOAT(center), SH_FLOAT(offset / scale));
534 }
535 
sh_luma_coeffs(pl_shader sh,enum pl_color_primaries prim)536 static ident_t sh_luma_coeffs(pl_shader sh, enum pl_color_primaries prim)
537 {
538     struct pl_matrix3x3 rgb2xyz;
539     rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(prim));
540 
541     // FIXME: Cannot use `const vec3` due to glslang bug #2025
542     ident_t coeffs = sh_fresh(sh, "luma_coeffs");
543     GLSLH("#define %s vec3(%s, %s, %s) \n", coeffs,
544           SH_FLOAT(rgb2xyz.m[1][0]), // RGB->Y vector
545           SH_FLOAT(rgb2xyz.m[1][1]),
546           SH_FLOAT(rgb2xyz.m[1][2]));
547     return coeffs;
548 }
549 
550 // Applies the OOTF / inverse OOTF - including the sig_scale adaptation
pl_shader_ootf(pl_shader sh,struct pl_color_space csp)551 static void pl_shader_ootf(pl_shader sh, struct pl_color_space csp)
552 {
553     if (csp.sig_scale != 1.0)
554         GLSL("color.rgb *= vec3(%s); \n", SH_FLOAT(csp.sig_scale));
555 
556     if (!csp.light || csp.light == PL_COLOR_LIGHT_DISPLAY)
557         return;
558 
559     GLSL("// pl_shader_ootf                \n"
560          "color.rgb = max(color.rgb, 0.0); \n");
561 
562     switch (csp.light)
563     {
564     case PL_COLOR_LIGHT_SCENE_HLG: {
565         // HLG OOTF from BT.2100, tuned to the indicated peak
566         float peak = csp.sig_peak * PL_COLOR_SDR_WHITE;
567         float gamma = 1.2 + 0.42 * log10(peak / 1000.0);
568         gamma = PL_MAX(gamma, 1.0);
569         GLSL("color.rgb *= vec3(%s * pow(dot(%s, color.rgb), %s));\n",
570              SH_FLOAT(csp.sig_peak / pow(12.0 / PL_COLOR_SDR_WHITE_HLG, gamma)),
571              sh_luma_coeffs(sh, csp.primaries),
572              SH_FLOAT(gamma - 1.0));
573         return;
574     }
575     case PL_COLOR_LIGHT_SCENE_709_1886:
576         // This OOTF is defined by encoding the result as 709 and then decoding
577         // it as 1886; although this is called 709_1886 we actually use the
578         // more precise (by one decimal) values from BT.2020 instead
579         GLSL("color.rgb = mix(color.rgb * vec3(4.5),                    \n"
580              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) \n"
581              "                             - vec3(0.0993),              \n"
582              "                %s(lessThan(vec3(0.0181), color.rgb)));   \n"
583              "color.rgb = pow(color.rgb, vec3(2.4));                    \n",
584              sh_bvec(sh, 3));
585         return;
586     case PL_COLOR_LIGHT_SCENE_1_2:
587         GLSL("color.rgb = pow(color.rgb, vec3(1.2));\n");
588         return;
589     case PL_COLOR_LIGHT_UNKNOWN:
590     case PL_COLOR_LIGHT_DISPLAY:
591     case PL_COLOR_LIGHT_COUNT:
592         break;
593     }
594 
595     pl_unreachable();
596 }
597 
pl_shader_inverse_ootf(pl_shader sh,struct pl_color_space csp)598 static void pl_shader_inverse_ootf(pl_shader sh, struct pl_color_space csp)
599 {
600     if (!csp.light || csp.light == PL_COLOR_LIGHT_DISPLAY)
601         goto done;
602 
603     GLSL("// pl_shader_inverse_ootf        \n"
604          "color.rgb = max(color.rgb, 0.0); \n");
605 
606     switch (csp.light)
607     {
608     case PL_COLOR_LIGHT_SCENE_HLG: {
609         float peak = csp.sig_peak * PL_COLOR_SDR_WHITE;
610         float gamma = 1.2 + 0.42 * log10(peak / 1000.0);
611         gamma = PL_MAX(gamma, 1.0);
612         GLSL("color.rgb *= vec3(1.0/%s);                                \n"
613              "color.rgb /= vec3(max(1e-6, pow(dot(%s, color.rgb),       \n"
614              "                                %s)));                    \n",
615              SH_FLOAT(csp.sig_peak / pow(12.0 / PL_COLOR_SDR_WHITE_HLG, gamma)),
616              sh_luma_coeffs(sh, csp.primaries),
617              SH_FLOAT((gamma - 1.0) / gamma));
618         goto done;
619     }
620     case PL_COLOR_LIGHT_SCENE_709_1886:
621         GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4));                         \n"
622              "color.rgb = mix(color.rgb * vec3(1.0/4.5),                         \n"
623              "                pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n"
624              "                    vec3(1/0.45)),                                 \n"
625              "                %s(lessThan(vec3(0.08145), color.rgb)));           \n",
626              sh_bvec(sh, 3));
627         goto done;
628     case PL_COLOR_LIGHT_SCENE_1_2:
629         GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.2));\n");
630         goto done;
631     case PL_COLOR_LIGHT_UNKNOWN:
632     case PL_COLOR_LIGHT_DISPLAY:
633     case PL_COLOR_LIGHT_COUNT:
634         break;
635     }
636 
637     pl_unreachable();
638 
639 done:
640     if (csp.sig_scale != 1.0)
641         GLSL("color.rgb *= vec3(1.0 / %s); \n", SH_FLOAT(csp.sig_scale));
642 }
643 
644 const struct pl_peak_detect_params pl_peak_detect_default_params = {
645     .smoothing_period       = 100.0,
646     .scene_threshold_low    = 5.5,
647     .scene_threshold_high   = 10.0,
648     .overshoot_margin       = 0.05,
649     .minimum_peak           = 1.0,
650 };
651 
652 struct sh_peak_obj {
653     pl_buf buf;
654     struct pl_shader_desc desc;
655     float margin;
656 };
657 
sh_peak_uninit(pl_gpu gpu,void * ptr)658 static void sh_peak_uninit(pl_gpu gpu, void *ptr)
659 {
660     struct sh_peak_obj *obj = ptr;
661     pl_buf_destroy(gpu, &obj->buf);
662     *obj = (struct sh_peak_obj) {0};
663 }
664 
iir_coeff(float rate)665 static inline float iir_coeff(float rate)
666 {
667     float a = 1.0 - cos(1.0 / rate);
668     return sqrt(a*a + 2*a) - a;
669 }
670 
pl_shader_detect_peak(pl_shader sh,struct pl_color_space csp,pl_shader_obj * state,const struct pl_peak_detect_params * params)671 bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp,
672                            pl_shader_obj *state,
673                            const struct pl_peak_detect_params *params)
674 {
675     params = PL_DEF(params, &pl_peak_detect_default_params);
676     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
677         return false;
678 
679     if (!sh_try_compute(sh, 8, 8, true, 2 * sizeof(int32_t))) {
680         PL_ERR(sh, "HDR peak detection requires compute shaders!");
681         return false;
682     }
683 
684     if (sh_glsl(sh).version < 130) {
685         // uint was added in GLSL 130
686         PL_ERR(sh, "HDR peak detection requires GLSL >= 130!");
687         return false;
688     }
689 
690     struct sh_peak_obj *obj;
691     obj = SH_OBJ(sh, state, PL_SHADER_OBJ_PEAK_DETECT, struct sh_peak_obj,
692                  sh_peak_uninit);
693     if (!obj)
694         return false;
695 
696     pl_gpu gpu = SH_GPU(sh);
697     obj->margin = params->overshoot_margin;
698 
699     if (!obj->buf) {
700         obj->desc = (struct pl_shader_desc) {
701             .desc = {
702                 .name   = "PeakDetect",
703                 .type   = PL_DESC_BUF_STORAGE,
704             },
705         };
706 
707         // Note: Don't change this order, `vec2 average` being the first
708         // element is hard-coded in `pl_get_detected_peak`
709         bool ok = true;
710         ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_vec2("average"));
711         ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_sum"));
712         ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_max"));
713         ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_uint("counter"));
714 
715         if (!ok) {
716             PL_ERR(sh, "HDR peak detection exhausts device limits!");
717             return false;
718         }
719 
720         // Create the SSBO
721         size_t size = sh_buf_desc_size(&obj->desc);
722         static const uint8_t zero[32] = {0};
723         pl_assert(sizeof(zero) >= size);
724         struct pl_buf_params buf_params = {
725             .size = size,
726             .host_readable = true,
727             .memory_type = PL_BUF_MEM_DEVICE,
728             .storable = true,
729             .initial_data = zero,
730         };
731 
732         // Attempt creating host-readable SSBO first, suppress errors
733         pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
734         obj->buf = pl_buf_create(gpu, &buf_params);
735         pl_log_level_cap(gpu->log, PL_LOG_NONE);
736 
737         if (!obj->buf) {
738             // Fall back to non-host-readable SSBO
739             buf_params.host_readable = false;
740             obj->buf = pl_buf_create(gpu, &buf_params);
741         }
742 
743         obj->desc.binding.object = obj->buf;
744     }
745 
746     if (!obj->buf) {
747         SH_FAIL(sh, "Failed creating peak detection SSBO!");
748         return false;
749     }
750 
751     // Attach the SSBO and perform the peak detection logic
752     obj->desc.desc.access = PL_DESC_ACCESS_READWRITE;
753     obj->desc.memory = PL_MEMORY_COHERENT;
754     sh_desc(sh, obj->desc);
755 
756     sh_describe(sh, "peak detection");
757     GLSL("// pl_shader_detect_peak \n"
758          "{                        \n"
759          "vec4 color_orig = color; \n");
760 
761     // Decode the color into linear light absolute scale representation
762     pl_color_space_infer(&csp);
763     pl_shader_linearize(sh, csp);
764     pl_shader_ootf(sh, csp);
765 
766     // For performance, we want to do as few atomic operations on global
767     // memory as possible, so use an atomic in shmem for the work group.
768     ident_t wg_sum = sh_fresh(sh, "wg_sum"), wg_max = sh_fresh(sh, "wg_max");
769     GLSLH("shared int %s;   \n", wg_sum);
770     GLSLH("shared int %s;   \n", wg_max);
771     GLSL("%s = 0; %s = 0;   \n"
772          "barrier();        \n",
773          wg_sum, wg_max);
774 
775     // Chosen to avoid overflowing on an 8K buffer
776     const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
777 
778     GLSL("float sig_max = max(max(color.r, color.g), color.b);  \n"
779          "float sig_log = log(max(sig_max, %f));                \n"
780          "int isig_max = int(sig_max * %f);                     \n"
781          "int isig_log = int(sig_log * %f);                     \n",
782          log_min, sig_scale, log_scale);
783 
784     // Update the work group's shared atomics
785     if (sh_glsl(sh).subgroup_size) {
786         GLSL("int group_max = subgroupMax(isig_max);    \n"
787              "int group_sum = subgroupAdd(isig_log);    \n"
788              "if (subgroupElect()) {                    \n"
789              "    atomicMax(%s, group_max);             \n"
790              "    atomicAdd(%s, group_sum);             \n"
791              "}                                         \n"
792              "barrier();                                \n",
793              wg_max, wg_sum);
794     } else {
795         GLSL("atomicMax(%s, isig_max);  \n"
796              "atomicAdd(%s, isig_log);  \n"
797              "barrier();                \n",
798              wg_max, wg_sum);
799     }
800 
801     GLSL("color = color_orig;   \n"
802          "}                     \n");
803 
804     // Have one thread per work group update the global atomics. Do this
805     // at the end of the shader to avoid clobbering `average`, in case the
806     // state object will be used by the same pass.
807     GLSLF("// pl_shader_detect_peak                                             \n"
808           "if (gl_LocalInvocationIndex == 0u) {                                 \n"
809           "    int wg_avg = %s / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);  \n"
810           "    atomicAdd(frame_sum, wg_avg);                                    \n"
811           "    atomicMax(frame_max, %s);                                        \n"
812           "    memoryBarrierBuffer();                                           \n"
813           "}                                                                    \n"
814           "barrier();                                                           \n",
815           wg_sum, wg_max);
816 
817     // Finally, to update the global state per dispatch, we increment a counter
818     GLSLF("if (gl_LocalInvocationIndex == 0u) {                                 \n"
819           "    uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;           \n"
820           "    if (atomicAdd(counter, 1u) == num_wg - 1u) {                     \n"
821           "        vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);\n"
822           "        cur *= vec2(1.0 / %f, 1.0 / %f);                             \n"
823           "        cur.x = exp(cur.x);                                          \n"
824           "        cur.y = max(cur.y, %s);                                      \n",
825           log_scale, sig_scale, SH_FLOAT(PL_DEF(params->minimum_peak, 1.0)));
826 
827     // Set the initial value accordingly if it contains no data
828     GLSLF("        if (average.y == 0.0) \n"
829           "            average = cur;    \n");
830 
831     // Use an IIR low-pass filter to smooth out the detected values
832     GLSLF("        average += %s * (cur - average); \n",
833           SH_FLOAT(iir_coeff(PL_DEF(params->smoothing_period, 100.0))));
834 
835     // Scene change hysteresis
836     float log_db = 10.0 / log(10.0);
837     if (params->scene_threshold_low > 0 && params->scene_threshold_high > 0) {
838         GLSLF("    float delta = abs(log(cur.x / average.x));               \n"
839               "    average = mix(average, cur, smoothstep(%s, %s, delta));  \n",
840               SH_FLOAT(params->scene_threshold_low / log_db),
841               SH_FLOAT(params->scene_threshold_high / log_db));
842     }
843 
844     // Reset SSBO state for the next frame
845     GLSLF("        frame_sum = 0;            \n"
846           "        frame_max = 0;            \n"
847           "        counter = 0u;             \n"
848           "        memoryBarrierBuffer();    \n"
849           "    }                             \n"
850           "}                                 \n");
851 
852     return true;
853 }
854 
pl_get_detected_peak(const pl_shader_obj state,float * out_peak,float * out_avg)855 bool pl_get_detected_peak(const pl_shader_obj state,
856                           float *out_peak, float *out_avg)
857 {
858     if (!state || state->type != PL_SHADER_OBJ_PEAK_DETECT)
859         return false;
860 
861     struct sh_peak_obj *obj = state->priv;
862     pl_gpu gpu = state->gpu;
863     pl_buf buf = obj->buf;
864     if (!buf)
865         return false;
866 
867     float average[2] = {0};
868     pl_assert(obj->buf->params.size >= sizeof(average));
869 
870     if (buf->params.host_readable) {
871 
872         // We can read directly from the SSBO
873         if (!pl_buf_read(gpu, buf, 0, average, sizeof(average))) {
874             PL_ERR(gpu, "Failed reading from peak detect state buffer");
875             return false;
876         }
877 
878     } else {
879 
880         // We can't read directly from the SSBO, go via an intermediary
881         pl_buf tmp = pl_buf_create(gpu, &(struct pl_buf_params) {
882             .size = sizeof(average),
883             .host_readable = true,
884         });
885 
886         if (!tmp) {
887             PL_ERR(gpu, "Failed creating buffer for SSBO read-back");
888             return false;
889         }
890 
891         pl_buf_copy(gpu, tmp, 0, buf, 0, sizeof(average));
892         if (!pl_buf_read(gpu, tmp, 0, average, sizeof(average))) {
893             PL_ERR(gpu, "Failed reading from SSBO read-back buffer");
894             pl_buf_destroy(gpu, &tmp);
895             return false;
896         }
897         pl_buf_destroy(gpu, &tmp);
898 
899     }
900 
901     *out_avg = average[0];
902     *out_peak = average[1];
903 
904     if (obj->margin > 0.0) {
905         *out_peak *= 1.0 + obj->margin;
906         *out_peak = PL_MIN(*out_peak, 10000 / PL_COLOR_SDR_WHITE);
907     }
908 
909     return true;
910 }
911 
pq_delinearize(float x)912 static inline float pq_delinearize(float x)
913 {
914     x *= PL_COLOR_SDR_WHITE / 10000.0;
915     x = powf(x, PQ_M1);
916     x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x);
917     x = pow(x, PQ_M2);
918     return x;
919 }
920 
921 const struct pl_color_map_params pl_color_map_default_params = {
922     .intent                 = PL_INTENT_RELATIVE_COLORIMETRIC,
923     .tone_mapping_algo      = PL_TONE_MAPPING_BT_2390,
924     .desaturation_strength  = 0.90,
925     .desaturation_exponent  = 0.20,
926     .desaturation_base      = 0.18,
927     .gamut_clipping         = true,
928 };
929 
pl_shader_tone_map(pl_shader sh,struct pl_color_space src,struct pl_color_space dst,bool need_peak,bool need_black,pl_shader_obj * peak_detect_state,const struct pl_color_map_params * params)930 static void pl_shader_tone_map(pl_shader sh, struct pl_color_space src,
931                                struct pl_color_space dst,
932                                bool need_peak, bool need_black,
933                                pl_shader_obj *peak_detect_state,
934                                const struct pl_color_map_params *params)
935 {
936     sh_describe(sh, "tone mapping");
937     GLSL("// pl_shader_tone_map \n"
938          "{                     \n"
939          "float sig_peak = %s;  \n"
940          "float sig_avg = %s;   \n",
941          SH_FLOAT(src.sig_peak * src.sig_scale),
942          SH_FLOAT(src.sig_avg * src.sig_scale));
943 
944     if (need_peak) {
945         // To prevent discoloration due to out-of-bounds clipping, we need to
946         // make sure to reduce the value range as far as necessary to keep the
947         // entire signal in range, so tone map based on the brightest
948         // component.
949         GLSL("int sig_idx = 0;                              \n"
950              "if (color[1] > color[sig_idx]) sig_idx = 1;   \n"
951              "if (color[2] > color[sig_idx]) sig_idx = 2;   \n");
952 
953         // Update the variables based on values from the peak detection buffer
954         if (peak_detect_state) {
955             struct sh_peak_obj *obj;
956             obj = SH_OBJ(sh, peak_detect_state, PL_SHADER_OBJ_PEAK_DETECT,
957                          struct sh_peak_obj, sh_peak_uninit);
958             if (obj && obj->buf) {
959                 obj->desc.desc.access = PL_DESC_ACCESS_READONLY;
960                 obj->desc.memory = 0;
961                 sh_desc(sh, obj->desc);
962                 GLSL("sig_avg  = average.x; \n"
963                      "sig_peak = average.y; \n");
964 
965                 // Allow a tiny bit of extra overshoot for the smoothed peak
966                 // values, clamped to the maximum reasonable range.
967                 if (obj->margin > 0.0) {
968                     GLSL("sig_peak = min(sig_peak * %s, %f); \n",
969                          SH_FLOAT(1.0 + obj->margin),
970                          10000 / PL_COLOR_SDR_WHITE);
971                 }
972             }
973         }
974     }
975 
976     // Rescale the input in order to bring it into a representation where the
977     // valid output range is [0.0, 1.0]. This is because (almost) all of the
978     // tone mapping algorithms are defined to map to this range.
979     bool need_norm = params->tone_mapping_algo != PL_TONE_MAPPING_BT_2390 &&
980                      params->tone_mapping_algo != PL_TONE_MAPPING_CLIP;
981     float dst_range = (dst.sig_peak - dst.sig_floor) * dst.sig_scale;
982     dst_range = PL_MAX(dst_range, 1.0);
983 
984     ident_t dst_range_c = sh_const_float(sh, "dst_range", dst_range);
985     ident_t src_floor_c = sh_const_float(sh, "src_floor", src.sig_floor * src.sig_scale);
986     ident_t dst_peak_c = sh_const_float(sh, "dst_peak", dst.sig_peak * dst.sig_scale);
987     ident_t dst_floor_c = sh_const_float(sh, "dst_floor", dst.sig_floor * dst.sig_scale);
988 
989     if (need_norm) {
990         GLSL("color.rgb = vec3(1.0/%s) * (color.rgb - vec3(%s)); \n"
991              "sig_peak = (sig_peak - %s) / %s;                   \n",
992              dst_range_c, src_floor_c, src_floor_c, dst_range_c);
993     }
994 
995     // Rename `color.rgb` to something shorter for conciseness, and also
996     // apply clipping to prevent the tone mapping functions from exploding
997     // for input values exceeding sig_peak
998     GLSL("vec3 sig = clamp(color.rgb, 0.0, sig_peak);   \n"
999          "vec3 sig_orig = color.rgb;                    \n");
1000 
1001     if (need_peak) {
1002         // Scale the signal to compensate for differences in the avg brightness
1003         GLSL("float slope = min(%s, %s / sig_avg); \n"
1004              "sig *= slope;                        \n"
1005              "sig_peak *= slope;                   \n",
1006              SH_FLOAT(PL_DEF(params->max_boost, 1.0)),
1007              sh_const_float(sh, "dst_avg", dst.sig_avg * dst.sig_scale));
1008     }
1009 
1010     float param = params->tone_mapping_param;
1011     switch (params->tone_mapping_algo) {
1012     case PL_TONE_MAPPING_CLIP:
1013         GLSL("sig = clamp(sig, %s, %s); \n", dst_floor_c, dst_peak_c);
1014         break;
1015 
1016     case PL_TONE_MAPPING_MOBIUS:
1017         // Mobius isn't well-defined for sig_peak <= 1.0, but the limit of
1018         // mobius as sig_peak -> 1.0 is a linear function, so we can just skip
1019         // tone-mapping in this case
1020         GLSL("if (sig_peak > 1.0 + 1e-6) {                                      \n"
1021              "    float j = %s;                                                 \n"
1022              // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
1023              // where M(x) = scale * (x+a)/(x+b)
1024              "    float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak); \n"
1025              "    float b = (j*j - 2.0*j*sig_peak + sig_peak) /                 \n"
1026              "              max(1e-6, sig_peak - 1.0);                          \n"
1027              "    float scale = (b*b + 2.0*b*j + j*j) / (b-a);                  \n"
1028              "    sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)),     \n"
1029              "              %s(greaterThan(sig, vec3(j))));                     \n"
1030              "}                                                                 \n",
1031              SH_FLOAT(PL_DEF(param, 0.3)),
1032              sh_bvec(sh, 3));
1033         break;
1034 
1035     case PL_TONE_MAPPING_REINHARD: {
1036         float contrast = PL_DEF(param, 0.5),
1037               offset = (1.0 - contrast) / contrast;
1038         ident_t offset_c = sh_const_float(sh, "offset", offset);
1039         GLSL("sig = sig / (sig + vec3(%s));             \n"
1040              "float scale = (sig_peak + %s) / sig_peak; \n"
1041              "sig *= scale;                             \n",
1042              offset_c, offset_c);
1043         break;
1044     }
1045 
1046     case PL_TONE_MAPPING_HABLE: {
1047         float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
1048         ident_t hable = sh_fresh(sh, "hable");
1049         GLSLH("vec3 %s(vec3 x) {                                \n"
1050               "    return (x * (%f*x + vec3(%f)) + vec3(%f)) /  \n"
1051               "           (x * (%f*x + vec3(%f)) + vec3(%f))    \n"
1052               "           - vec3(%f);                           \n"
1053               "}                                                \n",
1054               hable, A, C*B, D*E, A, B, D*F, E/F);
1055         GLSL("sig = %s(sig) / %s(vec3(sig_peak)).x;\n", hable, hable);
1056         break;
1057     }
1058 
1059     case PL_TONE_MAPPING_GAMMA:
1060         GLSL("const float cutoff = 0.05;                            \n"
1061              "float gamma = 1.0/%s;                                 \n"
1062              "float scale = pow(cutoff / sig_peak, gamma) / cutoff; \n"
1063              "sig = mix(scale * sig,                                \n"
1064              "          pow(sig / sig_peak, vec3(gamma)),           \n"
1065              "          %s(greaterThan(sig, vec3(cutoff))));        \n",
1066              SH_FLOAT(PL_DEF(param, 1.8)),
1067              sh_bvec(sh, 3));
1068         break;
1069 
1070     case PL_TONE_MAPPING_LINEAR:
1071         GLSL("sig *= min(%s / sig_peak, 1.0);\n", SH_FLOAT(PL_DEF(param, 1.0)));
1072         break;
1073 
1074     case PL_TONE_MAPPING_BT_2390: {
1075         // We first need to encode both sig and sig_peak into PQ space
1076         GLSL("vec4 sig_pq = vec4(sig.rgb, sig_peak);                            \n"
1077              "sig_pq *= vec4(1.0/%f);                                           \n"
1078              "sig_pq = pow(max(sig_pq, 0.0), vec4(%f));                         \n"
1079              "sig_pq = (vec4(%f) + vec4(%f) * sig_pq)                           \n"
1080              "          / (vec4(1.0) + vec4(%f) * sig_pq);                      \n"
1081              "sig_pq = pow(sig_pq, vec4(%f));                                   \n",
1082              10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
1083 
1084         // Normalize to be relative to the source brightness range
1085         float pqlb = pq_delinearize(src.sig_floor * src.sig_scale);
1086         ident_t pqlb_c = SH_FLOAT(pqlb);
1087         GLSL("float scale = 1.0 / (sig_pq.a - %s);                              \n"
1088              "sig = clamp(vec3(scale) * (sig_pq.rgb - vec3(%s)), 0.0, 1.0);     \n",
1089              pqlb_c, pqlb_c);
1090         if (need_peak) {
1091             // Apply piece-wise hermite spline
1092             GLSL("float maxLum = %s * scale;                                    \n"
1093                  "float ks = 1.5 * maxLum - 0.5;                                \n"
1094                  "vec3 tb = (sig - vec3(ks)) / vec3(1.0 - ks);                  \n"
1095                  "vec3 tb2 = tb * tb;                                           \n"
1096                  "vec3 tb3 = tb2 * tb;                                          \n"
1097                  "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) +    \n"
1098                  "          (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) +           \n"
1099                  "          (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum);            \n"
1100                  "sig = mix(sig, pb, %s(greaterThan(sig, vec3(ks))));           \n",
1101                  SH_FLOAT(pq_delinearize(dst.sig_peak * dst.sig_scale) - pqlb),
1102                  sh_bvec(sh, 3));
1103         }
1104         if (need_black) {
1105             // Apply black point adaptation
1106             GLSL("float minLum = %s * scale;                                    \n"
1107                  "float p = 4.0;                                                \n"
1108                  "if (minLum >= 0.0)                                            \n"
1109                  "    p = min(1.0 / minLum, 4.0);                               \n"
1110                  "vec3 boost = vec3(minLum) * pow(vec3(1.0) - sig, vec3(p));    \n"
1111                  "vec3 sig_lift = sig + boost;                                  \n",
1112                  SH_FLOAT(pq_delinearize(dst.sig_floor * dst.sig_scale) - pqlb));
1113             if (need_peak) {
1114                  GLSL("if (maxLum < 1.0) {                                      \n"
1115                       "sig_lift -= vec3(minLum);                                \n"
1116                       "sig_lift /= 1.0 + minLum / maxLum * pow(1.0 - maxLum, p);\n"
1117                       "sig_lift += vec3(minLum);                                \n"
1118                       "}                                                        \n");
1119             }
1120             GLSL("sig = mix(sig, sig_lift, %s(lessThan(sig, vec3(1.0))));       \n",
1121                  sh_bvec(sh, 3));
1122         }
1123         // Convert back from normalized PQ space to linear light
1124         GLSL("sig = vec3(sig_pq.a - %s) * sig + vec3(%s);                       \n"
1125              "sig = pow(max(sig, 0.0), vec3(1.0/%f));                           \n"
1126              "sig = max(sig - vec3(%f), 0.0) /                                  \n"
1127              "          (vec3(%f) - vec3(%f) * sig);                            \n"
1128              "sig = pow(sig, vec3(1.0/%f));                                     \n"
1129              "sig *= vec3(%f);                                                  \n",
1130              pqlb_c, pqlb_c,
1131              PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE);
1132         break;
1133     }
1134 
1135     case PL_TONE_MAPPING_ALGORITHM_COUNT:
1136         pl_unreachable();
1137     }
1138 
1139     if (need_peak) {
1140         GLSL("float orig = max(sig_orig[sig_idx], 1e-10);       \n"
1141              "vec3 sig_lin = sig_orig * sig[sig_idx] / orig;    \n");
1142 
1143         // Mix between the per-channel tone mapped `sig` and the linear tone
1144         // mapped `sig_lin` based on the desaturation strength
1145         if (params->desaturation_strength > 0.0) {
1146             float range = need_norm ? 1.0 : dst_range;
1147             GLSL("float coeff = max(sig[sig_idx] - %s, 1e-6) /  \n"
1148                  "              max(sig[sig_idx], 1.0);         \n"
1149                  "coeff = %s * pow(coeff / %s, %s);             \n"
1150                  "color.rgb = mix(sig_lin, sig, coeff);         \n",
1151                  SH_FLOAT(params->desaturation_base / range),
1152                  SH_FLOAT(params->desaturation_strength), SH_FLOAT(range),
1153                  SH_FLOAT(params->desaturation_exponent));
1154         } else {
1155             GLSL("color.rgb = sig_lin; \n");
1156         }
1157     } else {
1158         // Always use the per-channel tone mapping for black point adaptation
1159         GLSL("color.rgb = sig; \n");
1160     }
1161 
1162     // Undo the normalization by `dst_peak`
1163     if (need_norm) {
1164         GLSL("color.rgb = vec3(%s) * color.rgb + vec3(%s); \n",
1165              dst_range_c, dst_floor_c);
1166     }
1167 
1168     GLSL("} \n");
1169 }
1170 
pl_shader_color_map(pl_shader sh,const struct pl_color_map_params * params,struct pl_color_space src,struct pl_color_space dst,pl_shader_obj * peak_detect_state,bool prelinearized)1171 void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params,
1172                          struct pl_color_space src, struct pl_color_space dst,
1173                          pl_shader_obj *peak_detect_state,
1174                          bool prelinearized)
1175 {
1176     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1177         return;
1178 
1179     GLSL("// pl_shader_color_map\n");
1180     GLSL("{\n");
1181     params = PL_DEF(params, &pl_color_map_default_params);
1182 
1183     pl_color_space_infer(&src);
1184     pl_color_space_infer_ref(&dst, &src);
1185 
1186     if (!pl_color_space_equal(&src, &dst))
1187         sh_describe(sh, "colorspace conversion");
1188 
1189     // All operations from here on require linear light as a starting point,
1190     // so we linearize even if src.transfer == dst.transfer when one of the other
1191     // operations needs it
1192     bool need_linear = src.transfer != dst.transfer ||
1193                        src.primaries != dst.primaries ||
1194                        src.sig_peak > dst.sig_peak ||
1195                        src.sig_avg != dst.sig_avg ||
1196                        src.sig_floor != dst.sig_floor ||
1197                        src.sig_scale != dst.sig_scale ||
1198                        src.light != dst.light;
1199     bool need_gamut_warn = false;
1200     bool is_linear = prelinearized;
1201     if (need_linear && !is_linear) {
1202         pl_shader_linearize(sh, src);
1203         is_linear = true;
1204     }
1205 
1206     if (need_linear)
1207         pl_shader_ootf(sh, src);
1208 
1209     // Tone map to rescale the signal average/peak/black if needed
1210     bool need_peak = src.sig_peak * src.sig_scale >
1211                      dst.sig_peak * dst.sig_scale + 1e-6;
1212     bool need_black = fabs(src.sig_floor * src.sig_scale -
1213                            dst.sig_floor * dst.sig_scale) > 1e-6;
1214 
1215 
1216     if (need_peak || need_black) {
1217         pl_shader_tone_map(sh, src, dst, need_peak, need_black,
1218                            peak_detect_state, params);
1219         need_gamut_warn = true;
1220     }
1221 
1222     // Adapt to the right colorspace (primaries) if necessary
1223     if (src.primaries != dst.primaries) {
1224         const struct pl_raw_primaries *csp_src, *csp_dst;
1225         csp_src = pl_raw_primaries_get(src.primaries),
1226         csp_dst = pl_raw_primaries_get(dst.primaries);
1227         struct pl_matrix3x3 cms_mat;
1228         cms_mat = pl_get_color_mapping_matrix(csp_src, csp_dst, params->intent);
1229 
1230         GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) {
1231             .var = pl_var_mat3("cms_matrix"),
1232             .data = PL_TRANSPOSE_3X3(cms_mat.m),
1233         }));
1234 
1235         if (!pl_primaries_superset(csp_dst, csp_src)) {
1236             if (params->gamut_clipping) {
1237                 ident_t dst_min = SH_FLOAT(dst.sig_floor * dst.sig_scale);
1238                 ident_t dst_max = SH_FLOAT(dst.sig_peak * dst.sig_scale);
1239 
1240                 GLSL("float cmin = min(min(color.r, color.g), color.b);         \n"
1241                      "if (cmin < %s) {                                          \n"
1242                      "    float luma = dot(%s, color.rgb);                      \n"
1243                      "    float coeff = (%s - cmin) / (luma - cmin);            \n"
1244                      "    coeff = clamp(coeff, 0.0, 1.0);                       \n"
1245                      "    color.rgb = mix(color.rgb, vec3(luma), coeff);        \n"
1246                      "}                                                         \n"
1247                      "float cmax = 1.0/%s * max(max(color.r, color.g), color.b);\n"
1248                      "if (cmax > 1.0)                                           \n"
1249                      "    color.rgb /= cmax;                                    \n",
1250                      dst_min,
1251                      sh_luma_coeffs(sh, dst.primaries),
1252                      dst_min,
1253                      dst_max);
1254 
1255             } else {
1256                 need_gamut_warn = true;
1257             }
1258         }
1259     }
1260 
1261     // Warn for remaining out-of-gamut colors if enabled
1262     if (params->gamut_warning && need_gamut_warn) {
1263         ident_t dst_min = SH_FLOAT(dst.sig_floor * dst.sig_scale);
1264         ident_t dst_max = SH_FLOAT(dst.sig_peak * dst.sig_scale);
1265 
1266         GLSL("if (any(greaterThan(color.rgb, vec3(%s + 0.005))) ||\n"
1267              "    any(lessThan(color.rgb, vec3(%s - 0.005))))\n"
1268              "    color.rgb = vec3(1.0, 0.0, 1.0); // magenta\n",
1269              dst_max, dst_min);
1270     }
1271 
1272     if (need_linear)
1273         pl_shader_inverse_ootf(sh, dst);
1274 
1275     if (is_linear)
1276         pl_shader_delinearize(sh, dst);
1277 
1278     GLSL("}\n");
1279 }
1280 
pl_shader_cone_distort(pl_shader sh,struct pl_color_space csp,const struct pl_cone_params * params)1281 void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp,
1282                             const struct pl_cone_params *params)
1283 {
1284     if (!params || !params->cones)
1285         return;
1286 
1287     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1288         return;
1289 
1290     sh_describe(sh, "cone distortion");
1291     GLSL("// pl_shader_cone_distort\n");
1292     GLSL("{\n");
1293 
1294     pl_color_space_infer(&csp);
1295     pl_shader_linearize(sh, csp);
1296 
1297     struct pl_matrix3x3 cone_mat;
1298     cone_mat = pl_get_cone_matrix(params, pl_raw_primaries_get(csp.primaries));
1299     GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) {
1300         .var = pl_var_mat3("cone_mat"),
1301         .data = PL_TRANSPOSE_3X3(cone_mat.m),
1302     }));
1303 
1304     pl_shader_delinearize(sh, csp);
1305     GLSL("}\n");
1306 }
1307 
1308 struct sh_dither_obj {
1309     enum pl_dither_method method;
1310     pl_shader_obj lut;
1311 };
1312 
sh_dither_uninit(pl_gpu gpu,void * ptr)1313 static void sh_dither_uninit(pl_gpu gpu, void *ptr)
1314 {
1315     struct sh_dither_obj *obj = ptr;
1316     pl_shader_obj_destroy(&obj->lut);
1317     *obj = (struct sh_dither_obj) {0};
1318 }
1319 
fill_dither_matrix(void * data,const struct sh_lut_params * params)1320 static void fill_dither_matrix(void *data, const struct sh_lut_params *params)
1321 {
1322     pl_assert(params->width > 0 && params->height > 0 && params->comps == 1);
1323 
1324     const struct sh_dither_obj *obj = params->priv;
1325     switch (obj->method) {
1326     case PL_DITHER_ORDERED_LUT:
1327         pl_assert(params->width == params->height);
1328         pl_generate_bayer_matrix(data, params->width);
1329         return;
1330 
1331     case PL_DITHER_BLUE_NOISE:
1332         pl_assert(params->width == params->height);
1333         pl_generate_blue_noise(data, params->width);
1334         return;
1335 
1336     case PL_DITHER_ORDERED_FIXED:
1337     case PL_DITHER_WHITE_NOISE:
1338     case PL_DITHER_METHOD_COUNT:
1339         return;
1340     }
1341 
1342     pl_unreachable();
1343 }
1344 
dither_method_is_lut(enum pl_dither_method method)1345 static bool dither_method_is_lut(enum pl_dither_method method)
1346 {
1347     switch (method) {
1348     case PL_DITHER_BLUE_NOISE:
1349     case PL_DITHER_ORDERED_LUT:
1350         return true;
1351     case PL_DITHER_ORDERED_FIXED:
1352     case PL_DITHER_WHITE_NOISE:
1353         return false;
1354     case PL_DITHER_METHOD_COUNT:
1355         break;
1356     }
1357 
1358     pl_unreachable();
1359 }
1360 
pl_shader_dither(pl_shader sh,int new_depth,pl_shader_obj * dither_state,const struct pl_dither_params * params)1361 void pl_shader_dither(pl_shader sh, int new_depth,
1362                       pl_shader_obj *dither_state,
1363                       const struct pl_dither_params *params)
1364 {
1365     if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
1366         return;
1367 
1368     if (new_depth <= 0 || new_depth > 256) {
1369         PL_WARN(sh, "Invalid dither depth: %d.. ignoring", new_depth);
1370         return;
1371     }
1372 
1373     sh_describe(sh, "dithering");
1374     GLSL("// pl_shader_dither \n"
1375         "{                    \n"
1376         "float bias;          \n");
1377 
1378     params = PL_DEF(params, &pl_dither_default_params);
1379     if (params->lut_size < 0 || params->lut_size > 8) {
1380         SH_FAIL(sh, "Invalid `lut_size` specified: %d", params->lut_size);
1381         return;
1382     }
1383 
1384     enum pl_dither_method method = params->method;
1385     bool can_fixed = sh_glsl(sh).version >= 130;
1386     ident_t lut = NULL;
1387     int lut_size = 0;
1388 
1389     if (method == PL_DITHER_ORDERED_FIXED && !can_fixed) {
1390         PL_WARN(sh, "PL_DITHER_ORDERED_FIXED requires glsl version >= 130.."
1391                 " falling back.");
1392         goto fallback;
1393     }
1394 
1395     if (dither_method_is_lut(method)) {
1396         if (!dither_state) {
1397             PL_WARN(sh, "LUT-based dither method specified but no dither state "
1398                     "object given, falling back to non-LUT based methods.");
1399             goto fallback;
1400         }
1401 
1402         struct sh_dither_obj *obj;
1403         obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER,
1404                      struct sh_dither_obj, sh_dither_uninit);
1405         if (!obj)
1406             goto fallback;
1407 
1408         bool changed = obj->method != method;
1409         obj->method = method;
1410 
1411         lut_size = 1 << PL_DEF(params->lut_size, 6);
1412         lut = sh_lut(sh, &(struct sh_lut_params) {
1413             .object = &obj->lut,
1414             .type = PL_VAR_FLOAT,
1415             .width = lut_size,
1416             .height = lut_size,
1417             .comps = 1,
1418             .update = changed,
1419             .fill = fill_dither_matrix,
1420             .priv = obj,
1421         });
1422         if (!lut)
1423             goto fallback;
1424     }
1425 
1426     goto done;
1427 
1428 fallback:
1429     method = can_fixed ? PL_DITHER_ORDERED_FIXED : PL_DITHER_WHITE_NOISE;
1430     // fall through
1431 
1432 done: ;
1433 
1434     int size = 0;
1435     if (lut) {
1436         size = lut_size;
1437     } else if (method == PL_DITHER_ORDERED_FIXED) {
1438         size = 16; // hard-coded size
1439     }
1440 
1441     if (size) {
1442         // Transform the screen position to the cyclic range [0,1)
1443         GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/%s);\n", SH_INT(size));
1444 
1445         if (params->temporal) {
1446             int phase = SH_PARAMS(sh).index % 8;
1447             float r = phase * (M_PI / 2); // rotate
1448             float m = phase < 4 ? 1 : -1; // mirror
1449             float mat[2][2] = {
1450                 {cos(r),     -sin(r)    },
1451                 {sin(r) * m,  cos(r) * m},
1452             };
1453 
1454             ident_t rot = sh_var(sh, (struct pl_shader_var) {
1455                 .var  = pl_var_mat2("dither_rot"),
1456                 .data = &mat[0][0],
1457                 .dynamic = true,
1458             });
1459             GLSL("pos = fract(%s * pos + vec2(1.0));\n", rot);
1460         }
1461     }
1462 
1463     switch (method) {
1464     case PL_DITHER_WHITE_NOISE: {
1465         ident_t prng = sh_prng(sh, params->temporal, NULL);
1466         GLSL("bias = %s;\n", prng);
1467         break;
1468     }
1469 
1470     case PL_DITHER_ORDERED_FIXED:
1471         // Bitwise ordered dither using only 32-bit uints
1472         GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u;     \n"
1473              // Bitwise merge (morton number)
1474              "xy.x = xy.x ^ xy.y;                      \n"
1475              "xy = (xy | xy << 2) & uvec2(0x33333333); \n"
1476              "xy = (xy | xy << 1) & uvec2(0x55555555); \n"
1477              // Bitwise inversion
1478              "uint b = xy.x + (xy.y << 1);             \n"
1479              "b = (b * 0x0802u & 0x22110u) |           \n"
1480              "    (b * 0x8020u & 0x88440u);            \n"
1481              "b = 0x10101u * b;                        \n"
1482              "b = (b >> 16) & 0xFFu;                   \n"
1483              // Generate bias value
1484              "bias = float(b) * 1.0/256.0;             \n");
1485         break;
1486 
1487     case PL_DITHER_BLUE_NOISE:
1488     case PL_DITHER_ORDERED_LUT:
1489         pl_assert(lut);
1490         GLSL("bias = %s(ivec2(pos * %s));\n", lut, SH_FLOAT(lut_size));
1491         break;
1492 
1493     case PL_DITHER_METHOD_COUNT:
1494         pl_unreachable();
1495     }
1496 
1497     uint64_t scale = (1LLU << new_depth) - 1;
1498     GLSL("color = vec4(%llu.0) * color + vec4(bias); \n"
1499          "color = floor(color) * vec4(1.0 / %llu.0); \n"
1500          "}                                          \n",
1501          (long long unsigned) scale, (long long unsigned) scale);
1502 }
1503 
1504 const struct pl_dither_params pl_dither_default_params = {
1505     .method     = PL_DITHER_BLUE_NOISE,
1506     .lut_size   = 6,
1507     .temporal   = false, // commonly flickers on LCDs
1508 };
1509