1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <math.h>
19 
20 #include "common.h"
21 #include "filters.h"
22 #include "shaders.h"
23 #include "dispatch.h"
24 
25 struct cached_frame {
26     uint64_t signature;
27     uint64_t params_hash; // for detecting `pl_render_params` changes
28     struct pl_color_space color;
29     struct pl_icc_profile profile;
30     pl_tex tex;
31     bool evict; // for garbage collection
32 };
33 
34 struct sampler {
35     pl_shader_obj upscaler_state;
36     pl_shader_obj downscaler_state;
37 };
38 
39 struct osd_vertex {
40     float pos[2];
41     float coord[2];
42     float color[4];
43 };
44 
45 struct pl_renderer {
46     pl_gpu gpu;
47     pl_dispatch dp;
48     pl_log log;
49 
50     // Texture format to use for intermediate textures
51     pl_fmt fbofmt[5];
52 
53     // Cached feature checks (inverted)
54     bool disable_compute;       // disable the use of compute shaders
55     bool disable_sampling;      // disable use of advanced scalers
56     bool disable_debanding;     // disable the use of debanding shaders
57     bool disable_linear_hdr;    // disable linear scaling for HDR signals
58     bool disable_linear_sdr;    // disable linear scaling for SDR signals
59     bool disable_blending;      // disable blending for the target/fbofmt
60     bool disable_overlay;       // disable rendering overlays
61     bool disable_icc;           // disable usage of ICC profiles
62     bool disable_peak_detect;   // disable peak detection shader
63     bool disable_grain;         // disable AV1 grain code
64     bool disable_hooks;         // disable user hooks / custom shaders
65     bool disable_mixing;        // disable frame mixing
66 
67     // Shader resource objects and intermediate textures (FBOs)
68     pl_shader_obj peak_detect_state;
69     pl_shader_obj dither_state;
70     pl_shader_obj icc_state;
71     pl_shader_obj grain_state[4];
72     pl_shader_obj lut_state[3];
73     PL_ARRAY(pl_tex) fbos;
74     struct sampler sampler_main;
75     struct sampler samplers_src[4];
76     struct sampler samplers_dst[4];
77 
78     // Temporary storage for vertex/index data
79     PL_ARRAY(struct osd_vertex) osd_vertices;
80     PL_ARRAY(uint16_t) osd_indices;
81     struct pl_vertex_attrib osd_attribs[3];
82 
83     // Frame cache (for frame mixing / interpolation)
84     PL_ARRAY(struct cached_frame) frames;
85     PL_ARRAY(pl_tex) frame_fbos;
86 };
87 
88 enum {
89     // Index into `lut_state`
90     LUT_IMAGE,
91     LUT_TARGET,
92     LUT_PARAMS,
93 };
94 
95 static void find_fbo_format(pl_renderer rr)
96 {
97     struct {
98         enum pl_fmt_type type;
99         int depth;
100         enum pl_fmt_caps caps;
101     } configs[] = {
102         // Prefer floating point formats first
103         {PL_FMT_FLOAT, 16, PL_FMT_CAP_LINEAR},
104         {PL_FMT_FLOAT, 16, PL_FMT_CAP_SAMPLEABLE},
105 
106         // Otherwise, fall back to unorm/snorm, preferring linearly sampleable
107         {PL_FMT_UNORM, 16, PL_FMT_CAP_LINEAR},
108         {PL_FMT_SNORM, 16, PL_FMT_CAP_LINEAR},
109         {PL_FMT_UNORM, 16, PL_FMT_CAP_SAMPLEABLE},
110         {PL_FMT_SNORM, 16, PL_FMT_CAP_SAMPLEABLE},
111 
112         // As a final fallback, allow 8-bit FBO formats (for UNORM only)
113         {PL_FMT_UNORM, 8, PL_FMT_CAP_LINEAR},
114         {PL_FMT_UNORM, 8, PL_FMT_CAP_SAMPLEABLE},
115     };
116 
117     pl_fmt fmt = NULL;
118     for (int i = 0; i < PL_ARRAY_SIZE(configs); i++) {
119         fmt = pl_find_fmt(rr->gpu, configs[i].type, 4, configs[i].depth, 0,
120                           configs[i].caps | PL_FMT_CAP_RENDERABLE);
121         if (fmt) {
122             rr->fbofmt[4] = fmt;
123 
124             // Probe the right variant for each number of channels, falling
125             // back to the next biggest format
126             for (int c = 1; c < 4; c++) {
127                 rr->fbofmt[c] = pl_find_fmt(rr->gpu, configs[i].type, c,
128                                             configs[i].depth, 0, fmt->caps);
129                 rr->fbofmt[c] = PL_DEF(rr->fbofmt[c], rr->fbofmt[c+1]);
130             }
131             break;
132         }
133     }
134 
135     if (!fmt) {
136         PL_WARN(rr, "Found no renderable FBO format! Most features disabled");
137         return;
138     }
139 
140     if (!(fmt->caps & PL_FMT_CAP_STORABLE)) {
141         PL_INFO(rr, "Found no storable FBO format; compute shaders disabled");
142         rr->disable_compute = true;
143     }
144 
145     if (fmt->type != PL_FMT_FLOAT) {
146         PL_INFO(rr, "Found no floating point FBO format; linear light "
147                 "processing disabled for HDR material");
148         rr->disable_linear_hdr = true;
149     }
150 
151     if (fmt->component_depth[0] < 16) {
152         PL_WARN(rr, "FBO format precision low (<16 bit); linear light "
153                 "processing disabled");
154         rr->disable_linear_sdr = true;
155     }
156 }
157 
158 pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu)
159 {
160     pl_renderer rr = pl_alloc_ptr(NULL, rr);
161     *rr = (struct pl_renderer) {
162         .gpu  = gpu,
163         .log = log,
164         .dp  = pl_dispatch_create(log, gpu),
165         .osd_attribs = {
166             {
167                 .name = "pos",
168                 .offset = offsetof(struct osd_vertex, pos),
169                 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
170             }, {
171                 .name = "coord",
172                 .offset = offsetof(struct osd_vertex, coord),
173                 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
174             }, {
175                 .name = "osd_color",
176                 .offset = offsetof(struct osd_vertex, color),
177                 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 4),
178             }
179         },
180     };
181 
182     assert(rr->dp);
183     find_fbo_format(rr);
184     return rr;
185 }
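
/*
 * A minimal usage sketch (not part of this file): callers typically create one
 * renderer per GPU context, reuse it for every frame, and destroy it at
 * shutdown. Frame setup is elided; `pl_render_image` is the public entry point
 * declared in <libplacebo/renderer.h> that drives the machinery below.
 *
 *     pl_renderer rr = pl_renderer_create(log, gpu);
 *     struct pl_frame image = { ... }, target = { ... };
 *     if (!pl_render_image(rr, &image, &target, &pl_render_default_params))
 *         fprintf(stderr, "rendering failed\n");
 *     pl_renderer_destroy(&rr);
 */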
186 
187 static void sampler_destroy(pl_renderer rr, struct sampler *sampler)
188 {
189     pl_shader_obj_destroy(&sampler->upscaler_state);
190     pl_shader_obj_destroy(&sampler->downscaler_state);
191 }
192 
193 void pl_renderer_destroy(pl_renderer *p_rr)
194 {
195     pl_renderer rr = *p_rr;
196     if (!rr)
197         return;
198 
199     // Free all intermediate FBOs
200     for (int i = 0; i < rr->fbos.num; i++)
201         pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]);
202     for (int i = 0; i < rr->frames.num; i++)
203         pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
204     for (int i = 0; i < rr->frame_fbos.num; i++)
205         pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]);
206 
207     // Free all shader resource objects
208     pl_shader_obj_destroy(&rr->peak_detect_state);
209     pl_shader_obj_destroy(&rr->dither_state);
210     pl_shader_obj_destroy(&rr->icc_state);
211     for (int i = 0; i < PL_ARRAY_SIZE(rr->lut_state); i++)
212         pl_shader_obj_destroy(&rr->lut_state[i]);
213     for (int i = 0; i < PL_ARRAY_SIZE(rr->grain_state); i++)
214         pl_shader_obj_destroy(&rr->grain_state[i]);
215 
216     // Free all samplers
217     sampler_destroy(rr, &rr->sampler_main);
218     for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_src); i++)
219         sampler_destroy(rr, &rr->samplers_src[i]);
220     for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_dst); i++)
221         sampler_destroy(rr, &rr->samplers_dst[i]);
222 
223     pl_dispatch_destroy(&rr->dp);
224     pl_free_ptr(p_rr);
225 }
226 
227 size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache)
228 {
229     return pl_dispatch_save(rr->dp, out_cache);
230 }
231 
232 void pl_renderer_load(pl_renderer rr, const uint8_t *cache)
233 {
234     pl_dispatch_load(rr->dp, cache);
235 }
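
/*
 * Hedged usage sketch for the shader cache: these two functions simply forward
 * to pl_dispatch_save/load. Assuming the two-call convention (a NULL buffer
 * first to query the required size), persisting the cache across runs might
 * look like this (`write_file` and `cache_buf` are hypothetical):
 *
 *     size_t size = pl_renderer_save(rr, NULL);
 *     uint8_t *buf = malloc(size);
 *     if (buf) {
 *         pl_renderer_save(rr, buf);
 *         write_file("shader_cache.bin", buf, size);
 *         free(buf);
 *     }
 *     // ... and on the next run:
 *     pl_renderer_load(rr, cache_buf);
 */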
236 
237 void pl_renderer_flush_cache(pl_renderer rr)
238 {
239     for (int i = 0; i < rr->frames.num; i++)
240         pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
241     rr->frames.num = 0;
242 
243     pl_shader_obj_destroy(&rr->peak_detect_state);
244 }
245 
246 const struct pl_render_params pl_render_default_params = {
247     .upscaler           = &pl_filter_spline36,
248     .downscaler         = &pl_filter_mitchell,
249     .frame_mixer        = &pl_oversample_frame_mixer,
250     .lut_entries        = 64,
251     .polar_cutoff       = 0.001,
252 
253     .sigmoid_params     = &pl_sigmoid_default_params,
254     .peak_detect_params = &pl_peak_detect_default_params,
255     .color_map_params   = &pl_color_map_default_params,
256     .dither_params      = &pl_dither_default_params,
257 };
258 
259 const struct pl_render_params pl_render_high_quality_params = {
260     .upscaler           = &pl_filter_ewa_lanczos,
261     .downscaler         = &pl_filter_mitchell,
262     .frame_mixer        = &pl_oversample_frame_mixer,
263     .lut_entries        = 64,
264     .polar_cutoff       = 0.001,
265 
266     .deband_params      = &pl_deband_default_params,
267     .sigmoid_params     = &pl_sigmoid_default_params,
268     .peak_detect_params = &pl_peak_detect_default_params,
269     .color_map_params   = &pl_color_map_default_params,
270     .dither_params      = &pl_dither_default_params,
271 };
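
/*
 * Illustrative sketch: callers are expected to copy one of these presets and
 * override individual fields rather than filling `pl_render_params` in from
 * scratch (the field choices below are arbitrary examples):
 *
 *     struct pl_render_params params = pl_render_high_quality_params;
 *     params.downscaler    = &pl_filter_lanczos;
 *     params.deband_params = NULL; // turn debanding back off
 *     pl_render_image(rr, &image, &target, &params);
 */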
272 
273 // This is only used as a sentinel, to select the GLSL oversampling implementation
274 static double oversample(const struct pl_filter_function *k, double x)
275 {
276     pl_unreachable();
277 }
278 
279 static const struct pl_filter_function oversample_kernel = {
280     .weight = oversample,
281     .tunable = {true},
282     .params = {0.0},
283 };
284 
285 const struct pl_filter_config pl_filter_oversample = {
286     .kernel = &oversample_kernel,
287 };
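
/*
 * How the sentinel is consumed: `sample_src_info` below detects this config by
 * comparing `config->kernel->weight == oversample` and routes it to
 * pl_shader_sample_oversample instead of the generic polar/ortho samplers.
 * A caller opts in simply by pointing a scaler or mixer at it, e.g.:
 *
 *     params.frame_mixer = &pl_filter_oversample;
 */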
288 
289 const struct pl_filter_preset pl_frame_mixers[] = {
290     { "none",           NULL,                       "No frame mixing" },
291     { "oversample",     &pl_filter_oversample,      "Oversample (AKA SmoothMotion)" },
292     { "mitchell_clamp", &pl_filter_mitchell_clamp,  "Cubic spline (clamped)" },
293     {0}
294 };
295 
296 const int pl_num_frame_mixers = PL_ARRAY_SIZE(pl_frame_mixers) - 1;
297 
298 const struct pl_filter_preset pl_scale_filters[] = {
299     {"none",                NULL,                   "Built-in sampling"},
300     {"oversample",          &pl_filter_oversample,  "Oversample (Aspect-preserving NN)"},
301     COMMON_FILTER_PRESETS,
302     {0}
303 };
304 
305 const int pl_num_scale_filters = PL_ARRAY_SIZE(pl_scale_filters) - 1;
306 
307 #define FBOFMT(n) (params->disable_fbos ? NULL : rr->fbofmt[n])
308 
309 // Represents an "in-flight" image, which is either a shader that's in the
310 // process of producing some sort of image, or a texture that needs to be
311 // sampled from
312 struct img {
313     // Effective texture size, always set
314     int w, h;
315 
316     // Recommended format (falls back to FBOFMT otherwise), only for shaders
317     pl_fmt fmt;
318 
319     // Exactly *one* of these two is set:
320     pl_shader sh;
321     pl_tex tex;
322 
323     // Current effective source area, will be sampled by the main scaler
324     struct pl_rect2df rect;
325 
326     // The current effective colorspace
327     struct pl_color_repr repr;
328     struct pl_color_space color;
329     int comps;
330 };
331 
332 // Plane 'type', ordered by increasing priority
333 enum plane_type {
334     PLANE_INVALID = 0,
335     PLANE_ALPHA,
336     PLANE_CHROMA,
337     PLANE_LUMA,
338     PLANE_RGB,
339     PLANE_XYZ,
340 };
341 
342 struct pass_state {
343     void *tmp;
344     pl_renderer rr;
345     const struct pl_render_params *params;
346     struct pl_render_info info; // for info callback
347 
348     // Represents the "current" image which we're in the process of rendering.
349     // This is initially set by pass_read_image, and all of the subsequent
350     // rendering steps will mutate this in-place.
351     struct img img;
352 
353     // Represents the "reference rect". Canonically, this is functionally
354     // equivalent to `image.crop`, but is guaranteed to be valid, and also
355     // updates as the refplane evolves (e.g. due to user hook prescalers)
356     struct pl_rect2df ref_rect;
357 
358     // Integer version of `target.crop`. Semantically identical.
359     struct pl_rect2d dst_rect;
360 
361     // Cached copies of the `image` / `target` for this rendering pass,
362     // corrected to make sure all rects etc. are properly defaulted/inferred.
363     struct pl_frame image;
364     struct pl_frame target;
365 
366     // Some extra plane metadata, inferred from `planes`
367     enum plane_type src_type[4];
368     enum plane_type dst_type[4];
369     int src_ref, dst_ref; // index into `planes`
370 
371     // Metadata for `rr->fbos`
372     bool *fbos_used;
373 };
374 
375 static void info_callback(void *priv, const struct pl_dispatch_info *dinfo)
376 {
377     struct pass_state *pass = priv;
378     const struct pl_render_params *params = pass->params;
379     if (!params->info_callback)
380         return;
381 
382     pass->info.pass = dinfo;
383     params->info_callback(params->info_priv, &pass->info);
384     if (pass->info.stage == PL_RENDER_STAGE_FRAME)
385         pass->info.index++;
386 }
387 
388 static pl_tex get_fbo(struct pass_state *pass, int w, int h, pl_fmt fmt, int comps)
389 {
390     pl_renderer rr = pass->rr;
391     comps = PL_DEF(comps, 4);
392     fmt = PL_DEF(fmt, rr->fbofmt[comps]);
393     if (!fmt)
394         return NULL;
395 
396     struct pl_tex_params params = {
397         .w = w,
398         .h = h,
399         .format = fmt,
400         .sampleable = true,
401         .renderable = true,
402         .storable   = fmt->caps & PL_FMT_CAP_STORABLE,
403     };
404 
405     int best_idx = -1;
406     int best_diff = 0;
407 
408     // Find the best-fitting texture out of rr->fbos
409     for (int i = 0; i < rr->fbos.num; i++) {
410         if (pass->fbos_used[i])
411             continue;
412 
413         // Manhattan distance in size, plus a large penalty for format mismatches
414         int diff = abs(rr->fbos.elem[i]->params.w - w) +
415                    abs(rr->fbos.elem[i]->params.h - h) +
416                    ((rr->fbos.elem[i]->params.format != fmt) ? 1000 : 0);
417 
418         if (best_idx < 0 || diff < best_diff) {
419             best_idx = i;
420             best_diff = diff;
421         }
422     }
423 
424     // No texture found at all, add a new one
425     if (best_idx < 0) {
426         best_idx = rr->fbos.num;
427         PL_ARRAY_APPEND(rr, rr->fbos, NULL);
428         pl_grow(pass->tmp, &pass->fbos_used, rr->fbos.num * sizeof(bool));
429         pass->fbos_used[best_idx] = false;
430     }
431 
432     if (!pl_tex_recreate(rr->gpu, &rr->fbos.elem[best_idx], &params))
433         return NULL;
434 
435     pass->fbos_used[best_idx] = true;
436     return rr->fbos.elem[best_idx];
437 }
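
/*
 * Worked example of the best-fit scoring above (illustrative numbers): for a
 * 1920x1080 request, an idle 1920x1088 FBO with the matching format scores
 * |1920-1920| + |1080-1088| + 0 = 8, while an exact-size FBO with a different
 * format scores 0 + 0 + 1000 = 1000, so the format penalty dominates any size
 * mismatch smaller than 1000 pixels and the same-format slot is picked for
 * pl_tex_recreate.
 */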
438 
439 // Forcibly convert an img to `tex`, dispatching where necessary
440 static pl_tex img_tex(struct pass_state *pass, struct img *img)
441 {
442     if (img->tex) {
443         pl_assert(!img->sh);
444         return img->tex;
445     }
446 
447     pl_renderer rr = pass->rr;
448     pl_tex tex = get_fbo(pass, img->w, img->h, img->fmt, img->comps);
449     img->fmt = NULL;
450 
451     if (!tex) {
452         PL_ERR(rr, "Failed creating FBO texture! Disabling advanced rendering..");
453         memset(rr->fbofmt, 0, sizeof(rr->fbofmt));
454         pl_dispatch_abort(rr->dp, &img->sh);
455         return NULL;
456     }
457 
458     pl_assert(img->sh);
459     bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
460         .shader = &img->sh,
461         .target = tex,
462     });
463 
464     if (!ok) {
465         PL_ERR(rr, "Failed dispatching intermediate pass!");
466         img->sh = pl_dispatch_begin(rr->dp);
467         return NULL;
468     }
469 
470     img->tex = tex;
471     return img->tex;
472 }
473 
474 // Forcibly convert an img to `sh`, sampling where necessary
475 static pl_shader img_sh(struct pass_state *pass, struct img *img)
476 {
477     if (img->sh) {
478         pl_assert(!img->tex);
479         return img->sh;
480     }
481 
482     pl_assert(img->tex);
483     img->sh = pl_dispatch_begin(pass->rr->dp);
484     pl_shader_sample_direct(img->sh, &(struct pl_sample_src) {
485         .tex = img->tex,
486     });
487 
488     img->tex = NULL;
489     return img->sh;
490 }
491 
492 enum sampler_type {
493     SAMPLER_DIRECT,  // pick based on texture caps
494     SAMPLER_NEAREST, // direct sampling, force nearest
495     SAMPLER_BICUBIC, // fast bicubic scaling
496     SAMPLER_COMPLEX, // complex custom filters
497     SAMPLER_OVERSAMPLE,
498 };
499 
500 enum sampler_dir {
501     SAMPLER_NOOP, // 1:1 scaling
502     SAMPLER_UP,   // upscaling
503     SAMPLER_DOWN, // downscaling
504 };
505 
506 struct sampler_info {
507     const struct pl_filter_config *config; // if applicable
508     enum sampler_type type;
509     enum sampler_dir dir;
510     enum sampler_dir dir_sep[2];
511 };
512 
513 static struct sampler_info sample_src_info(struct pass_state *pass,
514                                            const struct pl_sample_src *src)
515 {
516     const struct pl_render_params *params = pass->params;
517     struct sampler_info info = {0};
518     pl_renderer rr = pass->rr;
519 
520     float rx = src->new_w / fabs(pl_rect_w(src->rect));
521     if (rx < 1.0 - 1e-6) {
522         info.dir_sep[0] = SAMPLER_DOWN;
523     } else if (rx > 1.0 + 1e-6) {
524         info.dir_sep[0] = SAMPLER_UP;
525     }
526 
527     float ry = src->new_h / fabs(pl_rect_h(src->rect));
528     if (ry < 1.0 - 1e-6) {
529         info.dir_sep[1] = SAMPLER_DOWN;
530     } else if (ry > 1.0 + 1e-6) {
531         info.dir_sep[1] = SAMPLER_UP;
532     }
533 
534     // We use PL_MAX so downscaling overrides upscaling when choosing scalers
535     info.dir = PL_MAX(info.dir_sep[0], info.dir_sep[1]);
536     switch (info.dir) {
537     case SAMPLER_DOWN:
538         info.config = params->downscaler;
539         break;
540     case SAMPLER_UP:
541         info.config = params->upscaler;
542         break;
543     case SAMPLER_NOOP:
544         info.type = SAMPLER_NEAREST;
545         return info;
546     }
547 
548     int comps = PL_DEF(src->components, 4);
549     if (!FBOFMT(comps) || rr->disable_sampling || !info.config) {
550         info.type = SAMPLER_DIRECT;
551     } else if (info.config->kernel->weight == oversample) {
552         info.type = SAMPLER_OVERSAMPLE;
553     } else {
554         info.type = SAMPLER_COMPLEX;
555 
556         // Try using faster replacements for GPU built-in scalers
557         pl_fmt texfmt = src->tex ? src->tex->params.format : rr->fbofmt[comps];
558         bool can_linear = texfmt->caps & PL_FMT_CAP_LINEAR;
559         bool can_fast = info.dir == SAMPLER_UP || params->skip_anti_aliasing;
560 
561         if (can_fast && !params->disable_builtin_scalers) {
562             if (can_linear && info.config == &pl_filter_bicubic)
563                 info.type = SAMPLER_BICUBIC;
564             if (can_linear && info.config == &pl_filter_bilinear)
565                 info.type = SAMPLER_DIRECT;
566             if (info.config == &pl_filter_nearest)
567                 info.type = can_linear ? SAMPLER_NEAREST : SAMPLER_DIRECT;
568         }
569     }
570 
571     return info;
572 }
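
/*
 * Worked example: sampling a 1920x1080 source rect at new_w = 1280 and
 * new_h = 720 gives rx = 1280/1920 = 0.667 and ry = 720/1080 = 0.667, both
 * below 1 - 1e-6, so dir_sep = {SAMPLER_DOWN, SAMPLER_DOWN}, info.dir is
 * SAMPLER_DOWN and params->downscaler is selected. A 1:1 crop (rx = ry = 1)
 * short-circuits to SAMPLER_NEAREST and never consults the filter configs.
 */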
573 
574 static void dispatch_sampler(struct pass_state *pass, pl_shader sh,
575                              struct sampler *sampler, bool no_compute,
576                              const struct pl_sample_src *src)
577 {
578     const struct pl_render_params *params = pass->params;
579     if (!sampler)
580         goto fallback;
581 
582     pl_renderer rr = pass->rr;
583     struct sampler_info info = sample_src_info(pass, src);
584     pl_shader_obj *lut = NULL;
585     switch (info.dir) {
586     case SAMPLER_NOOP:
587         goto fallback;
588     case SAMPLER_DOWN:
589         lut = &sampler->downscaler_state;
590         break;
591     case SAMPLER_UP:
592         lut = &sampler->upscaler_state;
593         break;
594     }
595 
596     switch (info.type) {
597     case SAMPLER_DIRECT:
598         goto fallback;
599     case SAMPLER_NEAREST:
600         pl_shader_sample_nearest(sh, src);
601         return;
602     case SAMPLER_OVERSAMPLE:
603         pl_shader_sample_oversample(sh, src, info.config->kernel->params[0]);
604         return;
605     case SAMPLER_BICUBIC:
606         pl_shader_sample_bicubic(sh, src);
607         return;
608     case SAMPLER_COMPLEX:
609         break; // continue below
610     }
611 
612     pl_assert(lut);
613     struct pl_sample_filter_params fparams = {
614         .filter      = *info.config,
615         .lut_entries = params->lut_entries,
616         .cutoff      = params->polar_cutoff,
617         .antiring    = params->antiringing_strength,
618         .no_compute  = rr->disable_compute || no_compute,
619         .no_widening = params->skip_anti_aliasing,
620         .lut         = lut,
621     };
622 
623     bool ok;
624     if (info.config->polar) {
625         // Polar samplers are always a single function call
626         ok = pl_shader_sample_polar(sh, src, &fparams);
627     } else if (info.dir_sep[0] && info.dir_sep[1]) {
628         // Scaling is needed in both directions
629         pl_shader tsh = pl_dispatch_begin(rr->dp);
630         ok = pl_shader_sample_ortho(tsh, PL_SEP_VERT, src, &fparams);
631         if (!ok) {
632             pl_dispatch_abort(rr->dp, &tsh);
633             goto done;
634         }
635 
636         struct img img = {
637             .sh = tsh,
638             .w  = src->tex->params.w,
639             .h  = src->new_h,
640             .comps = src->components,
641         };
642 
643         struct pl_sample_src src2 = *src;
644         src2.tex = img_tex(pass, &img);
645         src2.scale = 1.0;
646         ok = src2.tex && pl_shader_sample_ortho(sh, PL_SEP_HORIZ, &src2, &fparams);
647     } else if (info.dir_sep[0]) {
648         // Scaling is needed only in the horizontal direction
649         ok = pl_shader_sample_ortho(sh, PL_SEP_HORIZ, src, &fparams);
650     } else {
651         // Scaling is needed only in the vertical direction
652         pl_assert(info.dir_sep[1]);
653         ok = pl_shader_sample_ortho(sh, PL_SEP_VERT, src, &fparams);
654     }
655 
656 done:
657     if (!ok) {
658         PL_ERR(rr, "Failed dispatching scaler.. disabling");
659         rr->disable_sampling = true;
660         goto fallback;
661     }
662 
663     return;
664 
665 fallback:
666     // If all else fails, fall back to auto sampling
667     pl_shader_sample_direct(sh, src);
668 }
669 
670 static void swizzle_color(pl_shader sh, int comps, const int comp_map[4],
671                           bool force_alpha)
672 {
673     ident_t orig = sh_fresh(sh, "orig_color");
674     GLSL("vec4 %s = color;   \n"
675          "color = vec4(0.0); \n", orig);
676 
677     static const int def_map[4] = {0, 1, 2, 3};
678     comp_map = PL_DEF(comp_map, def_map);
679 
680     for (int c = 0; c < comps; c++) {
681         if (comp_map[c] >= 0)
682             GLSL("color[%d] = %s[%d]; \n", c, orig, comp_map[c]);
683     }
684 
685     if (force_alpha)
686         GLSL("color.a = %s.a; \n", orig);
687 }
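
/*
 * Worked example: with comps = 3 and comp_map = {2, 1, 0, -1}, the emitted
 * GLSL is equivalent to:
 *
 *     vec4 orig = color;
 *     color = vec4(0.0);
 *     color[0] = orig[2];
 *     color[1] = orig[1];
 *     color[2] = orig[0];
 *
 * i.e. an RGB <-> BGR swizzle; with force_alpha set, the original alpha is
 * additionally copied back via `color.a = orig.a`.
 */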
688 
689 static void draw_overlays(struct pass_state *pass, pl_tex fbo,
690                           int comps, const int comp_map[4],
691                           const struct pl_overlay *overlays, int num,
692                           struct pl_color_space color, struct pl_color_repr repr,
693                           bool use_sigmoid, struct pl_transform2x2 *scale)
694 {
695     const struct pl_render_params *params = pass->params;
696     pl_renderer rr = pass->rr;
697     if (num <= 0 || rr->disable_overlay)
698         return;
699 
700     enum pl_fmt_caps caps = fbo->params.format->caps;
701     if (!rr->disable_blending && !(caps & PL_FMT_CAP_BLENDABLE)) {
702         PL_WARN(rr, "Trying to draw an overlay to a non-blendable target. "
703                 "Alpha blending is disabled, results may be incorrect!");
704         rr->disable_blending = true;
705     }
706 
707     for (int n = 0; n < num; n++) {
708         struct pl_overlay ol = overlays[n];
709         struct pl_overlay_part fallback;
710         if (!ol.tex) {
711             // Backwards compatibility
712             ol.tex = ol.plane.texture;
713             ol.parts = &fallback;
714             ol.num_parts = 1;
715             fallback = (struct pl_overlay_part) {
716                 .src = {
717                     .x0 = -ol.plane.shift_x,
718                     .y0 = -ol.plane.shift_y,
719                     .x1 = ol.tex->params.w - ol.plane.shift_x,
720                     .y1 = ol.tex->params.h - ol.plane.shift_y,
721                 },
722                 .dst = ol.rect,
723                 .color = {
724                     ol.base_color[0],
725                     ol.base_color[1],
726                     ol.base_color[2],
727                     1.0,
728                 },
729             };
730         }
731 
732         if (!ol.num_parts)
733             continue;
734 
735         // Construct vertex/index buffers
736         rr->osd_vertices.num = 0;
737         rr->osd_indices.num = 0;
738         for (int i = 0; i < ol.num_parts; i++) {
739             const struct pl_overlay_part *part = &ol.parts[i];
740 
741 #define EMIT_VERT(x, y)                                                         \
742             do {                                                                \
743                 float pos[2] = { part->dst.x, part->dst.y };                    \
744                 if (scale)                                                      \
745                     pl_transform2x2_apply(scale, pos);                          \
746                 PL_ARRAY_APPEND(rr, rr->osd_vertices, (struct osd_vertex) {     \
747                     .pos = {                                                    \
748                         2.0 * (pos[0] / fbo->params.w) - 1.0,                   \
749                         2.0 * (pos[1] / fbo->params.h) - 1.0,                   \
750                     },                                                          \
751                     .coord = {                                                  \
752                         part->src.x / ol.tex->params.w,                         \
753                         part->src.y / ol.tex->params.h,                         \
754                     },                                                          \
755                     .color = {                                                  \
756                         part->color[0], part->color[1],                         \
757                         part->color[2], part->color[3],                         \
758                     },                                                          \
759                 });                                                             \
760             } while (0)
761 
762             int idx_base = rr->osd_vertices.num;
763             EMIT_VERT(x0, y0); // idx 0: top left
764             EMIT_VERT(x1, y0); // idx 1: top right
765             EMIT_VERT(x0, y1); // idx 2: bottom left
766             EMIT_VERT(x1, y1); // idx 3: bottom right
767             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 0);
768             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1);
769             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2);
770             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2);
771             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1);
772             PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 3);
773         }
774 
775         // Draw parts
776         pl_shader sh = pl_dispatch_begin(rr->dp);
777         ident_t tex = sh_desc(sh, (struct pl_shader_desc) {
778             .desc = {
779                 .name = "osd_tex",
780                 .type = PL_DESC_SAMPLED_TEX,
781             },
782             .binding = {
783                 .object = ol.tex,
784                 .sample_mode = (ol.tex->params.format->caps & PL_FMT_CAP_LINEAR)
785                     ? PL_TEX_SAMPLE_LINEAR
786                     : PL_TEX_SAMPLE_NEAREST,
787             },
788         });
789 
790         sh_describe(sh, "overlay");
791         GLSL("// overlay \n");
792 
793         switch (ol.mode) {
794         case PL_OVERLAY_NORMAL:
795             GLSL("vec4 color = %s(%s, coord); \n",
796                  sh_tex_fn(sh, ol.tex->params), tex);
797             break;
798         case PL_OVERLAY_MONOCHROME:
799             GLSL("vec4 color = osd_color; \n");
800             break;
801         case PL_OVERLAY_MODE_COUNT:
802             pl_unreachable();
803         };
804 
805         sh->res.output = PL_SHADER_SIG_COLOR;
806         pl_shader_decode_color(sh, &ol.repr, NULL);
807         pl_shader_color_map(sh, params->color_map_params, ol.color, color,
808                             NULL, false);
809 
810         if (use_sigmoid)
811             pl_shader_sigmoidize(sh, params->sigmoid_params);
812 
813         repr.alpha = PL_ALPHA_PREMULTIPLIED;
814         pl_shader_encode_color(sh, &repr);
815         if (ol.mode == PL_OVERLAY_MONOCHROME) {
816             GLSL("color.rgba *= %s(%s, coord).r; \n",
817                  sh_tex_fn(sh, ol.tex->params), tex);
818         }
819 
820         swizzle_color(sh, comps, comp_map, true);
821 
822         struct pl_blend_params blend_params = {
823             .src_rgb = PL_BLEND_ONE,
824             .src_alpha = PL_BLEND_SRC_ALPHA,
825             // FIXME: What if the target is not premultiplied?
826             .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA,
827             .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA,
828         };
829 
830         bool ok = pl_dispatch_vertex(rr->dp, &(struct pl_dispatch_vertex_params) {
831             .shader = &sh,
832             .target = fbo,
833             .blend_params = rr->disable_blending ? NULL : &blend_params,
834             .vertex_stride = sizeof(struct osd_vertex),
835             .num_vertex_attribs = ol.mode == PL_OVERLAY_NORMAL ? 2 : 3,
836             .vertex_attribs = rr->osd_attribs,
837             .vertex_position_idx = 0,
838             .vertex_coords = PL_COORDS_NORMALIZED,
839             .vertex_type = PL_PRIM_TRIANGLE_LIST,
840             .vertex_count = rr->osd_indices.num,
841             .vertex_data = rr->osd_vertices.elem,
842             .index_data = rr->osd_indices.elem,
843         });
844 
845         if (!ok) {
846             PL_ERR(rr, "Failed rendering overlays!");
847             rr->disable_overlay = true;
848             return;
849         }
850     }
851 }
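
/*
 * Hedged sketch of the data this function consumes (only fields referenced
 * above): a caller drawing a single-part subtitle overlay on the target frame
 * might fill in something like the following, with `subs_tex` uploaded
 * beforehand and the overlay attached via pl_frame.overlays/num_overlays:
 *
 *     struct pl_overlay_part part = {
 *         .src = { 0, 0, subs_tex->params.w, subs_tex->params.h },
 *         .dst = { 100, 900, 100 + subs_tex->params.w, 900 + subs_tex->params.h },
 *     };
 *     struct pl_overlay ol = {
 *         .tex       = subs_tex,
 *         .parts     = &part,
 *         .num_parts = 1,
 *         .mode      = PL_OVERLAY_NORMAL,
 *         .repr      = pl_color_repr_rgb,
 *         .color     = pl_color_space_srgb,
 *     };
 */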
852 
853 static pl_tex get_hook_tex(void *priv, int width, int height)
854 {
855     struct pass_state *pass = priv;
856 
857     return get_fbo(pass, width, height, NULL, 4);
858 }
859 
860 // Returns whether any hook was applied (even if there were errors)
861 static bool pass_hook(struct pass_state *pass, struct img *img,
862                       enum pl_hook_stage stage)
863 {
864     const struct pl_render_params *params = pass->params;
865     pl_renderer rr = pass->rr;
866     if (!rr->fbofmt[4] || rr->disable_hooks)
867         return false;
868 
869     bool ret = false;
870 
871     for (int n = 0; n < params->num_hooks; n++) {
872         const struct pl_hook *hook = params->hooks[n];
873         if (!(hook->stages & stage))
874             continue;
875 
876         PL_TRACE(rr, "Dispatching hook %d stage 0x%x", n, stage);
877         struct pl_hook_params hparams = {
878             .gpu = rr->gpu,
879             .dispatch = rr->dp,
880             .get_tex = get_hook_tex,
881             .priv = pass,
882             .stage = stage,
883             .rect = img->rect,
884             .repr = img->repr,
885             .color = img->color,
886             .components = img->comps,
887             .src_rect = pass->ref_rect,
888             .dst_rect = pass->dst_rect,
889         };
890 
891         // TODO: Add some sort of `test` API function to the hooks that allows
892         // us to skip having to touch the `img` state at all for no-ops
893 
894         switch (hook->input) {
895         case PL_HOOK_SIG_NONE:
896             break;
897 
898         case PL_HOOK_SIG_TEX: {
899             hparams.tex = img_tex(pass, img);
900             if (!hparams.tex) {
901                 PL_ERR(rr, "Failed dispatching shader prior to hook!");
902                 goto error;
903             }
904             break;
905         }
906 
907         case PL_HOOK_SIG_COLOR:
908             hparams.sh = img_sh(pass, img);
909             break;
910 
911         case PL_HOOK_SIG_COUNT:
912             pl_unreachable();
913         }
914 
915         struct pl_hook_res res = hook->hook(hook->priv, &hparams);
916         if (res.failed) {
917             PL_ERR(rr, "Failed executing hook, disabling");
918             goto error;
919         }
920 
921         bool resizable = pl_hook_stage_resizable(stage);
922         switch (res.output) {
923         case PL_HOOK_SIG_NONE:
924             break;
925 
926         case PL_HOOK_SIG_TEX:
927             if (!resizable) {
928                 if (res.tex->params.w != img->w ||
929                     res.tex->params.h != img->h ||
930                     !pl_rect2d_eq(res.rect, img->rect))
931                 {
932                     PL_ERR(rr, "User hook tried resizing non-resizable stage!");
933                     goto error;
934                 }
935             }
936 
937             *img = (struct img) {
938                 .tex = res.tex,
939                 .repr = res.repr,
940                 .color = res.color,
941                 .comps = res.components,
942                 .rect = res.rect,
943                 .w = res.tex->params.w,
944                 .h = res.tex->params.h,
945             };
946             break;
947 
948         case PL_HOOK_SIG_COLOR:
949             if (!resizable) {
950                 if (res.sh->output_w != img->w ||
951                     res.sh->output_h != img->h ||
952                     !pl_rect2d_eq(res.rect, img->rect))
953                 {
954                     PL_ERR(rr, "User hook tried resizing non-resizable stage!");
955                     goto error;
956                 }
957             }
958 
959             *img = (struct img) {
960                 .sh = res.sh,
961                 .repr = res.repr,
962                 .color = res.color,
963                 .comps = res.components,
964                 .rect = res.rect,
965                 .w = res.sh->output_w,
966                 .h = res.sh->output_h,
967             };
968             break;
969 
970         case PL_HOOK_SIG_COUNT:
971             pl_unreachable();
972         }
973 
974         // a hook was performed successfully
975         ret = true;
976     }
977 
978     return ret;
979 
980 error:
981     rr->disable_hooks = true;
982 
983     // Make sure the state remains as valid as possible, even if the resulting
984     // shaders might end up nonsensical, to prevent segfaults
985     if (!img->tex && !img->sh)
986         img->sh = pl_dispatch_begin(rr->dp);
987     return ret;
988 }
989 
990 // `deband_src` results
991 enum {
992     DEBAND_NOOP = 0, // no debanding was performed
993     DEBAND_NORMAL,   // debanding was performed, the plane should still be scaled
994     DEBAND_SCALED,   // debanding took care of scaling as well
995 };
996 
997 static int deband_src(struct pass_state *pass, pl_shader psh,
998                       struct pl_sample_src *psrc)
999 {
1000     const struct pl_render_params *params = pass->params;
1001     const struct pl_frame *image = &pass->image;
1002     pl_renderer rr = pass->rr;
1003     if (rr->disable_debanding || !params->deband_params)
1004         return DEBAND_NOOP;
1005 
1006     if (!(psrc->tex->params.format->caps & PL_FMT_CAP_LINEAR)) {
1007         PL_WARN(rr, "Debanding requires uploaded textures to be linearly "
1008                 "sampleable (params.sample_mode = PL_TEX_SAMPLE_LINEAR)! "
1009                 "Disabling debanding..");
1010         rr->disable_debanding = true;
1011         return DEBAND_NOOP;
1012     }
1013 
1014     // The debanding shader can replace direct GPU sampling
1015     bool deband_scales = sample_src_info(pass, psrc).type == SAMPLER_DIRECT;
1016 
1017     pl_shader sh = psh;
1018     struct pl_sample_src *src = psrc;
1019     struct pl_sample_src fixed;
1020     if (!deband_scales) {
1021         // Only sample/deband the relevant cut-out, but round it to the nearest
1022         // integer to avoid doing fractional scaling
1023         fixed = *src;
1024         fixed.rect.x0 = floorf(fixed.rect.x0);
1025         fixed.rect.y0 = floorf(fixed.rect.y0);
1026         fixed.rect.x1 = ceilf(fixed.rect.x1);
1027         fixed.rect.y1 = ceilf(fixed.rect.y1);
1028         fixed.new_w = pl_rect_w(fixed.rect);
1029         fixed.new_h = pl_rect_h(fixed.rect);
1030         src = &fixed;
1031 
1032         if (fixed.new_w == psrc->new_w &&
1033             fixed.new_h == psrc->new_h &&
1034             pl_rect2d_eq(fixed.rect, psrc->rect))
1035         {
1036             // If there's nothing left to be done (i.e. we're already rendering
1037             // an exact integer crop without scaling), also skip the scalers
1038             deband_scales = true;
1039         } else {
1040             sh = pl_dispatch_begin_ex(rr->dp, true);
1041         }
1042     }
1043 
1044     // Divide the deband grain scale by the effective current colorspace nominal
1045     // peak, to make sure the output intensity of the grain is as independent
1046     // of the source as possible, even though it happens this early in the
1047     // process (well before any linearization / output adaptation)
1048     struct pl_deband_params dparams = *params->deband_params;
1049     float scale = pl_color_transfer_nominal_peak(image->color.transfer)
1050                 * image->color.sig_scale;
1051     dparams.grain /= scale;
1052 
1053     pl_shader_deband(sh, src, &dparams);
1054 
1055     if (deband_scales)
1056         return DEBAND_SCALED;
1057 
1058     struct img img = {
1059         .sh = sh,
1060         .w  = src->new_w,
1061         .h  = src->new_h,
1062         .comps = src->components,
1063     };
1064 
1065     pl_tex new = img_tex(pass, &img);
1066     if (!new) {
1067         PL_ERR(rr, "Failed dispatching debanding shader.. disabling debanding!");
1068         rr->disable_debanding = true;
1069         return DEBAND_NOOP;
1070     }
1071 
1072     // Update the original pl_sample_src to point to the new texture
1073     psrc->tex = new;
1074     psrc->rect.x0 -= src->rect.x0;
1075     psrc->rect.y0 -= src->rect.y0;
1076     psrc->rect.x1 -= src->rect.x0;
1077     psrc->rect.y1 -= src->rect.y0;
1078     psrc->scale = 1.0;
1079     return DEBAND_NORMAL;
1080 }
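
/*
 * Worked example of the non-scaling path above: a source crop of
 * {12.3, 7.8, 1932.3, 1087.8} is rounded out to fixed.rect = {12, 7, 1933, 1088}
 * (floor/ceil), so the deband pass renders a 1921x1081 intermediate texture.
 * The caller's rect is then shifted by (-12, -7) to {0.3, 0.8, 1920.3, 1080.8},
 * so the main scaler still samples exactly the original fractional crop, just
 * relative to the new texture.
 */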
1081 
1082 static void hdr_update_peak(struct pass_state *pass)
1083 {
1084     const struct pl_render_params *params = pass->params;
1085     pl_renderer rr = pass->rr;
1086     if (!params->peak_detect_params || !pl_color_space_is_hdr(pass->img.color))
1087         goto cleanup;
1088 
1089     if (rr->disable_compute || rr->disable_peak_detect)
1090         goto cleanup;
1091 
1092     float src_peak = pass->img.color.sig_peak * pass->img.color.sig_scale;
1093     float dst_peak = pass->target.color.sig_peak * pass->target.color.sig_scale;
1094     if (src_peak <= dst_peak + 1e-6)
1095         goto cleanup; // no adaptation needed
1096 
1097     if (params->lut && params->lut_type == PL_LUT_CONVERSION)
1098         goto cleanup; // LUT handles tone mapping
1099 
1100     if (!FBOFMT(4) && !params->allow_delayed_peak_detect) {
1101         PL_WARN(rr, "Disabling peak detection because "
1102                 "`allow_delayed_peak_detect` is false, but lack of FBOs "
1103                 "forces the result to be delayed.");
1104         rr->disable_peak_detect = true;
1105         goto cleanup;
1106     }
1107 
1108     bool ok = pl_shader_detect_peak(img_sh(pass, &pass->img), pass->img.color,
1109                                     &rr->peak_detect_state,
1110                                     params->peak_detect_params);
1111     if (!ok) {
1112         PL_WARN(rr, "Failed creating HDR peak detection shader.. disabling");
1113         rr->disable_peak_detect = true;
1114         goto cleanup;
1115     }
1116 
1117     return;
1118 
1119 cleanup:
1120     // No peak detection required or supported, so clean up the state to avoid
1121     // confusing it with later frames where peak detection is enabled again
1122     pl_shader_obj_destroy(&rr->peak_detect_state);
1123 }
1124 
1125 struct plane_state {
1126     enum plane_type type;
1127     struct pl_plane plane;
1128     struct img img; // for per-plane shaders
1129 };
1130 
1131 static const char *plane_type_names[] = {
1132     [PLANE_INVALID] = "invalid",
1133     [PLANE_ALPHA]   = "alpha",
1134     [PLANE_CHROMA]  = "chroma",
1135     [PLANE_LUMA]    = "luma",
1136     [PLANE_RGB]     = "rgb",
1137     [PLANE_XYZ]     = "xyz",
1138 };
1139 
1140 static void log_plane_info(pl_renderer rr, const struct plane_state *st)
1141 {
1142     const struct pl_plane *plane = &st->plane;
1143     PL_TRACE(rr, "    Type: %s", plane_type_names[st->type]);
1144 
1145     switch (plane->components) {
1146     case 0:
1147         PL_TRACE(rr, "    Components: (none)");
1148         break;
1149     case 1:
1150         PL_TRACE(rr, "    Components: {%d}",
1151                  plane->component_mapping[0]);
1152         break;
1153     case 2:
1154         PL_TRACE(rr, "    Components: {%d %d}",
1155                  plane->component_mapping[0],
1156                  plane->component_mapping[1]);
1157         break;
1158     case 3:
1159         PL_TRACE(rr, "    Components: {%d %d %d}",
1160                  plane->component_mapping[0],
1161                  plane->component_mapping[1],
1162                  plane->component_mapping[2]);
1163         break;
1164     case 4:
1165         PL_TRACE(rr, "    Components: {%d %d %d %d}",
1166                  plane->component_mapping[0],
1167                  plane->component_mapping[1],
1168                  plane->component_mapping[2],
1169                  plane->component_mapping[3]);
1170         break;
1171     }
1172 
1173     PL_TRACE(rr, "    Rect: {%f %f} -> {%f %f}",
1174              st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1);
1175 
1176     PL_TRACE(rr, "    Bits: %d (used) / %d (sampled), shift %d",
1177              st->img.repr.bits.color_depth,
1178              st->img.repr.bits.sample_depth,
1179              st->img.repr.bits.bit_shift);
1180 }
1181 
1182 // Returns true if grain was applied
1183 static bool plane_av1_grain(struct pass_state *pass, int plane_idx,
1184                             struct plane_state *st,
1185                             const struct plane_state *ref,
1186                             const struct pl_frame *image)
1187 {
1188     const struct pl_render_params *params = pass->params;
1189     pl_renderer rr = pass->rr;
1190     if (rr->disable_grain)
1191         return false;
1192 
1193     struct img *img = &st->img;
1194     struct pl_plane *plane = &st->plane;
1195     struct pl_color_repr repr = st->img.repr;
1196     struct pl_av1_grain_params grain_params = {
1197         .data = image->av1_grain,
1198         .luma_tex = ref->plane.texture,
1199         .repr = &repr,
1200         .components = plane->components,
1201     };
1202 
1203     for (int c = 0; c < plane->components; c++)
1204         grain_params.component_mapping[c] = plane->component_mapping[c];
1205 
1206     for (int c = 0; c < ref->plane.components; c++) {
1207         if (ref->plane.component_mapping[c] == PL_CHANNEL_Y)
1208             grain_params.luma_comp = c;
1209     }
1210 
1211     if (!pl_needs_av1_grain(&grain_params))
1212         return false;
1213 
1214     if (!FBOFMT(plane->components)) {
1215         PL_ERR(rr, "AV1 grain required but no renderable format available.. "
1216               "disabling!");
1217         rr->disable_grain = true;
1218         return false;
1219     }
1220 
1221     grain_params.tex = img_tex(pass, img);
1222     if (!grain_params.tex)
1223         return false;
1224 
1225     img->sh = pl_dispatch_begin_ex(rr->dp, true);
1226     if (!pl_shader_av1_grain(img->sh, &rr->grain_state[plane_idx], &grain_params)) {
1227         pl_dispatch_abort(rr->dp, &img->sh);
1228         rr->disable_grain = true;
1229         return false;
1230     }
1231 
1232     img->tex = NULL;
1233     if (!img_tex(pass, img)) {
1234         PL_ERR(rr, "Failed applying AV1 grain.. disabling!");
1235         pl_dispatch_abort(rr->dp, &img->sh);
1236         img->tex = grain_params.tex;
1237         rr->disable_grain = true;
1238         return false;
1239     }
1240 
1241     img->repr = repr;
1242     return true;
1243 }
1244 
1245 static const enum pl_hook_stage plane_hook_stages[] = {
1246     [PLANE_ALPHA]   = PL_HOOK_ALPHA_INPUT,
1247     [PLANE_CHROMA]  = PL_HOOK_CHROMA_INPUT,
1248     [PLANE_LUMA]    = PL_HOOK_LUMA_INPUT,
1249     [PLANE_RGB]     = PL_HOOK_RGB_INPUT,
1250     [PLANE_XYZ]     = PL_HOOK_XYZ_INPUT,
1251 };
1252 
1253 static enum pl_lut_type guess_frame_lut_type(const struct pl_frame *frame,
1254                                              bool reversed)
1255 {
1256     if (!frame->lut)
1257         return PL_LUT_UNKNOWN;
1258     if (frame->lut_type)
1259         return frame->lut_type;
1260 
1261     enum pl_color_system sys_in = frame->lut->repr_in.sys;
1262     enum pl_color_system sys_out = frame->lut->repr_out.sys;
1263     if (reversed)
1264         PL_SWAP(sys_in, sys_out);
1265 
1266     if (sys_in == PL_COLOR_SYSTEM_RGB && sys_out == sys_in)
1267         return PL_LUT_NORMALIZED;
1268 
1269     if (sys_in == frame->repr.sys && sys_out == PL_COLOR_SYSTEM_RGB)
1270         return PL_LUT_CONVERSION;
1271 
1272     // Unknown, just fall back to the default
1273     return PL_LUT_NATIVE;
1274 }
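
/*
 * Worked examples of the heuristic above (with frame->lut_type unset): a LUT
 * whose repr_in and repr_out are both PL_COLOR_SYSTEM_RGB is treated as
 * PL_LUT_NORMALIZED (RGB in, RGB out); a LUT going from the frame's own system
 * (e.g. BT.709 YCbCr) to RGB is treated as a PL_LUT_CONVERSION that replaces
 * color decoding; everything else falls back to PL_LUT_NATIVE.
 */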
1275 
1276 static pl_fmt merge_fmt(pl_renderer rr,
1277                                       const struct img *a, const struct img *b)
1278 {
1279     pl_fmt fmta = a->tex ? a->tex->params.format : a->fmt;
1280     pl_fmt fmtb = b->tex->params.format;
1281     pl_assert(fmta && fmtb);
1282     if (fmta->type != fmtb->type)
1283         return NULL;
1284 
1285     int num_comps = PL_MIN(4, a->comps + b->comps);
1286     int min_depth = PL_MAX(a->repr.bits.sample_depth, b->repr.bits.sample_depth);
1287 
1288     // Only return formats that support all relevant caps of both formats
1289     const enum pl_fmt_caps mask = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR;
1290     enum pl_fmt_caps req_caps = (fmta->caps & mask) | (fmtb->caps & mask);
1291 
1292     return pl_find_fmt(rr->gpu, fmta->type, num_comps, min_depth, 0, req_caps);
1293 }
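
/*
 * Worked example: merging two single-component chroma planes whose textures are
 * both 16-bit UNORM with sample_depth 16 requests a 2-component PL_FMT_UNORM
 * format of depth >= 16 carrying the union of both formats' SAMPLEABLE/LINEAR
 * caps. If no such format exists, or the types differ (e.g. UNORM vs FLOAT),
 * NULL is returned and the planes stay separate.
 */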
1294 
1295 // Applies a series of rough heuristics to figure out whether we expect any
1296 // performance gains from plane merging. This is basically a series of checks
1297 // for operations that we *know* benefit from merged planes
1298 static bool want_merge(struct pass_state *pass,
1299                        const struct plane_state *st,
1300                        const struct plane_state *ref)
1301 {
1302     const struct pl_render_params *params = pass->params;
1303     const pl_renderer rr = pass->rr;
1304     if (!rr->fbofmt[4])
1305         return false;
1306 
1307     // Debanding
1308     if (!rr->disable_debanding && params->deband_params)
1309         return true;
1310 
1311     // Other plane hooks, which are generally nontrivial
1312     enum pl_hook_stage stage = plane_hook_stages[st->type];
1313     for (int i = 0; i < params->num_hooks; i++) {
1314         if (params->hooks[i]->stages & stage)
1315             return true;
1316     }
1317 
1318     // Non-trivial scaling
1319     struct pl_sample_src src = {
1320         .new_w = ref->img.w,
1321         .new_h = ref->img.h,
1322         .rect = {
1323             .x1 = st->img.w,
1324             .y1 = st->img.h,
1325         },
1326     };
1327 
1328     struct sampler_info info = sample_src_info(pass, &src);
1329     if (info.type == SAMPLER_COMPLEX)
1330         return true;
1331 
1332     // AV1 grain synthesis can be merged for compatible channels, saving on
1333     // redundant sampling of the grain/offset textures
1334     struct pl_av1_grain_params grain_params = {
1335         .data = pass->image.av1_grain,
1336         .repr = (struct pl_color_repr *) &st->img.repr,
1337         .components = st->plane.components,
1338     };
1339 
1340     for (int c = 0; c < st->plane.components; c++)
1341         grain_params.component_mapping[c] = st->plane.component_mapping[c];
1342 
1343     if (!rr->disable_grain && pl_needs_av1_grain(&grain_params))
1344         return true;
1345 
1346     return false;
1347 }
1348 
1349 // This scales and merges all of the source images, and initializes pass->img.
1350 static bool pass_read_image(struct pass_state *pass)
1351 {
1352     const struct pl_render_params *params = pass->params;
1353     struct pl_frame *image = &pass->image;
1354     pl_renderer rr = pass->rr;
1355 
1356     struct plane_state planes[4];
1357     struct plane_state *ref = &planes[pass->src_ref];
1358 
1359     for (int i = 0; i < image->num_planes; i++) {
1360         planes[i] = (struct plane_state) {
1361             .type = pass->src_type[i],
1362             .plane = image->planes[i],
1363             .img = {
1364                 .w = image->planes[i].texture->params.w,
1365                 .h = image->planes[i].texture->params.h,
1366                 .tex = image->planes[i].texture,
1367                 .repr = image->repr,
1368                 .color = image->color,
1369                 .comps = image->planes[i].components,
1370             },
1371         };
1372     }
1373 
1374     // Original ref texture, even after preprocessing
1375     pl_tex ref_tex = ref->plane.texture;
1376 
1377     // Merge all compatible planes into 'combined' shaders
1378     for (int i = 0; i < image->num_planes; i++) {
1379         struct plane_state *sti = &planes[i];
1380         if (!sti->type)
1381             continue;
1382         if (!want_merge(pass, sti, ref))
1383             continue;
1384 
1385         for (int j = i+1; j < image->num_planes; j++) {
1386             struct plane_state *stj = &planes[j];
1387             bool merge = sti->type == stj->type &&
1388                          sti->img.w == stj->img.w &&
1389                          sti->img.h == stj->img.h &&
1390                          sti->plane.shift_x == stj->plane.shift_x &&
1391                          sti->plane.shift_y == stj->plane.shift_y;
1392             if (!merge)
1393                 continue;
1394 
1395             pl_fmt fmt = merge_fmt(rr, &sti->img, &stj->img);
1396             if (!fmt)
1397                 continue;
1398 
1399             PL_TRACE(rr, "Merging plane %d into plane %d", j, i);
1400             pl_shader sh = sti->img.sh;
1401             if (!sh) {
1402                 sh = sti->img.sh = pl_dispatch_begin_ex(pass->rr->dp, true);
1403                 sh_describe(sh, "merging planes");
1404                 GLSL("vec4 tmp; \n");
1405                 pl_shader_sample_direct(sh, &(struct pl_sample_src) {
1406                     .tex = sti->img.tex,
1407                 });
1408                 sti->img.tex = NULL;
1409             }
1410 
1411             pl_shader psh = pl_dispatch_begin_ex(pass->rr->dp, true);
1412             pl_shader_sample_direct(psh, &(struct pl_sample_src) {
1413                 .tex = stj->img.tex,
1414             });
1415 
1416             ident_t sub = sh_subpass(sh, psh);
1417             pl_dispatch_abort(rr->dp, &psh);
1418             if (!sub)
1419                 break; // skip merging
1420 
1421             GLSL("tmp = %s(); \n", sub);
1422             for (int jc = 0; jc < stj->img.comps; jc++) {
1423                 int map = stj->plane.component_mapping[jc];
1424                 if (!map)
1425                     continue;
1426                 int ic = sti->img.comps++;
1427                 pl_assert(ic < 4);
1428                 GLSL("color[%d] = tmp[%d]; \n", ic, jc);
1429                 sti->plane.components = sti->img.comps;
1430                 sti->plane.component_mapping[ic] = map;
1431             }
1432 
1433             sti->img.fmt = fmt;
1434             *stj = (struct plane_state) {0};
1435         }
1436 
1437         if (!img_tex(pass, &sti->img)) {
1438             PL_ERR(rr, "Failed dispatching plane merging shader, disabling FBOs!");
1439             memset(rr->fbofmt, 0, sizeof(rr->fbofmt));
1440             return false;
1441         }
1442     }
1443 
1444     // Compute the sampling rect of each plane
1445     for (int i = 0; i < image->num_planes; i++) {
1446         struct plane_state *st = &planes[i];
1447         if (!st->type)
1448             continue;
1449 
1450         float rx = (float) ref_tex->params.w / st->plane.texture->params.w,
1451               ry = (float) ref_tex->params.h / st->plane.texture->params.h;
1452 
1453         // Only accept integer scaling ratios. This accounts for the fact that
1454         // fractionally subsampled planes get rounded up to the nearest integer
1455         // size, which we want to discard.
1456         float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
1457               rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
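
        // Worked example: a 1919-wide image with 4:2:0 chroma stored as 960
        // texels gives rx = 1919/960 = 1.999, which rounds to rrx = 2, so the
        // chroma crop below is exactly half of the luma crop instead of
        // picking up a spurious ~0.05% rescale.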
1458 
1459         float sx = st->plane.shift_x,
1460               sy = st->plane.shift_y;
1461 
1462         st->img.rect = (struct pl_rect2df) {
1463             .x0 = (image->crop.x0 - sx) / rrx,
1464             .y0 = (image->crop.y0 - sy) / rry,
1465             .x1 = (image->crop.x1 - sx) / rrx,
1466             .y1 = (image->crop.y1 - sy) / rry,
1467         };
1468 
1469         PL_TRACE(rr, "Plane %d:", i);
1470         log_plane_info(rr, st);
1471 
1472         // Perform AV1 grain synthesis if needed. Do this first because it
1473         // requires unmodified plane sizes, and also because it's closer to the
1474         // intent of the spec (which is to apply synthesis effectively during
1475         // decoding)
1476 
1477         if (plane_av1_grain(pass, i, st, ref, image)) {
1478             PL_TRACE(rr, "After AV1 grain:");
1479             log_plane_info(rr, st);
1480         }
1481 
1482         if (pass_hook(pass, &st->img, plane_hook_stages[st->type])) {
1483             PL_TRACE(rr, "After user hooks:");
1484             log_plane_info(rr, st);
1485         }
1486 
1487         // Update the conceptual width/height after applying plane shaders
1488         st->img.w = roundf(pl_rect_w(st->img.rect));
1489         st->img.h = roundf(pl_rect_h(st->img.rect));
1490     }
1491 
1492     pl_shader sh = pl_dispatch_begin_ex(rr->dp, true);
1493     sh_require(sh, PL_SHADER_SIG_NONE, 0, 0);
1494 
1495     // Initialize the color to black
1496     const char *neutral = "0.0, 0.0, 0.0";
1497     if (pl_color_system_is_ycbcr_like(image->repr.sys))
1498         neutral = "0.0, 0.5, 0.5";
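    // Note: for YCbCr-like systems the chroma channels default to 0.5 (the
    // neutral chroma value), so planes that never get written don't tint
    // the output.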
1499 
1500     GLSL("vec4 color = vec4(%s, 1.0);            \n"
1501          "// pass_read_image                     \n"
1502          "{                                      \n"
1503          "vec4 tmp;                              \n",
1504          neutral);
1505 
1506     // For quality reasons, explicitly drop subpixel offsets from the ref rect
1507     // and re-add them as part of `pass->img.rect`, always rounding towards 0.
1508     // Additionally, drop anamorphic subpixel mismatches.
1509     float off_x = ref->img.rect.x0 - truncf(ref->img.rect.x0),
1510           off_y = ref->img.rect.y0 - truncf(ref->img.rect.y0),
1511           stretch_x = roundf(pl_rect_w(ref->img.rect)) / pl_rect_w(ref->img.rect),
1512           stretch_y = roundf(pl_rect_h(ref->img.rect)) / pl_rect_h(ref->img.rect);
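    // Illustrative example: a crop starting at x0 = 10.25 gives off_x = 0.25;
    // the planes below are sampled as if the crop started at 10.0, and the
    // remaining 0.25 pixel offset is re-applied through `pass->img.rect`
    // further down, so the main scaler handles it in a single pass.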
1513 
1514     bool has_alpha = false;
1515     for (int i = 0; i < image->num_planes; i++) {
1516         struct plane_state *st = &planes[i];
1517         const struct pl_plane *plane = &st->plane;
1518         if (!st->type)
1519             continue;
1520 
1521         float scale_x = pl_rect_w(st->img.rect) / pl_rect_w(ref->img.rect),
1522               scale_y = pl_rect_h(st->img.rect) / pl_rect_h(ref->img.rect),
1523               base_x = st->img.rect.x0 - scale_x * off_x,
1524               base_y = st->img.rect.y0 - scale_y * off_y;
1525 
1526         struct pl_sample_src src = {
1527             .tex        = st->img.tex,
1528             .components = plane->components,
1529             .address_mode = plane->address_mode,
1530             .scale      = pl_color_repr_normalize(&st->img.repr),
1531             .new_w      = ref->img.w,
1532             .new_h      = ref->img.h,
1533             .rect = {
1534                 base_x,
1535                 base_y,
1536                 base_x + stretch_x * pl_rect_w(st->img.rect),
1537                 base_y + stretch_y * pl_rect_h(st->img.rect),
1538             },
1539         };
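        // Note: every plane is resampled to the reference plane's size
        // (`new_w`/`new_h`), so the per-plane subpasses below can be merged
        // component-wise into the single `color` vector.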
1540 
1541         PL_TRACE(rr, "Aligning plane %d: {%f %f %f %f} -> {%f %f %f %f}",
1542                  i, st->img.rect.x0, st->img.rect.y0,
1543                  st->img.rect.x1, st->img.rect.y1,
1544                  src.rect.x0, src.rect.y0,
1545                  src.rect.x1, src.rect.y1);
1546 
1547         pl_shader psh = pl_dispatch_begin_ex(rr->dp, true);
1548         if (deband_src(pass, psh,  &src) != DEBAND_SCALED)
1549             dispatch_sampler(pass, psh, &rr->samplers_src[i], false, &src);
1550 
1551         ident_t sub = sh_subpass(sh, psh);
1552         if (!sub) {
1553             // Can't merge shaders, so instead force FBO indirection here
1554             struct img inter_img = {
1555                 .sh = psh,
1556                 .w = ref->img.w,
1557                 .h = ref->img.h,
1558                 .comps = src.components,
1559             };
1560 
1561             pl_tex inter_tex = img_tex(pass, &inter_img);
1562             if (!inter_tex) {
1563                 PL_ERR(rr, "Failed dispatching subpass for plane.. disabling "
1564                        "all plane shaders");
1565                 rr->disable_sampling = true;
1566                 rr->disable_debanding = true;
1567                 rr->disable_grain = true;
1568                 pl_dispatch_abort(rr->dp, &sh);
1569                 return false;
1570             }
1571 
1572             psh = pl_dispatch_begin_ex(rr->dp, true);
1573             pl_shader_sample_direct(psh, &(struct pl_sample_src) {
1574                 .tex = inter_tex,
1575             });
1576 
1577             sub = sh_subpass(sh, psh);
1578             pl_assert(sub);
1579         }
1580 
1581         GLSL("tmp = %s();\n", sub);
1582         for (int c = 0; c < src.components; c++) {
1583             if (plane->component_mapping[c] < 0)
1584                 continue;
1585             GLSL("color[%d] = tmp[%d];\n", plane->component_mapping[c], c);
1586 
1587             has_alpha |= plane->component_mapping[c] == PL_CHANNEL_A;
1588         }
1589 
1590         // we don't need it anymore
1591         pl_dispatch_abort(rr->dp, &psh);
1592     }
1593 
1594     GLSL("}\n");
1595 
1596     pass->img = (struct img) {
1597         .sh     = sh,
1598         .w      = ref->img.w,
1599         .h      = ref->img.h,
1600         .repr   = ref->img.repr,
1601         .color  = image->color,
1602         .comps  = has_alpha ? 4 : 3,
1603         .rect   = {
1604             off_x,
1605             off_y,
1606             off_x + pl_rect_w(ref->img.rect) / stretch_x,
1607             off_y + pl_rect_h(ref->img.rect) / stretch_y,
1608         },
1609     };
1610 
1611     // Update the reference rect to our adjusted image coordinates
1612     pass->ref_rect = pass->img.rect;
1613 
1614     pass_hook(pass, &pass->img, PL_HOOK_NATIVE);
1615 
1616     // Apply LUT logic and colorspace conversion
1617     enum pl_lut_type lut_type = guess_frame_lut_type(image, false);
1618     sh = img_sh(pass, &pass->img);
1619     bool needs_conversion = true;
1620 
1621     if (lut_type == PL_LUT_NATIVE || lut_type == PL_LUT_CONVERSION) {
1622         // Fix bit depth normalization before applying LUT
1623         float scale = pl_color_repr_normalize(&pass->img.repr);
1624         GLSL("color *= vec4(%s); \n", SH_FLOAT(scale));
1625         pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]);
1626 
1627         if (lut_type == PL_LUT_CONVERSION) {
1628             pass->img.repr.sys = PL_COLOR_SYSTEM_RGB;
1629             pass->img.repr.levels = PL_COLOR_LEVELS_FULL;
1630             needs_conversion = false;
1631         }
1632     }
1633 
1634     if (needs_conversion)
1635         pl_shader_decode_color(sh, &pass->img.repr, params->color_adjustment);
1636     if (lut_type == PL_LUT_NORMALIZED)
1637         pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]);
1638 
1639     pass_hook(pass, &pass->img, PL_HOOK_RGB);
1640     sh = NULL;
1641 
1642     // HDR peak detection, do this as early as possible
1643     hdr_update_peak(pass);
1644     return true;
1645 }
1646 
1647 static bool pass_scale_main(struct pass_state *pass)
1648 {
1649     const struct pl_render_params *params = pass->params;
1650     pl_renderer rr = pass->rr;
1651 
1652     if (!FBOFMT(pass->img.comps)) {
1653         PL_TRACE(rr, "Skipping main scaler (no FBOs)");
1654         return true;
1655     }
1656 
1657     struct img *img = &pass->img;
1658     struct pl_sample_src src = {
1659         .components = img->comps,
1660         .new_w      = abs(pl_rect_w(pass->dst_rect)),
1661         .new_h      = abs(pl_rect_h(pass->dst_rect)),
1662         .rect       = img->rect,
1663     };
1664 
1665     const struct pl_frame *image = &pass->image;
1666     bool need_fbo = image->num_overlays > 0;
1667     need_fbo |= rr->peak_detect_state && !params->allow_delayed_peak_detect;
1668 
1669     // Force FBO indirection if this shader is non-resizable
1670     int out_w, out_h;
1671     if (img->sh && pl_shader_output_size(img->sh, &out_w, &out_h))
1672         need_fbo |= out_w != src.new_w || out_h != src.new_h;
1673 
1674     struct sampler_info info = sample_src_info(pass, &src);
1675     bool use_sigmoid = info.dir == SAMPLER_UP && params->sigmoid_params;
1676     bool use_linear  = use_sigmoid || info.dir == SAMPLER_DOWN;
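    // Roughly: downscaling should happen in linear light for correct
    // averaging, while sigmoidization is only useful when upscaling (it
    // compresses the signal to reduce ringing) and itself requires linear
    // input, hence `use_linear` is set in both cases.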
1677 
1678     // We need to enable the full rendering pipeline if there are any user
1679     // shaders / hooks that might depend on it.
1680     uint64_t scaling_hooks = PL_HOOK_PRE_OVERLAY | PL_HOOK_PRE_KERNEL |
1681                              PL_HOOK_POST_KERNEL;
1682     uint64_t linear_hooks = PL_HOOK_LINEAR | PL_HOOK_SIGMOID;
1683 
1684     for (int i = 0; i < params->num_hooks; i++) {
1685         if (params->hooks[i]->stages & (scaling_hooks | linear_hooks)) {
1686             need_fbo = true;
1687             if (params->hooks[i]->stages & linear_hooks)
1688                 use_linear = true;
1689             if (params->hooks[i]->stages & PL_HOOK_SIGMOID)
1690                 use_sigmoid = true;
1691         }
1692     }
1693 
1694     if (info.dir == SAMPLER_NOOP && !need_fbo) {
1695         pl_assert(src.new_w == img->w && src.new_h == img->h);
1696         PL_TRACE(rr, "Skipping main scaler (would be no-op)");
1697         return true;
1698     }
1699 
1700     if (info.type == SAMPLER_DIRECT && !need_fbo) {
1701         img->w = src.new_w;
1702         img->h = src.new_h;
1703         PL_TRACE(rr, "Skipping main scaler (free sampling)");
1704         return true;
1705     }
1706 
1707     // Hard-disable both sigmoidization and linearization when required
1708     if (params->disable_linear_scaling || rr->disable_linear_sdr)
1709         use_sigmoid = use_linear = false;
1710 
1711     // Avoid sigmoidization for HDR content because it clips to [0,1]
1712     if (pl_color_transfer_is_hdr(img->color.transfer)) {
1713         use_sigmoid = false;
1714         // Also disable linearization if necessary
1715         if (rr->disable_linear_hdr)
1716             use_linear = false;
1717     }
1718 
1719     if (use_linear) {
1720         pl_shader_linearize(img_sh(pass, img), img->color);
1721         img->color.transfer = PL_COLOR_TRC_LINEAR;
1722         pass_hook(pass, img, PL_HOOK_LINEAR);
1723     }
1724 
1725     if (use_sigmoid) {
1726         pl_shader_sigmoidize(img_sh(pass, img), params->sigmoid_params);
1727         pass_hook(pass, img, PL_HOOK_SIGMOID);
1728     }
1729 
1730     pass_hook(pass, img, PL_HOOK_PRE_OVERLAY);
1731 
1732     img->tex = img_tex(pass, img);
1733     if (!img->tex)
1734         return false;
1735 
1736     // Draw overlay on top of the intermediate image if needed, accounting
1737     // for possible stretching needed due to mismatch between the ref and src
1738     struct pl_transform2x2 tf = pl_transform2x2_identity;
1739     if (!pl_rect2d_eq(img->rect, image->crop)) {
1740         float rx = pl_rect_w(img->rect) / pl_rect_w(image->crop),
1741               ry = pl_rect_h(img->rect) / pl_rect_h(image->crop);
1742 
1743         tf = (struct pl_transform2x2) {
1744             .mat = {{{ rx, 0.0 }, { 0.0, ry }}},
1745             .c = {
1746                 img->rect.x0 - image->crop.x0 * rx,
1747                 img->rect.y0 - image->crop.y0 * ry
1748             },
1749         };
1750     }
1751 
1752     draw_overlays(pass, img->tex, img->comps, NULL, image->overlays,
1753                   image->num_overlays, img->color, img->repr, use_sigmoid, &tf);
1754 
1755     pass_hook(pass, img, PL_HOOK_PRE_KERNEL);
1756 
1757     src.tex = img_tex(pass, img);
1758     pl_shader sh = pl_dispatch_begin_ex(rr->dp, true);
1759     dispatch_sampler(pass, sh, &rr->sampler_main, false, &src);
1760     *img = (struct img) {
1761         .sh     = sh,
1762         .w      = src.new_w,
1763         .h      = src.new_h,
1764         .repr   = img->repr,
1765         .rect   = { 0, 0, src.new_w, src.new_h },
1766         .color  = img->color,
1767         .comps  = img->comps,
1768     };
1769 
1770     pass_hook(pass, img, PL_HOOK_POST_KERNEL);
1771 
1772     if (use_sigmoid)
1773         pl_shader_unsigmoidize(img_sh(pass, img), params->sigmoid_params);
1774 
1775     pass_hook(pass, img, PL_HOOK_SCALED);
1776     return true;
1777 }
1778 
1779 #define CLEAR_COL(params)                                                       \
1780     (float[4]) {                                                                \
1781         (params)->background_color[0],                                          \
1782         (params)->background_color[1],                                          \
1783         (params)->background_color[2],                                          \
1784         1.0 - (params)->background_transparency,                                \
1785     }
1786 
1787 static bool pass_output_target(struct pass_state *pass)
1788 {
1789     const struct pl_render_params *params = pass->params;
1790     const struct pl_frame *image = &pass->image;
1791     const struct pl_frame *target = &pass->target;
1792     pl_renderer rr = pass->rr;
1793 
1794     struct img *img = &pass->img;
1795     pl_shader sh = img_sh(pass, img);
1796 
1797     // Color management
1798     bool prelinearized = false;
1799     bool need_conversion = true;
1800     assert(image->color.primaries == img->color.primaries);
1801     assert(image->color.light == img->color.light);
1802     if (img->color.transfer == PL_COLOR_TRC_LINEAR)
1803         prelinearized = true;
1804 
1805     bool need_icc = !params->ignore_icc_profiles &&
1806                     (image->profile.data || target->profile.data) &&
1807                     !pl_icc_profile_equal(&image->profile, &target->profile);
1808 
1809     if (params->force_icc_lut || params->force_3dlut)
1810         need_icc |= !pl_color_space_equal(&image->color, &target->color);
1811     need_icc &= !rr->disable_icc;
1812 
1813     if (params->lut) {
1814         struct pl_color_space lut_in = params->lut->color_in;
1815         struct pl_color_space lut_out = params->lut->color_out;
1816         switch (params->lut_type) {
1817         case PL_LUT_UNKNOWN:
1818         case PL_LUT_NATIVE:
1819             pl_color_space_merge(&lut_in, &image->color);
1820             pl_color_space_merge(&lut_out, &image->color);
1821             break;
1822         case PL_LUT_CONVERSION:
1823             pl_color_space_merge(&lut_in, &image->color);
1824             pl_color_space_merge(&lut_out, &target->color);
1825             // Conversion LUTs take the highest priority
1826             need_icc = false;
1827             need_conversion = false;
1828             break;
1829         case PL_LUT_NORMALIZED:
1830             if (!prelinearized) {
1831                 // PL_LUT_NORMALIZED wants linear input data
1832                 pl_shader_linearize(sh, img->color);
1833                 img->color.transfer = PL_COLOR_TRC_LINEAR;
1834                 prelinearized = true;
1835             }
1836             pl_color_space_merge(&lut_in, &img->color);
1837             pl_color_space_merge(&lut_out, &img->color);
1838             break;
1839         }
1840 
1841         pl_shader_color_map(sh, params->color_map_params, image->color, lut_in,
1842                             NULL, prelinearized);
1843 
1844         if (params->lut_type == PL_LUT_NORMALIZED) {
1845             GLSLF("color.rgb *= vec3(1.0/%s); \n",
1846                   SH_FLOAT(pl_color_transfer_nominal_peak(lut_in.transfer)));
1847         }
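        // (PL_LUT_NORMALIZED LUTs expect linear input scaled to [0,1], so the
        // signal is divided by the nominal transfer peak before the LUT and
        // multiplied back afterwards.)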
1848 
1849         pl_shader_custom_lut(sh, params->lut, &rr->lut_state[LUT_PARAMS]);
1850 
1851         if (params->lut_type == PL_LUT_NORMALIZED) {
1852             GLSLF("color.rgb *= vec3(%s); \n",
1853                   SH_FLOAT(pl_color_transfer_nominal_peak(lut_out.transfer)));
1854         }
1855 
1856         if (params->lut_type != PL_LUT_CONVERSION) {
1857             pl_shader_color_map(sh, params->color_map_params, lut_out, img->color,
1858                                 NULL, false);
1859         }
1860     }
1861 
1862 #ifdef PL_HAVE_LCMS
1863 
1864     if (need_icc) {
1865         struct pl_icc_color_space src = {
1866             .color = image->color,
1867             .profile = image->profile,
1868         };
1869 
1870         struct pl_icc_color_space dst = {
1871             .color = target->color,
1872             .profile = target->profile,
1873         };
1874 
1875         if (params->ignore_icc_profiles)
1876             src.profile = dst.profile = (struct pl_icc_profile) {0};
1877 
1878         struct pl_icc_result res;
1879         bool ok = pl_icc_update(sh, &src, &dst, &rr->icc_state, &res,
1880                                 PL_DEF(params->icc_params, params->lut3d_params));
1881         if (!ok) {
1882             rr->disable_icc = true;
1883             goto fallback;
1884         }
1885 
1886         // current -> ICC in
1887         pl_shader_color_map(sh, params->color_map_params, image->color,
1888                             res.src_color, &rr->peak_detect_state, prelinearized);
1889         // ICC in -> ICC out
1890         pl_icc_apply(sh, &rr->icc_state);
1891         // ICC out -> target
1892         pl_shader_color_map(sh, params->color_map_params, res.dst_color,
1893                             target->color, NULL, false);
1894 
1895         need_conversion = false;
1896     }
1897 
1898 fallback:
1899 
1900 #else // !PL_HAVE_LCMS
1901 
1902     if (need_icc) {
1903         PL_WARN(rr, "An ICC profile was set, but libplacebo is built without "
1904                 "support for LittleCMS! Disabling..");
1905         rr->disable_icc = true;
1906     }
1907 
1908 #endif
1909 
1910     if (need_conversion) {
1911         // current -> target
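        // (When peak detection is enabled, `peak_detect_state` feeds the
        // measured scene peak into this tone-mapping step.)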
1912         pl_shader_color_map(sh, params->color_map_params, image->color,
1913                             target->color, &rr->peak_detect_state, prelinearized);
1914     }
1915 
1916     // Apply color blindness simulation if requested
1917     if (params->cone_params)
1918         pl_shader_cone_distort(sh, target->color, params->cone_params);
1919 
1920     enum pl_lut_type lut_type = guess_frame_lut_type(target, true);
1921     if (lut_type == PL_LUT_NORMALIZED || lut_type == PL_LUT_CONVERSION)
1922         pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]);
1923 
1924     // Apply the color scale separately, after encoding is done, to make sure
1925     // that the intermediate FBO (if any) has the correct precision.
1926     struct pl_color_repr repr = target->repr;
1927     float scale = pl_color_repr_normalize(&repr);
1928     if (lut_type != PL_LUT_CONVERSION)
1929         pl_shader_encode_color(sh, &repr);
1930     if (lut_type == PL_LUT_NATIVE)
1931         pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]);
1932     pass_hook(pass, img, PL_HOOK_OUTPUT);
1933     sh = NULL;
1934 
1935     const struct pl_plane *ref = &target->planes[pass->dst_ref];
1936     bool flipped_x = pass->dst_rect.x1 < pass->dst_rect.x0,
1937          flipped_y = pass->dst_rect.y1 < pass->dst_rect.y0;
1938 
1939     if (!params->skip_target_clearing && pl_frame_is_cropped(target))
1940         pl_frame_clear_rgba(rr->gpu, target, CLEAR_COL(params));
1941 
1942     for (int p = 0; p < target->num_planes; p++) {
1943         const struct pl_plane *plane = &target->planes[p];
1944         float rx = (float) plane->texture->params.w / ref->texture->params.w,
1945               ry = (float) plane->texture->params.h / ref->texture->params.h;
1946 
1947         // Only accept integer scaling ratios. This accounts for the fact
1948         // that fractionally subsampled planes get rounded up to the
1949         // nearest integer size, which we want to over-render.
1950         float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
1951               rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
1952         float sx = plane->shift_x, sy = plane->shift_y;
1953 
1954         struct pl_rect2df dst_rectf = {
1955             .x0 = (pass->dst_rect.x0 - sx) * rrx,
1956             .y0 = (pass->dst_rect.y0 - sy) * rry,
1957             .x1 = (pass->dst_rect.x1 - sx) * rrx,
1958             .y1 = (pass->dst_rect.y1 - sy) * rry,
1959         };
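        // Illustrative example: when rendering into a 4:2:0 target, the
        // chroma planes have rrx = rry = 0.5, so a dst_rect of
        // {0, 0, 1920, 1080} maps to {0, 0, 960, 540} in chroma plane
        // coordinates (ignoring chroma shifts).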
1960 
1961         // Normalize to make the math easier
1962         pl_rect2df_normalize(&dst_rectf);
1963 
1964         // Round the output rect
1965         int rx0 = floorf(dst_rectf.x0), ry0 = floorf(dst_rectf.y0),
1966             rx1 =  ceilf(dst_rectf.x1), ry1 =  ceilf(dst_rectf.y1);
1967 
1968         PL_TRACE(rr, "Subsampled target %d: {%f %f %f %f} -> {%d %d %d %d}",
1969                  p, dst_rectf.x0, dst_rectf.y0,
1970                  dst_rectf.x1, dst_rectf.y1,
1971                  rx0, ry0, rx1, ry1);
1972 
1973         if (target->num_planes > 1) {
1974 
1975             // Planar input, so we need to sample from an intermediate FBO
1976             struct pl_sample_src src = {
1977                 .tex        = img_tex(pass, img),
1978                 .new_w      = rx1 - rx0,
1979                 .new_h      = ry1 - ry0,
1980                 .rect = {
1981                     .x0 = (rx0 - dst_rectf.x0) / rrx,
1982                     .x1 = (rx1 - dst_rectf.x0) / rrx,
1983                     .y0 = (ry0 - dst_rectf.y0) / rry,
1984                     .y1 = (ry1 - dst_rectf.y0) / rry,
1985                 },
1986             };
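            // The src rect maps the rounded-out plane pixels back into `img`
            // coordinates (units of the reference plane), so any over-rendered
            // border samples just outside the nominal image rect instead of
            // shifting the image.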
1987 
1988             if (!src.tex) {
1989                 PL_ERR(rr, "Output requires multiple planes, but FBOs are "
1990                        "unavailable. This combination is unsupported.");
1991                 return false;
1992             }
1993 
1994             PL_TRACE(rr, "Sampling %dx%d img aligned from {%f %f %f %f}",
1995                      pass->img.w, pass->img.h,
1996                      src.rect.x0, src.rect.y0,
1997                      src.rect.x1, src.rect.y1);
1998 
1999             for (int c = 0; c < plane->components; c++) {
2000                 if (plane->component_mapping[c] < 0)
2001                     continue;
2002                 src.component_mask |= 1 << plane->component_mapping[c];
2003             }
2004 
2005             sh = pl_dispatch_begin(rr->dp);
2006             dispatch_sampler(pass, sh, &rr->samplers_dst[p],
2007                              !plane->texture->params.storable, &src);
2008 
2009         } else {
2010 
2011             // Single plane, so we can directly re-use the img shader unless
2012             // it's incompatible with the FBO capabilities
2013             bool is_comp = pl_shader_is_compute(img_sh(pass, img));
2014             if (is_comp && !plane->texture->params.storable) {
2015                 if (!img_tex(pass, img)) {
2016                     PL_ERR(rr, "Rendering requires compute shaders, but output "
2017                            "is not storable, and FBOs are unavailable. This "
2018                            "combination is unsupported.");
2019                     return false;
2020                 }
2021             }
2022 
2023             sh = img_sh(pass, img);
2024             img->sh = NULL;
2025 
2026         }
2027 
2028         GLSL("color *= vec4(1.0 / %s); \n", SH_FLOAT(scale));
2029         swizzle_color(sh, plane->components, plane->component_mapping, false);
2030 
2031         if (params->dither_params) {
2032             // Ignore dithering for > 16-bit FBOs by default, since it makes
2033             // little sense to do so (and probably just adds errors)
2034             int depth = repr.bits.sample_depth;
2035             if (depth && (depth <= 16 || params->force_dither))
2036                 pl_shader_dither(sh, depth, &rr->dither_state, params->dither_params);
2037         }
2038 
2039         bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
2040             .shader = &sh,
2041             .target = plane->texture,
2042             .blend_params = params->blend_params,
2043             .rect = {
2044                 .x0 = flipped_x ? rx1 : rx0,
2045                 .y0 = flipped_y ? ry1 : ry0,
2046                 .x1 = flipped_x ? rx0 : rx1,
2047                 .y1 = flipped_y ? ry0 : ry1,
2048             },
2049         });
2050 
2051         if (!ok)
2052             return false;
2053 
2054         // Render any overlays, including overlays that need to be rendered
2055         // from the `image` itself, but which couldn't be rendered as
2056         // part of the intermediate scaling pass due to missing FBOs.
2057         if (image->num_overlays > 0 && !FBOFMT(img->comps)) {
2058             // The original image dimensions need to be scaled by the effective
2059             // end-to-end scaling ratio to compensate for the mismatch in
2060             // pixel coordinates between the image and target.
2061             float scale_x = pl_rect_w(dst_rectf) / pl_rect_w(image->crop),
2062                   scale_y = pl_rect_h(dst_rectf) / pl_rect_h(image->crop);
2063 
2064             struct pl_transform2x2 iscale = {
2065                 .mat = {{{ scale_x, 0.0 }, { 0.0, scale_y }}},
2066                 .c = {
2067                     // If the image was rendered with an offset relative to the
2068                     // target crop, we also need to shift the overlays.
2069                     dst_rectf.x0 - image->crop.x0 * scale_x,
2070                     dst_rectf.y0 - image->crop.y0 * scale_y,
2071                 },
2072             };
2073 
2074             draw_overlays(pass, plane->texture, plane->components,
2075                           plane->component_mapping, image->overlays,
2076                           image->num_overlays, target->color, target->repr,
2077                           false, &iscale);
2078         }
2079 
2080         struct pl_transform2x2 tscale = {
2081             .mat = {{{ rrx, 0.0 }, { 0.0, rry }}},
2082             .c = { -sx, -sy },
2083         };
2084 
2085         draw_overlays(pass, plane->texture, plane->components,
2086                       plane->component_mapping, target->overlays,
2087                       target->num_overlays, target->color, target->repr,
2088                       false, &tscale);
2089     }
2090 
2091     *img = (struct img) {0};
2092     return true;
2093 }
2094 
2095 #define require(expr)                                                           \
2096   do {                                                                          \
2097       if (!(expr)) {                                                            \
2098           PL_ERR(rr, "Validation failed: %s (%s:%d)",                           \
2099                   #expr, __FILE__, __LINE__);                                   \
2100           return false;                                                         \
2101       }                                                                         \
2102   } while (0)
2103 
2104 #define validate_plane(plane, param)                                            \
2105   do {                                                                          \
2106       require((plane).texture);                                                 \
2107       require((plane).texture->params.param);                                   \
2108       require((plane).components > 0 && (plane).components <= 4);               \
2109       for (int c = 0; c < (plane).components; c++) {                            \
2110           require((plane).component_mapping[c] >= PL_CHANNEL_NONE &&            \
2111                   (plane).component_mapping[c] <= PL_CHANNEL_A);                \
2112       }                                                                         \
2113   } while (0)
2114 
2115 #define validate_overlay(overlay)                                               \
2116   do {                                                                          \
2117       require(!(overlay).tex ^ !(overlay).plane.texture);                       \
2118       if ((overlay).tex) {                                                      \
2119           require((overlay).tex->params.sampleable);                            \
2120           require((overlay).num_parts >= 0);                                    \
2121           for (int n = 0; n < (overlay).num_parts; n++) {                       \
2122               const struct pl_overlay_part *p = &(overlay).parts[n];            \
2123               require(pl_rect_w(p->dst) && pl_rect_h(p->dst));                  \
2124           }                                                                     \
2125       } else {                                                                  \
2126           require((overlay).num_parts == 0);                                    \
2127           require((overlay).plane.texture->params.sampleable);                  \
2128           require(pl_rect_w((overlay).rect) && pl_rect_h((overlay).rect));      \
2129       }                                                                         \
2130   } while (0)
2131 
2132 // Perform some basic validity checks on incoming structs to help catch invalid
2133 // API usage. This is not an exhaustive check. In particular, enums are not
2134 // bounds checked. This is because most functions accepting enums already
2135 // abort() in the default case, and because it's not the intent of this check
2136 // to catch all instances of memory corruption - just common logic bugs.
2137 static bool validate_structs(pl_renderer rr,
2138                              const struct pl_frame *image,
2139                              const struct pl_frame *target)
2140 {
2141     // Rendering to/from a frame with no planes is technically allowed, but so
2142     // pointless that it's more likely to be a user error worth catching.
2143     require(image->num_planes > 0 && image->num_planes <= PL_MAX_PLANES);
2144     require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES);
2145     for (int i = 0; i < image->num_planes; i++)
2146         validate_plane(image->planes[i], sampleable);
2147     for (int i = 0; i < target->num_planes; i++)
2148         validate_plane(target->planes[i], renderable);
2149 
2150     float src_w = pl_rect_w(image->crop), src_h = pl_rect_h(image->crop);
2151     float dst_w = pl_rect_w(target->crop), dst_h = pl_rect_h(target->crop);
2152     require(!src_w == !src_h);
2153     require(!dst_w == !dst_h);
2154 
2155     require(image->num_overlays >= 0);
2156     require(target->num_overlays >= 0);
2157     for (int i = 0; i < image->num_overlays; i++)
2158         validate_overlay(image->overlays[i]);
2159     for (int i = 0; i < target->num_overlays; i++)
2160         validate_overlay(target->overlays[i]);
2161 
2162     return true;
2163 }
2164 
2165 static inline enum plane_type detect_plane_type(const struct pl_plane *plane,
2166                                                 const struct pl_color_repr *repr)
2167 {
2168     if (pl_color_system_is_ycbcr_like(repr->sys)) {
2169         int t = PLANE_INVALID;
2170         for (int c = 0; c < plane->components; c++) {
2171             switch (plane->component_mapping[c]) {
2172             case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue;
2173             case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue;
2174 
2175             case PL_CHANNEL_CB:
2176             case PL_CHANNEL_CR:
2177                 t = PL_MAX(t, PLANE_CHROMA);
2178                 continue;
2179 
2180             default: continue;
2181             }
2182         }
2183 
2184         pl_assert(t);
2185         return t;
2186     }
2187 
2188     // Extra test for exclusive / separated alpha plane
2189     if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A)
2190         return PLANE_ALPHA;
2191 
2192     switch (repr->sys) {
2193     case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB
2194     case PL_COLOR_SYSTEM_RGB: return PLANE_RGB;
2195     case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ;
2196 
2197     // For the switch completeness check
2198     case PL_COLOR_SYSTEM_BT_601:
2199     case PL_COLOR_SYSTEM_BT_709:
2200     case PL_COLOR_SYSTEM_SMPTE_240M:
2201     case PL_COLOR_SYSTEM_BT_2020_NC:
2202     case PL_COLOR_SYSTEM_BT_2020_C:
2203     case PL_COLOR_SYSTEM_BT_2100_PQ:
2204     case PL_COLOR_SYSTEM_BT_2100_HLG:
2205     case PL_COLOR_SYSTEM_YCGCO:
2206     case PL_COLOR_SYSTEM_COUNT:
2207         break;
2208     }
2209 
2210     pl_unreachable();
2211 }
2212 
2213 static inline void default_rect(struct pl_rect2df *rc,
2214                                 const struct pl_rect2df *backup)
2215 {
2216     if (!rc->x0 && !rc->y0 && !rc->x1 && !rc->y1)
2217         *rc = *backup;
2218 }
2219 
2220 static void fix_refs_and_rects(struct pass_state *pass)
2221 {
2222     struct pl_frame *image = &pass->image;
2223     struct pl_frame *target = &pass->target;
2224 
2225     // Find the ref planes
2226     for (int i = 0; i < image->num_planes; i++) {
2227         pass->src_type[i] = detect_plane_type(&image->planes[i], &image->repr);
2228         switch (pass->src_type[i]) {
2229         case PLANE_RGB:
2230         case PLANE_LUMA:
2231         case PLANE_XYZ:
2232             pass->src_ref = i;
2233             break;
2234         case PLANE_CHROMA:
2235         case PLANE_ALPHA:
2236             break;
2237         case PLANE_INVALID:
2238             pl_unreachable();
2239         }
2240     }
2241 
2242     for (int i = 0; i < target->num_planes; i++) {
2243         pass->dst_type[i] = detect_plane_type(&target->planes[i], &target->repr);
2244         switch (pass->dst_type[i]) {
2245         case PLANE_RGB:
2246         case PLANE_LUMA:
2247         case PLANE_XYZ:
2248             pass->dst_ref = i;
2249             break;
2250         case PLANE_CHROMA:
2251         case PLANE_ALPHA:
2252             break;
2253         case PLANE_INVALID:
2254             pl_unreachable();
2255         }
2256     }
2257 
2258     // Fix the rendering rects
2259     struct pl_rect2df *src = &image->crop, *dst = &target->crop;
2260     pl_tex src_ref = pass->image.planes[pass->src_ref].texture;
2261     pl_tex dst_ref = pass->target.planes[pass->dst_ref].texture;
2262 
2263     if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) {
2264         src->x1 = src_ref->params.w;
2265         src->y1 = src_ref->params.h;
2266     }
2267 
2268     if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) {
2269         dst->x1 = dst_ref->params.w;
2270         dst->y1 = dst_ref->params.h;
2271     }
2272 
2273     // Keep track of whether the end-to-end rendering is flipped
2274     bool flipped_x = (src->x0 > src->x1) != (dst->x0 > dst->x1),
2275          flipped_y = (src->y0 > src->y1) != (dst->y0 > dst->y1);
2276 
2277     // Normalize both rects to make the math easier
2278     pl_rect2df_normalize(src);
2279     pl_rect2df_normalize(dst);
2280 
2281     // Round the output rect and clip it to the framebuffer dimensions
2282     float rx0 = roundf(PL_MAX(dst->x0, 0.0)),
2283           ry0 = roundf(PL_MAX(dst->y0, 0.0)),
2284           rx1 = roundf(PL_MIN(dst->x1, dst_ref->params.w)),
2285           ry1 = roundf(PL_MIN(dst->y1, dst_ref->params.h));
2286 
2287     // Adjust the src rect corresponding to the rounded crop
2288     float scale_x = pl_rect_w(*src) / pl_rect_w(*dst),
2289           scale_y = pl_rect_h(*src) / pl_rect_h(*dst),
2290           base_x = src->x0,
2291           base_y = src->y0;
2292 
2293     src->x0 = base_x + (rx0 - dst->x0) * scale_x;
2294     src->x1 = base_x + (rx1 - dst->x0) * scale_x;
2295     src->y0 = base_y + (ry0 - dst->y0) * scale_y;
2296     src->y1 = base_y + (ry1 - dst->y0) * scale_y;
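    // Illustrative example: if dst->x0 = 10.3 gets rounded to 10.0 and the
    // content is being downscaled 2x (scale_x = 2.0), the source rect is
    // shifted left by 0.6 source pixels to keep the mapping consistent.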
2297 
2298     // Update dst_rect to the rounded values and re-apply flip if needed. We
2299     // always do this in the `dst` rather than the `src` because this allows
2300     // e.g. polar sampling compute shaders to work.
2301     *dst = (struct pl_rect2df) {
2302         .x0 = flipped_x ? rx1 : rx0,
2303         .y0 = flipped_y ? ry1 : ry0,
2304         .x1 = flipped_x ? rx0 : rx1,
2305         .y1 = flipped_y ? ry0 : ry1,
2306     };
2307 
2308     // Copies of the above, for convenience
2309     pass->ref_rect = *src;
2310     pass->dst_rect = (struct pl_rect2d) {
2311         dst->x0, dst->y0, dst->x1, dst->y1,
2312     };
2313 }
2314 
2315 static pl_tex frame_ref(const struct pl_frame *frame)
2316 {
2317     pl_assert(frame->num_planes);
2318     for (int i = 0; i < frame->num_planes; i++) {
2319         switch (detect_plane_type(&frame->planes[i], &frame->repr)) {
2320         case PLANE_RGB:
2321         case PLANE_LUMA:
2322         case PLANE_XYZ:
2323             return frame->planes[i].texture;
2324         case PLANE_CHROMA:
2325         case PLANE_ALPHA:
2326             continue;
2327         case PLANE_INVALID:
2328             pl_unreachable();
2329         }
2330     }
2331 
2332     return frame->planes[0].texture;
2333 }
2334 
2335 static void fix_color_space(struct pl_frame *frame)
2336 {
2337     pl_tex tex = frame_ref(frame);
2338 
2339     // If the primaries are not known, guess them based on the resolution
2340     if (!frame->color.primaries)
2341         frame->color.primaries = pl_color_primaries_guess(tex->params.w, tex->params.h);
2342 
2343     pl_color_space_infer(&frame->color);
2344 
2345     // For UNORM formats, we can infer the sampled bit depth from the texture
2346     // itself. This is ignored for other format types, because the logic
2347     // doesn't really work out for them anyways, and it's best not to do
2348     // anything too crazy unless the user provides explicit details.
2349     struct pl_bit_encoding *bits = &frame->repr.bits;
2350     if (!bits->sample_depth && tex->params.format->type == PL_FMT_UNORM) {
2351         // Just assume the first component's depth is canonical. This works in
2352         // practice, since for cases like rgb565 we want to use the lower depth
2353         // anyway. Plus, every format has at least one component.
2354         bits->sample_depth = tex->params.format->component_depth[0];
2355 
2356         // If we don't know the color depth, assume it spans the full range of
2357         // the texture. Otherwise, clamp it to the texture depth.
2358         bits->color_depth = PL_DEF(bits->color_depth, bits->sample_depth);
2359         bits->color_depth = PL_MIN(bits->color_depth, bits->sample_depth);
2360 
2361         // If the texture depth is higher than the known color depth, assume
2362         // the colors were left-shifted.
2363         bits->bit_shift += bits->sample_depth - bits->color_depth;
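        // Illustrative example: 10-bit video uploaded MSB-aligned into a
        // 16-bit UNORM texture has sample_depth = 16 and (user-provided)
        // color_depth = 10, so this heuristic infers bit_shift = 6.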
2364     }
2365 }
2366 
2367 static bool pass_infer_state(struct pass_state *pass)
2368 {
2369     // Backwards compatibility hacks
2370     struct pl_frame *image = &pass->image;
2371     struct pl_frame *target = &pass->target;
2372     default_rect(&image->crop, &image->src_rect);
2373     default_rect(&target->crop, &target->dst_rect);
2374 
2375     if (!target->num_planes && target->fbo) {
2376         target->num_planes = 1;
2377         target->planes[0] = (struct pl_plane) {
2378             .texture = target->fbo,
2379             .components = target->fbo->params.format->num_components,
2380             .component_mapping = {0, 1, 2, 3},
2381         };
2382     }
2383 
2384     if (!validate_structs(pass->rr, image, target))
2385         return false;
2386 
2387     fix_refs_and_rects(pass);
2388     fix_color_space(image);
2389 
2390     // Infer the target color space info based on the image's
2391     pl_color_space_infer_ref(&target->color, &image->color);
2392     fix_color_space(target);
2393     return true;
2394 }
2395 
2396 static bool draw_empty_overlays(pl_renderer rr,
2397                                 const struct pl_frame *ptarget,
2398                                 const struct pl_render_params *params)
2399 {
2400     if (!params->skip_target_clearing)
2401         pl_frame_clear_rgba(rr->gpu, ptarget, CLEAR_COL(params));
2402 
2403     if (!ptarget->num_overlays)
2404         return true;
2405 
2406     struct pass_state pass = {
2407         .rr = rr,
2408         .params = params,
2409         .target = *ptarget,
2410         .info.stage = PL_RENDER_STAGE_FRAME,
2411     };
2412 
2413     struct pl_frame *target = &pass.target;
2414     require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES);
2415     for (int i = 0; i < target->num_planes; i++)
2416         validate_plane(target->planes[i], renderable);
2417     require(target->num_overlays >= 0);
2418     for (int i = 0; i < target->num_overlays; i++)
2419         validate_overlay(target->overlays[i]);
2420     fix_color_space(target);
2421 
2422     pl_dispatch_callback(rr->dp, &pass, info_callback);
2423     pl_dispatch_reset_frame(rr->dp);
2424 
2425     pl_tex ref = frame_ref(target);
2426     for (int p = 0; p < target->num_planes; p++) {
2427         const struct pl_plane *plane = &target->planes[p];
2428         // Math replicated from `pass_output_target`
2429         float rx = (float) plane->texture->params.w / ref->params.w,
2430               ry = (float) plane->texture->params.h / ref->params.h;
2431         float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
2432               rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
2433         float sx = plane->shift_x, sy = plane->shift_y;
2434 
2435         struct pl_transform2x2 tscale = {
2436             .mat = {{{ rrx, 0.0 }, { 0.0, rry }}},
2437             .c = { -sx, -sy },
2438         };
2439 
2440         draw_overlays(&pass, plane->texture, plane->components,
2441                       plane->component_mapping, target->overlays,
2442                       target->num_overlays, target->color, target->repr,
2443                       false, &tscale);
2444     }
2445 
2446     return true;
2447 }
2448 
2449 bool pl_render_image(pl_renderer rr, const struct pl_frame *pimage,
2450                      const struct pl_frame *ptarget,
2451                      const struct pl_render_params *params)
2452 {
2453     params = PL_DEF(params, &pl_render_default_params);
2454     pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants);
2455     if (!pimage)
2456         return draw_empty_overlays(rr, ptarget, params);
2457 
2458     struct pass_state pass = {
2459         .rr = rr,
2460         .params = params,
2461         .image = *pimage,
2462         .target = *ptarget,
2463         .info.stage = PL_RENDER_STAGE_FRAME,
2464     };
2465 
2466     if (!pass_infer_state(&pass))
2467         return false;
2468 
2469     pass.tmp = pl_tmp(NULL);
2470     pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool));
2471 
2472     pl_dispatch_callback(rr->dp, &pass, info_callback);
2473     pl_dispatch_reset_frame(rr->dp);
2474 
2475     for (int i = 0; i < params->num_hooks; i++) {
2476         if (params->hooks[i]->reset)
2477             params->hooks[i]->reset(params->hooks[i]->priv);
2478     }
2479 
2480     if (!pass_read_image(&pass))
2481         goto error;
2482 
2483     if (!pass_scale_main(&pass))
2484         goto error;
2485 
2486     if (!pass_output_target(&pass))
2487         goto error;
2488 
2489     pl_free(pass.tmp);
2490     return true;
2491 
2492 error:
2493     pl_dispatch_abort(rr->dp, &pass.img.sh);
2494     pl_free(pass.tmp);
2495     PL_ERR(rr, "Failed rendering image!");
2496     return false;
2497 }
2498 
2499 static uint64_t render_params_hash(const struct pl_render_params *params_orig)
2500 {
2501     struct pl_render_params params = *params_orig;
2502     uint64_t hash = 0;
2503 
2504 #define HASH_PTR(ptr)                                                           \
2505     do {                                                                        \
2506         if (ptr) {                                                              \
2507             pl_hash_merge(&hash, pl_mem_hash(ptr, sizeof(*ptr)));               \
2508             ptr = NULL;                                                         \
2509         }                                                                       \
2510     } while (0)
2511 
2512 #define HASH_FILTER(scaler)                                                     \
2513     do {                                                                        \
2514         if (scaler) {                                                           \
2515             struct pl_filter_config filter = *scaler;                           \
2516             HASH_PTR(filter.kernel);                                            \
2517             HASH_PTR(filter.window);                                            \
2518             pl_hash_merge(&hash, pl_mem_hash(&filter, sizeof(filter)));         \
2519             scaler = NULL;                                                      \
2520         }                                                                       \
2521     } while (0)
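// Note: HASH_PTR hashes the pointed-to contents and then clears the pointer,
// so the final `pl_mem_hash` over `params` below is not perturbed by the
// (unstable) addresses of these nested allocations, only by their contents.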
2522 
2523     HASH_FILTER(params.upscaler);
2524     HASH_FILTER(params.downscaler);
2525     HASH_FILTER(params.frame_mixer);
2526 
2527     HASH_PTR(params.deband_params);
2528     HASH_PTR(params.sigmoid_params);
2529     HASH_PTR(params.color_adjustment);
2530     HASH_PTR(params.peak_detect_params);
2531     HASH_PTR(params.color_map_params);
2532     HASH_PTR(params.dither_params);
2533     HASH_PTR(params.cone_params);
2534     HASH_PTR(params.blend_params);
2535 
2536 #ifdef PL_HAVE_LCMS
2537     HASH_PTR(params.icc_params);
2538     HASH_PTR(params.lut3d_params);
2539 #endif
2540 
2541     // Hash all hooks
2542     for (int i = 0; i < params.num_hooks; i++)
2543         pl_hash_merge(&hash, pl_mem_hash(&params.hooks[i], sizeof(params.hooks[i])));
2544     params.hooks = NULL;
2545 
2546     // Hash the LUT by only looking at the signature
2547     if (params.lut) {
2548         pl_hash_merge(&hash, params.lut->signature);
2549         params.lut = NULL;
2550     }
2551 
2552     pl_hash_merge(&hash, pl_mem_hash(&params, sizeof(params)));
2553     return hash;
2554 }
2555 
2556 #define MAX_MIX_FRAMES 16
2557 
2558 bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images,
2559                          const struct pl_frame *ptarget,
2560                          const struct pl_render_params *params)
2561 {
2562     if (!images->num_frames)
2563         return pl_render_image(rr, NULL, ptarget, params);
2564 
2565     params = PL_DEF(params, &pl_render_default_params);
2566     uint64_t params_hash = render_params_hash(params);
2567     pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants);
2568 
2569     require(images->num_frames >= 1);
2570     for (int i = 0; i < images->num_frames - 1; i++)
2571         require(images->timestamps[i] <= images->timestamps[i+1]);
2572 
2573     // As the canonical reference, find the nearest neighbour frame
2574     const struct pl_frame *refimg = images->frames[0];
2575     float best = fabs(images->timestamps[0]);
2576     for (int i = 1; i < images->num_frames; i++) {
2577         float dist = fabs(images->timestamps[i]);
2578         if (dist < best) {
2579             refimg = images->frames[i];
2580             best = dist;
2581             continue;
2582         } else {
2583             break;
2584         }
2585     }
2586 
2587     struct pass_state pass = {
2588         .rr = rr,
2589         .params = params,
2590         .image = *refimg,
2591         .target = *ptarget,
2592         .info.stage = PL_RENDER_STAGE_BLEND,
2593     };
2594 
2595     if (!params->frame_mixer || rr->disable_mixing || !FBOFMT(4))
2596         goto fallback;
2597 
2598     // Can't reasonably interpolate a single image, so just directly render it
2599     if (images->num_frames == 1)
2600         goto fallback;
2601 
2602     if (!pass_infer_state(&pass))
2603         return false;
2604 
2605     int out_w = abs(pl_rect_w(pass.dst_rect)),
2606         out_h = abs(pl_rect_h(pass.dst_rect));
2607 
2608     int fidx = 0;
2609     struct cached_frame frames[MAX_MIX_FRAMES];
2610     float weights[MAX_MIX_FRAMES];
2611     float wsum = 0.0;
2612     pass.tmp = pl_tmp(NULL);
2613 
2614     // Garbage collect the cache: mark every cached frame for eviction here;
2615     // entries that are still needed get un-marked in the loop below
2616     for (int i = 0; i < rr->frames.num; i++)
2617         rr->frames.elem[i].evict = true;
2618 
2619     // Traverse the input frames and determine/prepare the ones we need
2620     for (int i = 0; i < images->num_frames; i++) {
2621         uint64_t sig = images->signatures[i];
2622         float pts = images->timestamps[i];
2623         PL_TRACE(rr, "Considering image with signature 0x%llx, pts %f",
2624                  (unsigned long long) sig, pts);
2625 
2626         float weight;
2627 
2628         // For backwards compatibility, treat !kernel as oversample
2629         const struct pl_filter_function *kernel = params->frame_mixer->kernel;
2630         kernel = PL_DEF(kernel, &oversample_kernel);
2631         if (kernel->weight == oversample) {
2632 
2633             // Compute the visible interval [pts, end] of this frame
2634             float end = i+1 < images->num_frames ? images->timestamps[i+1] : INFINITY;
2635             if (pts > images->vsync_duration || end < 0.0) {
2636                 PL_TRACE(rr, "  -> Skipping: no intersection with vsync");
2637                 continue;
2638             } else {
2639                 pts = PL_MAX(pts, 0.0);
2640                 end = PL_MIN(end, images->vsync_duration);
2641                 pl_assert(end >= pts);
2642             }
2643 
2644             // Weight is the fraction of vsync interval that frame is visible
2645             weight = (end - pts) / images->vsync_duration;
2646             PL_TRACE(rr, "  -> Frame [%f, %f] intersects [%f, %f] = weight %f",
2647                      pts, end, 0.0, images->vsync_duration, weight);
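            // Illustrative example: a frame at pts = -0.2 whose successor is
            // at pts = 0.7, with vsync_duration = 1.0, is visible over
            // [0, 0.7] and therefore gets weight 0.7.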
2648 
2649             if (weight < kernel->params[0]) {
2650                 PL_TRACE(rr, "     (culling due to threshold)");
2651                 weight = 0.0;
2652             }
2653 
2654         } else {
2655 
2656             if (fabs(pts) >= kernel->radius) {
2657                 PL_TRACE(rr, "  -> Skipping: outside filter radius (%f)",
2658                          kernel->radius);
2659                 continue;
2660             }
2661 
2662             // Weight is directly sampled from the filter
2663             weight = pl_filter_sample(params->frame_mixer, pts);
2664             PL_TRACE(rr, "  -> Filter offset %f = weight %f", pts, weight);
2665 
2666         }
2667 
2668         struct cached_frame *f = NULL;
2669         for (int j = 0; j < rr->frames.num; j++) {
2670             if (rr->frames.elem[j].signature == sig) {
2671                 f = &rr->frames.elem[j];
2672                 f->evict = false;
2673                 break;
2674             }
2675         }
2676 
2677         // Skip frames with negligible contributions. Do this after the loop
2678         // above to make sure these frames don't get evicted just yet.
2679         const float cutoff = 1e-3;
2680         if (fabs(weight) <= cutoff) {
2681             PL_TRACE(rr, "   -> Skipping: weight (%f) below threshold (%f)",
2682                      weight, cutoff);
2683             continue;
2684         }
2685 
2686         if (!f) {
2687             // Signature does not exist in the cache at all yet,
2688             // so grow the cache by this entry.
2689             PL_ARRAY_GROW(rr, rr->frames);
2690             f = &rr->frames.elem[rr->frames.num++];
2691             *f = (struct cached_frame) {
2692                 .signature = sig,
2693                 .color = images->frames[i]->color,
2694                 .profile = images->frames[i]->profile,
2695             };
2696         }
2697 
2698         // Check to see if we can blindly reuse this cache entry. This is the
2699         // case if either the params are compatible, or the user doesn't care
2700         bool can_reuse = f->tex;
2701         if (can_reuse && !params->preserve_mixing_cache) {
2702             can_reuse = f->tex->params.w == out_w &&
2703                         f->tex->params.h == out_h &&
2704                         f->params_hash == params_hash;
2705         }
2706 
2707         if (!can_reuse) {
2708             // If we can't reuse the entry, we need to re-render this frame
2709             PL_TRACE(rr, "  -> Cached texture missing or invalid.. (re)creating");
2710             if (!f->tex) {
2711                 if (PL_ARRAY_POP(rr->frame_fbos, &f->tex))
2712                     pl_tex_invalidate(rr->gpu, f->tex);
2713             }
2714 
2715             bool ok = pl_tex_recreate(rr->gpu, &f->tex, &(struct pl_tex_params) {
2716                 .w = out_w,
2717                 .h = out_h,
2718                 .format = rr->fbofmt[4],
2719                 .sampleable = true,
2720                 .renderable = true,
2721                 .storable = rr->fbofmt[4]->caps & PL_FMT_CAP_STORABLE,
2722             });
2723 
2724             if (!ok) {
2725                 PL_ERR(rr, "Could not create intermediate texture for "
2726                        "frame mixing.. disabling!");
2727                 rr->disable_mixing = true;
2728                 goto fallback;
2729             }
2730 
2731             struct pass_state inter_pass = {
2732                 .rr = rr,
2733                 .tmp = pass.tmp,
2734                 .params = pass.params,
2735                 .fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool)),
2736                 .image = *images->frames[i],
2737                 .target = pass.target,
2738                 .info.stage = PL_RENDER_STAGE_FRAME,
2739             };
2740 
2741             // Render a single frame up to `pass_output_target`
2742             if (!pass_infer_state(&inter_pass))
2743                 goto error;
2744 
2745             pl_dispatch_callback(rr->dp, &inter_pass, info_callback);
2746             pl_dispatch_reset_frame(rr->dp);
2747             for (int n = 0; n < params->num_hooks; n++) {
2748                 if (params->hooks[n]->reset)
2749                     params->hooks[n]->reset(params->hooks[n]->priv);
2750             }
2751 
2752             if (!pass_read_image(&inter_pass))
2753                 goto error;
2754             if (!pass_scale_main(&inter_pass))
2755                 goto error;
2756 
2757             pl_assert(inter_pass.img.w == out_w &&
2758                       inter_pass.img.h == out_h);
2759 
2760             ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
2761                 .shader = &inter_pass.img.sh,
2762                 .target = f->tex,
2763             });
2764             if (!ok)
2765                 goto error;
2766 
2767             f->params_hash = params_hash;
2768             f->color = inter_pass.img.color;
2769         }
2770 
2771         pl_assert(fidx < MAX_MIX_FRAMES);
2772         frames[fidx] = *f;
2773         weights[fidx] = weight;
2774         wsum += weight;
2775         fidx++;
2776     }
2777 
2778     // Evict the frames we *don't* need
2779     for (int i = 0; i < rr->frames.num; ) {
2780         if (rr->frames.elem[i].evict) {
2781             PL_TRACE(rr, "Evicting frame with signature %llx from cache",
2782                      (unsigned long long) rr->frames.elem[i].signature);
2783             PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex);
2784             PL_ARRAY_REMOVE_AT(rr->frames, i);
2785             continue;
2786         } else {
2787             i++;
2788         }
2789     }

    // Sample and mix the output color
    pl_dispatch_callback(rr->dp, &pass, info_callback);
    pl_dispatch_reset_frame(rr->dp);
    pass.info.index = fidx;

    pl_shader sh = pl_dispatch_begin(rr->dp);
    sh_describe(sh, "frame mixing");
    sh->res.output = PL_SHADER_SIG_COLOR;
    sh->output_w = out_w;
    sh->output_h = out_h;

    // The color space to mix the frames in. Arbitrarily use the newest frame's
    // color, since this is unlikely to change very often mid-playback.
    pl_assert(fidx > 0);
    const struct pl_color_space mix_color = frames[fidx - 1].color;

    GLSL("vec4 color;                   \n"
         "// pl_render_image_mix        \n"
         "{                             \n"
         "vec4 mix_color = vec4(0.0);   \n");

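    // Sample each cached frame, convert it into `mix_color` and accumulate it,
    // weighted by its normalized mixing weight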
    for (int i = 0; i < fidx; i++) {
        const struct pl_tex_params *tpars = &frames[i].tex->params;

        // Use linear sampling if desired and possible
        enum pl_tex_sample_mode sample_mode = PL_TEX_SAMPLE_NEAREST;
        if ((tpars->w != out_w || tpars->h != out_h) &&
            (tpars->format->caps & PL_FMT_CAP_LINEAR))
        {
            sample_mode = PL_TEX_SAMPLE_LINEAR;
        }

        ident_t pos, tex = sh_bind(sh, frames[i].tex, PL_TEX_ADDRESS_CLAMP,
                                   sample_mode, "frame", NULL, &pos, NULL, NULL);

        GLSL("color = %s(%s, %s); \n", sh_tex_fn(sh, *tpars), tex, pos);

        // Note: This ignores differences in ICC profile, which we deliberately
        // don't handle here. Doing so properly would require converting
        // between the frames' image profiles, and the extra state is not
        // worth it for such an unlikely corner case.
        pl_shader_color_map(sh, NULL, frames[i].color, mix_color, NULL, false);

        ident_t weight = sh_var(sh, (struct pl_shader_var) {
            .var = pl_var_float("weight"),
            .data = &(float){ weights[i] / wsum },
            .dynamic = true,
        });

        GLSL("mix_color += %s * color; \n", weight);
    }

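    // Since every weight was uploaded as weights[i] / wsum, `mix_color` now
    // holds the normalized weighted average: sum_i (w_i / sum_j w_j) * color_i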
    GLSL("color = mix_color; \n"
         "}                  \n");

    // Dispatch this to the destination
    pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool));
    pass.img = (struct img) {
        .sh = sh,
        .w = out_w,
        .h = out_h,
        .comps = 4,
        .color = mix_color,
        .repr = {
            .sys = PL_COLOR_SYSTEM_RGB,
            .levels = PL_COLOR_LEVELS_PC,
            .alpha = PL_ALPHA_PREMULTIPLIED,
        },
    };

    for (int i = 0; i < params->num_hooks; i++) {
        if (params->hooks[i]->reset)
            params->hooks[i]->reset(params->hooks[i]->priv);
    }

    if (!pass_output_target(&pass))
        goto fallback;

    pl_free(pass.tmp);
    return true;

error:
    PL_ERR(rr, "Could not render image for frame mixing... disabling!");
    rr->disable_mixing = true;
    // fall through

fallback:
    pl_free(pass.tmp);
    return pl_render_image(rr, refimg, ptarget, params);
}
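
/*
 * Illustrative sketch of how a caller might drive the mixing path above.
 * Only pl_render_image_mix() is real API; the frame-queue helper, the
 * variable names and the error handling are hypothetical stand-ins:
 *
 *     struct pl_frame_mix mix = user_build_frame_mix(queue, target_pts);
 *     if (!pl_render_image_mix(renderer, &mix, &target_frame, &render_params))
 *         user_handle_error();
 */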

void pl_frame_set_chroma_location(struct pl_frame *frame,
                                  enum pl_chroma_location chroma_loc)
{
    pl_tex ref = frame_ref(frame);

    if (ref) {
        // Texture dimensions are already known, so apply the chroma location
        // only to subsampled planes
        int ref_w = ref->params.w, ref_h = ref->params.h;

        for (int i = 0; i < frame->num_planes; i++) {
            struct pl_plane *plane = &frame->planes[i];
            pl_tex tex = plane->texture;
            bool subsampled = tex->params.w < ref_w || tex->params.h < ref_h;
            if (subsampled)
                pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y);
        }
    } else {
        // Texture dimensions are not yet known, so apply the chroma location
        // to all chroma planes, regardless of subsampling
        for (int i = 0; i < frame->num_planes; i++) {
            struct pl_plane *plane = &frame->planes[i];
            if (detect_plane_type(plane, &frame->repr) == PLANE_CHROMA)
                pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y);
        }
    }
}

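/*
 * A minimal sketch of the render loop this helper is intended for (error
 * handling omitted; `swapchain`, `renderer` and `image` stand in for the
 * caller's objects, the libplacebo calls themselves are public API):
 *
 *     struct pl_swapchain_frame sw_frame;
 *     if (pl_swapchain_start_frame(swapchain, &sw_frame)) {
 *         struct pl_frame target;
 *         pl_frame_from_swapchain(&target, &sw_frame);
 *         pl_render_image(renderer, &image, &target, &pl_render_default_params);
 *         pl_swapchain_submit_frame(swapchain);
 *         pl_swapchain_swap_buffers(swapchain);
 *     }
 */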
void pl_frame_from_swapchain(struct pl_frame *out_frame,
                             const struct pl_swapchain_frame *frame)
{
    pl_tex fbo = frame->fbo;
    *out_frame = (struct pl_frame) {
        .num_planes = 1,
        .planes = {{
            .texture = fbo,
            .components = fbo->params.format->num_components,
            .component_mapping = {0, 1, 2, 3},
        }},
        .crop = { 0, 0, fbo->params.w, fbo->params.h },
        .repr = frame->color_repr,
        .color = frame->color_space,
    };

    if (frame->flipped)
        PL_SWAP(out_frame->crop.y0, out_frame->crop.y1);
}

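// Note: an unset crop (all zeros) is treated as covering the full reference
// texture, so such a frame is reported as not cropped.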
bool pl_frame_is_cropped(const struct pl_frame *frame)
{
    int x0 = roundf(PL_MIN(frame->crop.x0, frame->crop.x1)),
        y0 = roundf(PL_MIN(frame->crop.y0, frame->crop.y1)),
        x1 = roundf(PL_MAX(frame->crop.x0, frame->crop.x1)),
        y1 = roundf(PL_MAX(frame->crop.y0, frame->crop.y1));

    pl_tex ref = frame_ref(frame);
    pl_assert(ref);

    if (!x0 && !x1)
        x1 = ref->params.w;
    if (!y0 && !y1)
        y1 = ref->params.h;

    return x0 > 0 || y0 > 0 || x1 < ref->params.w || y1 < ref->params.h;
}

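/*
 * For example, clearing a target frame to opaque black before compositing
 * (a usage sketch; `gpu` and `target` stand in for the caller's objects):
 *
 *     pl_frame_clear_rgba(gpu, &target, (float[4]) {0.0, 0.0, 0.0, 1.0});
 */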
void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame,
                         const float rgba[4])
{
    struct pl_color_repr repr = frame->repr;
    struct pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL);
    pl_transform3x3_invert(&tr);

    float encoded[3] = { rgba[0], rgba[1], rgba[2] };
    pl_transform3x3_apply(&tr, encoded);

    float mult = frame->repr.alpha == PL_ALPHA_INDEPENDENT ? 1.0 : rgba[3];
    for (int p = 0; p < frame->num_planes; p++) {
        const struct pl_plane *plane = &frame->planes[p];
        float clear[4] = { 0.0, 0.0, 0.0, rgba[3] };
        for (int c = 0; c < plane->components; c++) {
            if (plane->component_mapping[c] >= 0)
                clear[c] = mult * encoded[plane->component_mapping[c]];
        }

        pl_tex_clear(gpu, plane->texture, clear);
    }
}