1 /*
2 * This file is part of libplacebo.
3 *
4 * libplacebo is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * libplacebo is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <math.h>
19
20 #include "common.h"
21 #include "filters.h"
22 #include "shaders.h"
23 #include "dispatch.h"
24
// A single entry in the frame cache used for frame mixing / interpolation.
struct cached_frame {
    uint64_t signature;          // unique ID of the source frame this was rendered from
    uint64_t params_hash;        // for detecting `pl_render_params` changes
    struct pl_color_space color; // color space the cached texture was rendered in
    struct pl_icc_profile profile; // ICC profile the cached texture was rendered with
    pl_tex tex;                  // the cached (pre-rendered) texture itself
    bool evict;                  // for garbage collection
};
33
// Persistent shader state for one scaling "slot": a pair of state objects
// (e.g. filter LUTs) reused across frames for up- and downscaling.
struct sampler {
    pl_shader_obj upscaler_state;   // state object for the upscaler
    pl_shader_obj downscaler_state; // state object for the downscaler
};
38
// Vertex layout for OSD / overlay draws; must stay in sync with the
// `osd_attribs` set up in pl_renderer_create()
struct osd_vertex {
    float pos[2];   // target position, in normalized device coordinates
    float coord[2]; // source texture coordinate (normalized)
    float color[4]; // per-vertex RGBA color (only bound for monochrome overlays)
};
44
// Central renderer state: one instance per (log, gpu) pair, created by
// pl_renderer_create() and freed by pl_renderer_destroy().
struct pl_renderer {
    pl_gpu gpu;
    pl_dispatch dp;
    pl_log log;

    // Texture format to use for intermediate textures, indexed by component
    // count (1-4). [4] is probed first and acts as the default entry.
    pl_fmt fbofmt[5];

    // Cached feature checks (inverted: true means the feature is turned off,
    // either after a capability probe or after a runtime failure)
    bool disable_compute;     // disable the use of compute shaders
    bool disable_sampling;    // disable use of advanced scalers
    bool disable_debanding;   // disable the use of debanding shaders
    bool disable_linear_hdr;  // disable linear scaling for HDR signals
    bool disable_linear_sdr;  // disable linear scaling for SDR signals
    bool disable_blending;    // disable blending for the target/fbofmt
    bool disable_overlay;     // disable rendering overlays
    bool disable_icc;         // disable usage of ICC profiles
    bool disable_peak_detect; // disable peak detection shader
    bool disable_grain;       // disable AV1 grain code
    bool disable_hooks;       // disable user hooks / custom shaders
    bool disable_mixing;      // disable frame mixing

    // Shader resource objects and intermediate textures (FBOs)
    pl_shader_obj peak_detect_state;
    pl_shader_obj dither_state;
    pl_shader_obj icc_state;
    pl_shader_obj grain_state[4];
    pl_shader_obj lut_state[3]; // indexed by the LUT_* enum below
    PL_ARRAY(pl_tex) fbos;      // pool of reusable intermediate render targets
    struct sampler sampler_main;
    struct sampler samplers_src[4];
    struct sampler samplers_dst[4];

    // Temporary storage for vertex/index data, reused across overlay draws
    PL_ARRAY(struct osd_vertex) osd_vertices;
    PL_ARRAY(uint16_t) osd_indices;
    struct pl_vertex_attrib osd_attribs[3];

    // Frame cache (for frame mixing / interpolation)
    PL_ARRAY(struct cached_frame) frames;
    PL_ARRAY(pl_tex) frame_fbos;
};
87
enum {
    // Index into `lut_state`: one shader state object per distinct LUT source
    LUT_IMAGE,
    LUT_TARGET,
    LUT_PARAMS,
};
94
// Probe the GPU for the best usable intermediate (FBO) texture format and
// populate `rr->fbofmt`. Also demotes renderer features (compute shaders,
// linear-light processing) if the winning format lacks the required caps.
static void find_fbo_format(pl_renderer rr)
{
    // Candidate configurations, in order of decreasing preference
    struct {
        enum pl_fmt_type type;
        int depth;
        enum pl_fmt_caps caps;
    } configs[] = {
        // Prefer floating point formats first
        {PL_FMT_FLOAT, 16, PL_FMT_CAP_LINEAR},
        {PL_FMT_FLOAT, 16, PL_FMT_CAP_SAMPLEABLE},

        // Otherwise, fall back to unorm/snorm, preferring linearly sampleable
        {PL_FMT_UNORM, 16, PL_FMT_CAP_LINEAR},
        {PL_FMT_SNORM, 16, PL_FMT_CAP_LINEAR},
        {PL_FMT_UNORM, 16, PL_FMT_CAP_SAMPLEABLE},
        {PL_FMT_SNORM, 16, PL_FMT_CAP_SAMPLEABLE},

        // As a final fallback, allow 8-bit FBO formats (for UNORM only)
        {PL_FMT_UNORM, 8, PL_FMT_CAP_LINEAR},
        {PL_FMT_UNORM, 8, PL_FMT_CAP_SAMPLEABLE},
    };

    pl_fmt fmt = NULL;
    for (int i = 0; i < PL_ARRAY_SIZE(configs); i++) {
        fmt = pl_find_fmt(rr->gpu, configs[i].type, 4, configs[i].depth, 0,
                          configs[i].caps | PL_FMT_CAP_RENDERABLE);
        if (fmt) {
            rr->fbofmt[4] = fmt;

            // Probe the right variant for each number of channels, falling
            // back to the next biggest format. Iterate downwards so that
            // `fbofmt[c+1]` is always populated before it's consulted as
            // the fallback for `fbofmt[c]`. (The previous ascending loop
            // read still-NULL entries for c < 3, so the fallback silently
            // never worked except for c == 3.)
            for (int c = 3; c >= 1; c--) {
                rr->fbofmt[c] = pl_find_fmt(rr->gpu, configs[i].type, c,
                                            configs[i].depth, 0, fmt->caps);
                rr->fbofmt[c] = PL_DEF(rr->fbofmt[c], rr->fbofmt[c+1]);
            }
            break;
        }
    }

    if (!fmt) {
        PL_WARN(rr, "Found no renderable FBO format! Most features disabled");
        return;
    }

    if (!(fmt->caps & PL_FMT_CAP_STORABLE)) {
        PL_INFO(rr, "Found no storable FBO format; compute shaders disabled");
        rr->disable_compute = true;
    }

    if (fmt->type != PL_FMT_FLOAT) {
        PL_INFO(rr, "Found no floating point FBO format; linear light "
                "processing disabled for HDR material");
        rr->disable_linear_hdr = true;
    }

    if (fmt->component_depth[0] < 16) {
        PL_WARN(rr, "FBO format precision low (<16 bit); linear light "
                "processing disabled");
        rr->disable_linear_sdr = true;
    }
}
157
pl_renderer_create(pl_log log,pl_gpu gpu)158 pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu)
159 {
160 pl_renderer rr = pl_alloc_ptr(NULL, rr);
161 *rr = (struct pl_renderer) {
162 .gpu = gpu,
163 .log = log,
164 .dp = pl_dispatch_create(log, gpu),
165 .osd_attribs = {
166 {
167 .name = "pos",
168 .offset = offsetof(struct osd_vertex, pos),
169 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
170 }, {
171 .name = "coord",
172 .offset = offsetof(struct osd_vertex, coord),
173 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
174 }, {
175 .name = "osd_color",
176 .offset = offsetof(struct osd_vertex, color),
177 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 4),
178 }
179 },
180 };
181
182 assert(rr->dp);
183 find_fbo_format(rr);
184 return rr;
185 }
186
// Free the shader state objects held by a single sampler slot. The sampler
// struct itself is embedded in `rr` and is not freed here.
static void sampler_destroy(pl_renderer rr, struct sampler *sampler)
{
    pl_shader_obj_destroy(&sampler->upscaler_state);
    pl_shader_obj_destroy(&sampler->downscaler_state);
}
192
// Destroy a renderer and all GPU resources it owns. Accepts a pointer to
// the handle so it can be nulled out; a NULL handle is a no-op.
void pl_renderer_destroy(pl_renderer *p_rr)
{
    pl_renderer rr = *p_rr;
    if (!rr)
        return;

    // Free all intermediate FBOs
    for (int i = 0; i < rr->fbos.num; i++)
        pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]);
    for (int i = 0; i < rr->frames.num; i++)
        pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
    for (int i = 0; i < rr->frame_fbos.num; i++)
        pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]);

    // Free all shader resource objects
    pl_shader_obj_destroy(&rr->peak_detect_state);
    pl_shader_obj_destroy(&rr->dither_state);
    pl_shader_obj_destroy(&rr->icc_state);
    for (int i = 0; i < PL_ARRAY_SIZE(rr->lut_state); i++)
        pl_shader_obj_destroy(&rr->lut_state[i]);
    for (int i = 0; i < PL_ARRAY_SIZE(rr->grain_state); i++)
        pl_shader_obj_destroy(&rr->grain_state[i]);

    // Free all samplers
    sampler_destroy(rr, &rr->sampler_main);
    for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_src); i++)
        sampler_destroy(rr, &rr->samplers_src[i]);
    for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_dst); i++)
        sampler_destroy(rr, &rr->samplers_dst[i]);

    // The dispatch must go last, after all shader objects are gone
    pl_dispatch_destroy(&rr->dp);
    pl_free_ptr(p_rr);
}
226
// Serialize the renderer's shader/program cache into `out_cache` and return
// the number of bytes required. Thin wrapper; delegates entirely to the
// dispatch object's cache.
size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache)
{
    return pl_dispatch_save(rr->dp, out_cache);
}
231
// Restore a previously saved shader/program cache (see pl_renderer_save).
// Thin wrapper; delegates entirely to the dispatch object's cache.
void pl_renderer_load(pl_renderer rr, const uint8_t *cache)
{
    pl_dispatch_load(rr->dp, cache);
}
236
// Drop all cached frames (used for frame mixing), as well as the peak
// detection state, which carries history from previously rendered frames.
void pl_renderer_flush_cache(pl_renderer rr)
{
    for (int i = 0; i < rr->frames.num; i++)
        pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
    rr->frames.num = 0;

    pl_shader_obj_destroy(&rr->peak_detect_state);
}
245
// Default render parameters: a balance of speed and quality (no debanding).
const struct pl_render_params pl_render_default_params = {
    .upscaler = &pl_filter_spline36,
    .downscaler = &pl_filter_mitchell,
    .frame_mixer = &pl_oversample_frame_mixer,
    .lut_entries = 64,
    .polar_cutoff = 0.001,

    .sigmoid_params = &pl_sigmoid_default_params,
    .peak_detect_params = &pl_peak_detect_default_params,
    .color_map_params = &pl_color_map_default_params,
    .dither_params = &pl_dither_default_params,
};
258
// High-quality render parameters: like the defaults, but with an EWA
// upscaler and debanding enabled.
const struct pl_render_params pl_render_high_quality_params = {
    .upscaler = &pl_filter_ewa_lanczos,
    .downscaler = &pl_filter_mitchell,
    .frame_mixer = &pl_oversample_frame_mixer,
    .lut_entries = 64,
    .polar_cutoff = 0.001,

    .deband_params = &pl_deband_default_params,
    .sigmoid_params = &pl_sigmoid_default_params,
    .peak_detect_params = &pl_peak_detect_default_params,
    .color_map_params = &pl_color_map_default_params,
    .dither_params = &pl_dither_default_params,
};
272
// This is only used as a sentinel, to use the GLSL implementation. Its
// *address* is compared against `kernel->weight` (see sample_src_info);
// the body must never actually be invoked.
static double oversample(const struct pl_filter_function *k, double x)
{
    pl_unreachable();
}
278
// Pseudo-kernel backing `pl_filter_oversample`; never evaluated on the CPU
// (see `oversample` above). params[0] is forwarded to the oversampling
// shader (see dispatch_sampler).
static const struct pl_filter_function oversample_kernel = {
    .weight = oversample,
    .tunable = {true},
    .params = {0.0},
};

const struct pl_filter_config pl_filter_oversample = {
    .kernel = &oversample_kernel,
};
288
// Available frame mixing presets, terminated by a {0} sentinel
const struct pl_filter_preset pl_frame_mixers[] = {
    { "none", NULL, "No frame mixing" },
    { "oversample", &pl_filter_oversample, "Oversample (AKA SmoothMotion)" },
    { "mitchell_clamp", &pl_filter_mitchell_clamp, "Cubic spline (clamped)" },
    {0}
};

// Number of entries above, excluding the sentinel
const int pl_num_frame_mixers = PL_ARRAY_SIZE(pl_frame_mixers) - 1;
297
// Available scaling filter presets, terminated by a {0} sentinel
const struct pl_filter_preset pl_scale_filters[] = {
    {"none", NULL, "Built-in sampling"},
    {"oversample", &pl_filter_oversample, "Oversample (Aspect-preserving NN)"},
    COMMON_FILTER_PRESETS,
    {0}
};

// Number of entries above, excluding the sentinel
const int pl_num_scale_filters = PL_ARRAY_SIZE(pl_scale_filters) - 1;
306
307 #define FBOFMT(n) (params->disable_fbos ? NULL : rr->fbofmt[n])
308
// Represents a "in-flight" image, which is either a shader that's in the
// process of producing some sort of image, or a texture that needs to be
// sampled from. Use img_tex() / img_sh() to force it into one of the two
// representations.
struct img {
    // Effective texture size, always set
    int w, h;

    // Recommended format (falls back to FBOFMT otherwise), only for shaders
    pl_fmt fmt;

    // Exactly *one* of these two is set:
    pl_shader sh;
    pl_tex tex;

    // Current effective source area, will be sampled by the main scaler
    struct pl_rect2df rect;

    // The current effective colorspace
    struct pl_color_repr repr;
    struct pl_color_space color;
    int comps; // number of meaningful components
};
331
// Plane 'type', ordered by incrementing priority (higher values win when
// picking a reference plane)
enum plane_type {
    PLANE_INVALID = 0,
    PLANE_ALPHA,
    PLANE_CHROMA,
    PLANE_LUMA,
    PLANE_RGB,
    PLANE_XYZ,
};
341
// Per-render-pass scratch state, threaded through all of the pass_* helpers.
struct pass_state {
    void *tmp; // allocation parent for pass-lifetime data (e.g. fbos_used)
    pl_renderer rr;
    const struct pl_render_params *params;
    struct pl_render_info info; // for info callback

    // Represents the "current" image which we're in the process of rendering.
    // This is initially set by pass_read_image, and all of the subsequent
    // rendering steps will mutate this in-place.
    struct img img;

    // Represents the "reference rect". Canonically, this is functionally
    // equivalent to `image.crop`, but both guaranteed to be valid, and also
    // updates as the refplane evolves (e.g. due to user hook prescalers)
    struct pl_rect2df ref_rect;

    // Integer version of `target.crop`. Semantically identical.
    struct pl_rect2d dst_rect;

    // Cached copies of the `image` / `target` for this rendering pass,
    // corrected to make sure all rects etc. are properly defaulted/inferred.
    struct pl_frame image;
    struct pl_frame target;

    // Some extra plane metadata, inferred from `planes`
    enum plane_type src_type[4];
    enum plane_type dst_type[4];
    int src_ref, dst_ref; // index into `planes`

    // Metadata for `rr->fbos`: fbos_used[i] marks pool entry i as claimed
    // for the duration of this pass
    bool *fbos_used;
};
374
info_callback(void * priv,const struct pl_dispatch_info * dinfo)375 static void info_callback(void *priv, const struct pl_dispatch_info *dinfo)
376 {
377 struct pass_state *pass = priv;
378 const struct pl_render_params *params = pass->params;
379 if (!params->info_callback)
380 return;
381
382 pass->info.pass = dinfo;
383 params->info_callback(params->info_priv, &pass->info);
384 if (pass->info.stage == PL_RENDER_STAGE_FRAME)
385 pass->info.index++;
386 }
387
get_fbo(struct pass_state * pass,int w,int h,pl_fmt fmt,int comps)388 static pl_tex get_fbo(struct pass_state *pass, int w, int h, pl_fmt fmt, int comps)
389 {
390 pl_renderer rr = pass->rr;
391 comps = PL_DEF(comps, 4);
392 fmt = PL_DEF(fmt, rr->fbofmt[comps]);
393 if (!fmt)
394 return NULL;
395
396 struct pl_tex_params params = {
397 .w = w,
398 .h = h,
399 .format = fmt,
400 .sampleable = true,
401 .renderable = true,
402 .storable = fmt->caps & PL_FMT_CAP_STORABLE,
403 };
404
405 int best_idx = -1;
406 int best_diff = 0;
407
408 // Find the best-fitting texture out of rr->fbos
409 for (int i = 0; i < rr->fbos.num; i++) {
410 if (pass->fbos_used[i])
411 continue;
412
413 // Orthogonal distance, with penalty for format mismatches
414 int diff = abs(rr->fbos.elem[i]->params.w - w) +
415 abs(rr->fbos.elem[i]->params.h - h) +
416 ((rr->fbos.elem[i]->params.format != fmt) ? 1000 : 0);
417
418 if (best_idx < 0 || diff < best_diff) {
419 best_idx = i;
420 best_diff = diff;
421 }
422 }
423
424 // No texture found at all, add a new one
425 if (best_idx < 0) {
426 best_idx = rr->fbos.num;
427 PL_ARRAY_APPEND(rr, rr->fbos, NULL);
428 pl_grow(pass->tmp, &pass->fbos_used, rr->fbos.num * sizeof(bool));
429 pass->fbos_used[best_idx] = false;
430 }
431
432 if (!pl_tex_recreate(rr->gpu, &rr->fbos.elem[best_idx], ¶ms))
433 return NULL;
434
435 pass->fbos_used[best_idx] = true;
436 return rr->fbos.elem[best_idx];
437 }
438
439 // Forcibly convert an img to `tex`, dispatching where necessary
img_tex(struct pass_state * pass,struct img * img)440 static pl_tex img_tex(struct pass_state *pass, struct img *img)
441 {
442 if (img->tex) {
443 pl_assert(!img->sh);
444 return img->tex;
445 }
446
447 pl_renderer rr = pass->rr;
448 pl_tex tex = get_fbo(pass, img->w, img->h, img->fmt, img->comps);
449 img->fmt = NULL;
450
451 if (!tex) {
452 PL_ERR(rr, "Failed creating FBO texture! Disabling advanced rendering..");
453 memset(rr->fbofmt, 0, sizeof(rr->fbofmt));
454 pl_dispatch_abort(rr->dp, &img->sh);
455 return NULL;
456 }
457
458 pl_assert(img->sh);
459 bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
460 .shader = &img->sh,
461 .target = tex,
462 });
463
464 if (!ok) {
465 PL_ERR(rr, "Failed dispatching intermediate pass!");
466 img->sh = pl_dispatch_begin(rr->dp);
467 return NULL;
468 }
469
470 img->tex = tex;
471 return img->tex;
472 }
473
474 // Forcibly convert an img to `sh`, sampling where necessary
img_sh(struct pass_state * pass,struct img * img)475 static pl_shader img_sh(struct pass_state *pass, struct img *img)
476 {
477 if (img->sh) {
478 pl_assert(!img->tex);
479 return img->sh;
480 }
481
482 pl_assert(img->tex);
483 img->sh = pl_dispatch_begin(pass->rr->dp);
484 pl_shader_sample_direct(img->sh, &(struct pl_sample_src) {
485 .tex = img->tex,
486 });
487
488 img->tex = NULL;
489 return img->sh;
490 }
491
// How a given sampling operation should be implemented
enum sampler_type {
    SAMPLER_DIRECT,  // pick based on texture caps
    SAMPLER_NEAREST, // direct sampling, force nearest
    SAMPLER_BICUBIC, // fast bicubic scaling
    SAMPLER_COMPLEX, // complex custom filters
    SAMPLER_OVERSAMPLE, // aspect-preserving nearest-neighbour (SmoothMotion)
};

// Scaling direction; ordered so that PL_MAX() prefers DOWN over UP over NOOP
enum sampler_dir {
    SAMPLER_NOOP, // 1:1 scaling
    SAMPLER_UP,   // upscaling
    SAMPLER_DOWN, // downscaling
};

// Result of sample_src_info(): fully describes how to sample a source
struct sampler_info {
    const struct pl_filter_config *config; // if applicable
    enum sampler_type type;
    enum sampler_dir dir;        // overall direction (max over both axes)
    enum sampler_dir dir_sep[2]; // per-axis direction: [0] = x, [1] = y
};
512
sample_src_info(struct pass_state * pass,const struct pl_sample_src * src)513 static struct sampler_info sample_src_info(struct pass_state *pass,
514 const struct pl_sample_src *src)
515 {
516 const struct pl_render_params *params = pass->params;
517 struct sampler_info info = {0};
518 pl_renderer rr = pass->rr;
519
520 float rx = src->new_w / fabs(pl_rect_w(src->rect));
521 if (rx < 1.0 - 1e-6) {
522 info.dir_sep[0] = SAMPLER_DOWN;
523 } else if (rx > 1.0 + 1e-6) {
524 info.dir_sep[0] = SAMPLER_UP;
525 }
526
527 float ry = src->new_h / fabs(pl_rect_h(src->rect));
528 if (ry < 1.0 - 1e-6) {
529 info.dir_sep[1] = SAMPLER_DOWN;
530 } else if (ry > 1.0 + 1e-6) {
531 info.dir_sep[1] = SAMPLER_UP;
532 }
533
534 // We use PL_MAX so downscaling overrides upscaling when choosing scalers
535 info.dir = PL_MAX(info.dir_sep[0], info.dir_sep[1]);
536 switch (info.dir) {
537 case SAMPLER_DOWN:
538 info.config = params->downscaler;
539 break;
540 case SAMPLER_UP:
541 info.config = params->upscaler;
542 break;
543 case SAMPLER_NOOP:
544 info.type = SAMPLER_NEAREST;
545 return info;
546 }
547
548 int comps = PL_DEF(src->components, 4);
549 if (!FBOFMT(comps) || rr->disable_sampling || !info.config) {
550 info.type = SAMPLER_DIRECT;
551 } else if (info.config->kernel->weight == oversample) {
552 info.type = SAMPLER_OVERSAMPLE;
553 } else {
554 info.type = SAMPLER_COMPLEX;
555
556 // Try using faster replacements for GPU built-in scalers
557 pl_fmt texfmt = src->tex ? src->tex->params.format : rr->fbofmt[comps];
558 bool can_linear = texfmt->caps & PL_FMT_CAP_LINEAR;
559 bool can_fast = info.dir == SAMPLER_UP || params->skip_anti_aliasing;
560
561 if (can_fast && !params->disable_builtin_scalers) {
562 if (can_linear && info.config == &pl_filter_bicubic)
563 info.type = SAMPLER_BICUBIC;
564 if (can_linear && info.config == &pl_filter_bilinear)
565 info.type = SAMPLER_DIRECT;
566 if (info.config == &pl_filter_nearest)
567 info.type = can_linear ? SAMPLER_NEAREST : SAMPLER_DIRECT;
568 }
569 }
570
571 return info;
572 }
573
// Emit the sampling code for `src` into `sh`, using `sampler` to hold any
// persistent scaler state. Falls back to direct (auto) sampling whenever the
// requested scaler is unavailable or fails; on failure it also permanently
// disables advanced sampling for this renderer.
static void dispatch_sampler(struct pass_state *pass, pl_shader sh,
                             struct sampler *sampler, bool no_compute,
                             const struct pl_sample_src *src)
{
    const struct pl_render_params *params = pass->params;
    if (!sampler)
        goto fallback;

    pl_renderer rr = pass->rr;
    struct sampler_info info = sample_src_info(pass, src);
    pl_shader_obj *lut = NULL;
    // Pick the per-direction state object; NOOP needs no state at all
    switch (info.dir) {
    case SAMPLER_NOOP:
        goto fallback;
    case SAMPLER_DOWN:
        lut = &sampler->downscaler_state;
        break;
    case SAMPLER_UP:
        lut = &sampler->upscaler_state;
        break;
    }

    // Simple sampler types are a single shader helper call each
    switch (info.type) {
    case SAMPLER_DIRECT:
        goto fallback;
    case SAMPLER_NEAREST:
        pl_shader_sample_nearest(sh, src);
        return;
    case SAMPLER_OVERSAMPLE:
        pl_shader_sample_oversample(sh, src, info.config->kernel->params[0]);
        return;
    case SAMPLER_BICUBIC:
        pl_shader_sample_bicubic(sh, src);
        return;
    case SAMPLER_COMPLEX:
        break; // continue below
    }

    pl_assert(lut);
    struct pl_sample_filter_params fparams = {
        .filter = *info.config,
        .lut_entries = params->lut_entries,
        .cutoff = params->polar_cutoff,
        .antiring = params->antiringing_strength,
        .no_compute = rr->disable_compute || no_compute,
        .no_widening = params->skip_anti_aliasing,
        .lut = lut,
    };

    bool ok;
    if (info.config->polar) {
        // Polar samplers are always a single function call
        ok = pl_shader_sample_polar(sh, src, &fparams);
    } else if (info.dir_sep[0] && info.dir_sep[1]) {
        // Scaling is needed in both directions: do a separable two-pass
        // scale, vertical first into an intermediate texture, then
        // horizontal into the output shader
        pl_shader tsh = pl_dispatch_begin(rr->dp);
        ok = pl_shader_sample_ortho(tsh, PL_SEP_VERT, src, &fparams);
        if (!ok) {
            pl_dispatch_abort(rr->dp, &tsh);
            goto done;
        }

        // Intermediate image: original width, but already at the new height
        struct img img = {
            .sh = tsh,
            .w = src->tex->params.w,
            .h = src->new_h,
            .comps = src->components,
        };

        struct pl_sample_src src2 = *src;
        src2.tex = img_tex(pass, &img);
        src2.scale = 1.0; // scale was already applied by the first pass
        ok = src2.tex && pl_shader_sample_ortho(sh, PL_SEP_HORIZ, &src2, &fparams);
    } else if (info.dir_sep[0]) {
        // Scaling is needed only in the horizontal direction
        ok = pl_shader_sample_ortho(sh, PL_SEP_HORIZ, src, &fparams);
    } else {
        // Scaling is needed only in the vertical direction
        pl_assert(info.dir_sep[1]);
        ok = pl_shader_sample_ortho(sh, PL_SEP_VERT, src, &fparams);
    }

done:
    if (!ok) {
        PL_ERR(rr, "Failed dispatching scaler.. disabling");
        rr->disable_sampling = true;
        goto fallback;
    }

    return;

fallback:
    // If all else fails, fall back to auto sampling
    pl_shader_sample_direct(sh, src);
}
669
// Emit GLSL that remaps `color` according to `comp_map` (source component
// index per output component, -1 = leave zero), zeroing everything else.
// With `force_alpha`, the original alpha channel is always carried through.
static void swizzle_color(pl_shader sh, int comps, const int comp_map[4],
                          bool force_alpha)
{
    ident_t orig = sh_fresh(sh, "orig_color");
    GLSL("vec4 %s = color; \n"
         "color = vec4(0.0); \n", orig);

    // NULL map means identity
    static const int identity_map[4] = {0, 1, 2, 3};
    if (!comp_map)
        comp_map = identity_map;

    for (int c = 0; c < comps; c++) {
        if (comp_map[c] < 0)
            continue;
        GLSL("color[%d] = %s[%d]; \n", c, orig, comp_map[c]);
    }

    if (force_alpha)
        GLSL("color.a = %s.a; \n", orig);
}
688
// Draw a list of overlays on top of `fbo`, alpha-blending where the target
// format allows it. `color`/`repr` describe the target's color metadata,
// `use_sigmoid` matches the main pass's sigmoidization, and `scale` (if set)
// maps overlay coordinates into the fbo's coordinate system.
static void draw_overlays(struct pass_state *pass, pl_tex fbo,
                          int comps, const int comp_map[4],
                          const struct pl_overlay *overlays, int num,
                          struct pl_color_space color, struct pl_color_repr repr,
                          bool use_sigmoid, struct pl_transform2x2 *scale)
{
    const struct pl_render_params *params = pass->params;
    pl_renderer rr = pass->rr;
    if (num <= 0 || rr->disable_overlay)
        return;

    // Warn (once) if the target can't blend; we keep drawing regardless
    enum pl_fmt_caps caps = fbo->params.format->caps;
    if (!rr->disable_blending && !(caps & PL_FMT_CAP_BLENDABLE)) {
        PL_WARN(rr, "Trying to draw an overlay to a non-blendable target. "
                "Alpha blending is disabled, results may be incorrect!");
        rr->disable_blending = true;
    }

    for (int n = 0; n < num; n++) {
        struct pl_overlay ol = overlays[n];
        struct pl_overlay_part fallback;
        if (!ol.tex) {
            // Backwards compatibility: synthesize a single part from the
            // legacy plane/rect fields
            ol.tex = ol.plane.texture;
            ol.parts = &fallback;
            ol.num_parts = 1;
            fallback = (struct pl_overlay_part) {
                .src = {
                    .x0 = -ol.plane.shift_x,
                    .y0 = -ol.plane.shift_y,
                    .x1 = ol.tex->params.w - ol.plane.shift_x,
                    .y1 = ol.tex->params.h - ol.plane.shift_y,
                },
                .dst = ol.rect,
                .color = {
                    ol.base_color[0],
                    ol.base_color[1],
                    ol.base_color[2],
                    1.0,
                },
            };
        }

        if (!ol.num_parts)
            continue;

        // Construct vertex/index buffers: two triangles per part
        rr->osd_vertices.num = 0;
        rr->osd_indices.num = 0;
        for (int i = 0; i < ol.num_parts; i++) {
            const struct pl_overlay_part *part = &ol.parts[i];

            // Append one vertex for corner (x, y) of `part`, mapping the
            // destination position to NDC and the source to normalized
            // texture coordinates
#define EMIT_VERT(x, y)                                                         \
            do {                                                                \
                float pos[2] = { part->dst.x, part->dst.y };                    \
                if (scale)                                                      \
                    pl_transform2x2_apply(scale, pos);                          \
                PL_ARRAY_APPEND(rr, rr->osd_vertices, (struct osd_vertex) {     \
                    .pos = {                                                    \
                        2.0 * (pos[0] / fbo->params.w) - 1.0,                   \
                        2.0 * (pos[1] / fbo->params.h) - 1.0,                   \
                    },                                                          \
                    .coord = {                                                  \
                        part->src.x / ol.tex->params.w,                         \
                        part->src.y / ol.tex->params.h,                         \
                    },                                                          \
                    .color = {                                                  \
                        part->color[0], part->color[1],                         \
                        part->color[2], part->color[3],                         \
                    },                                                          \
                });                                                             \
            } while (0)

            int idx_base = rr->osd_vertices.num;
            EMIT_VERT(x0, y0); // idx 0: top left
            EMIT_VERT(x1, y0); // idx 1: top right
            EMIT_VERT(x0, y1); // idx 2: bottom left
            EMIT_VERT(x1, y1); // idx 3: bottom right
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 0);
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1);
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2);
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2);
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1);
            PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 3);
        }

        // Draw parts
        pl_shader sh = pl_dispatch_begin(rr->dp);
        ident_t tex = sh_desc(sh, (struct pl_shader_desc) {
            .desc = {
                .name = "osd_tex",
                .type = PL_DESC_SAMPLED_TEX,
            },
            .binding = {
                .object = ol.tex,
                .sample_mode = (ol.tex->params.format->caps & PL_FMT_CAP_LINEAR)
                                    ? PL_TEX_SAMPLE_LINEAR
                                    : PL_TEX_SAMPLE_NEAREST,
            },
        });

        sh_describe(sh, "overlay");
        GLSL("// overlay \n");

        switch (ol.mode) {
        case PL_OVERLAY_NORMAL:
            GLSL("vec4 color = %s(%s, coord); \n",
                 sh_tex_fn(sh, ol.tex->params), tex);
            break;
        case PL_OVERLAY_MONOCHROME:
            GLSL("vec4 color = osd_color; \n");
            break;
        case PL_OVERLAY_MODE_COUNT:
            pl_unreachable();
        };

        // Map the overlay's colors into the target's color space
        sh->res.output = PL_SHADER_SIG_COLOR;
        pl_shader_decode_color(sh, &ol.repr, NULL);
        pl_shader_color_map(sh, params->color_map_params, ol.color, color,
                            NULL, false);

        if (use_sigmoid)
            pl_shader_sigmoidize(sh, params->sigmoid_params);

        repr.alpha = PL_ALPHA_PREMULTIPLIED;
        pl_shader_encode_color(sh, &repr);
        if (ol.mode == PL_OVERLAY_MONOCHROME) {
            // Monochrome overlays use the texture as an alpha mask
            GLSL("color.rgba *= %s(%s, coord).r; \n",
                 sh_tex_fn(sh, ol.tex->params), tex);
        }

        swizzle_color(sh, comps, comp_map, true);

        struct pl_blend_params blend_params = {
            .src_rgb = PL_BLEND_ONE,
            .src_alpha = PL_BLEND_SRC_ALPHA,
            // FIXME: What if the target is not premultiplied?
            .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA,
            .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA,
        };

        bool ok = pl_dispatch_vertex(rr->dp, &(struct pl_dispatch_vertex_params) {
            .shader = &sh,
            .target = fbo,
            .blend_params = rr->disable_blending ? NULL : &blend_params,
            .vertex_stride = sizeof(struct osd_vertex),
            // The color attribute is only bound for monochrome overlays
            .num_vertex_attribs = ol.mode == PL_OVERLAY_NORMAL ? 2 : 3,
            .vertex_attribs = rr->osd_attribs,
            .vertex_position_idx = 0,
            .vertex_coords = PL_COORDS_NORMALIZED,
            .vertex_type = PL_PRIM_TRIANGLE_LIST,
            .vertex_count = rr->osd_indices.num,
            .vertex_data = rr->osd_vertices.elem,
            .index_data = rr->osd_indices.elem,
        });

        if (!ok) {
            PL_ERR(rr, "Failed rendering overlays!");
            rr->disable_overlay = true;
            return;
        }
    }
}
852
get_hook_tex(void * priv,int width,int height)853 static pl_tex get_hook_tex(void *priv, int width, int height)
854 {
855 struct pass_state *pass = priv;
856
857 return get_fbo(pass, width, height, NULL, 4);
858 }
859
// Run all user hooks registered for `stage` against `img`, mutating it in
// place with each hook's output. Returns if any hook was applied (even if
// there were errors). On error, hooks are permanently disabled but `img` is
// kept in a valid (tex-or-shader) state.
static bool pass_hook(struct pass_state *pass, struct img *img,
                      enum pl_hook_stage stage)
{
    const struct pl_render_params *params = pass->params;
    pl_renderer rr = pass->rr;
    // Hooks need working FBOs to round-trip through
    if (!rr->fbofmt[4] || rr->disable_hooks)
        return false;

    bool ret = false;

    for (int n = 0; n < params->num_hooks; n++) {
        const struct pl_hook *hook = params->hooks[n];
        if (!(hook->stages & stage))
            continue;

        PL_TRACE(rr, "Dispatching hook %d stage 0x%x", n, stage);
        struct pl_hook_params hparams = {
            .gpu = rr->gpu,
            .dispatch = rr->dp,
            .get_tex = get_hook_tex,
            .priv = pass,
            .stage = stage,
            .rect = img->rect,
            .repr = img->repr,
            .color = img->color,
            .components = img->comps,
            .src_rect = pass->ref_rect,
            .dst_rect = pass->dst_rect,
        };

        // TODO: Add some sort of `test` API function to the hooks that allows
        // us to skip having to touch the `img` state at all for no-ops

        // Coerce `img` into whichever representation the hook wants as input
        switch (hook->input) {
        case PL_HOOK_SIG_NONE:
            break;

        case PL_HOOK_SIG_TEX: {
            hparams.tex = img_tex(pass, img);
            if (!hparams.tex) {
                PL_ERR(rr, "Failed dispatching shader prior to hook!");
                goto error;
            }
            break;
        }

        case PL_HOOK_SIG_COLOR:
            hparams.sh = img_sh(pass, img);
            break;

        case PL_HOOK_SIG_COUNT:
            pl_unreachable();
        }

        struct pl_hook_res res = hook->hook(hook->priv, &hparams);
        if (res.failed) {
            PL_ERR(rr, "Failed executing hook, disabling");
            goto error;
        }

        // Adopt the hook's output as the new `img`, enforcing the size
        // constraint for stages that may not be resized
        bool resizable = pl_hook_stage_resizable(stage);
        switch (res.output) {
        case PL_HOOK_SIG_NONE:
            break;

        case PL_HOOK_SIG_TEX:
            if (!resizable) {
                if (res.tex->params.w != img->w ||
                    res.tex->params.h != img->h ||
                    !pl_rect2d_eq(res.rect, img->rect))
                {
                    PL_ERR(rr, "User hook tried resizing non-resizable stage!");
                    goto error;
                }
            }

            *img = (struct img) {
                .tex = res.tex,
                .repr = res.repr,
                .color = res.color,
                .comps = res.components,
                .rect = res.rect,
                .w = res.tex->params.w,
                .h = res.tex->params.h,
            };
            break;

        case PL_HOOK_SIG_COLOR:
            if (!resizable) {
                if (res.sh->output_w != img->w ||
                    res.sh->output_h != img->h ||
                    !pl_rect2d_eq(res.rect, img->rect))
                {
                    PL_ERR(rr, "User hook tried resizing non-resizable stage!");
                    goto error;
                }
            }

            *img = (struct img) {
                .sh = res.sh,
                .repr = res.repr,
                .color = res.color,
                .comps = res.components,
                .rect = res.rect,
                .w = res.sh->output_w,
                .h = res.sh->output_h,
            };
            break;

        case PL_HOOK_SIG_COUNT:
            pl_unreachable();
        }

        // a hook was performed successfully
        ret = true;
    }

    return ret;

error:
    rr->disable_hooks = true;

    // Make sure the state remains as valid as possible, even if the resulting
    // shaders might end up nonsensical, to prevent segfaults
    if (!img->tex && !img->sh)
        img->sh = pl_dispatch_begin(rr->dp);
    return ret;
}
989
// `deband_src` results
enum {
    DEBAND_NOOP = 0, // no debanding was performed
    DEBAND_NORMAL,   // debanding was performed, the plane should still be scaled
    DEBAND_SCALED,   // debanding took care of scaling as well
};
996
// Apply debanding to `psrc` where enabled. Returns one of the DEBAND_*
// results above; for DEBAND_NORMAL, `psrc` is rewritten to point at the
// freshly debanded intermediate texture. On failure, debanding is
// permanently disabled and DEBAND_NOOP is returned.
static int deband_src(struct pass_state *pass, pl_shader psh,
                      struct pl_sample_src *psrc)
{
    const struct pl_render_params *params = pass->params;
    const struct pl_frame *image = &pass->image;
    pl_renderer rr = pass->rr;
    if (rr->disable_debanding || !params->deband_params)
        return DEBAND_NOOP;

    if (!(psrc->tex->params.format->caps & PL_FMT_CAP_LINEAR)) {
        PL_WARN(rr, "Debanding requires uploaded textures to be linearly "
                "sampleable (params.sample_mode = PL_TEX_SAMPLE_LINEAR)! "
                "Disabling debanding..");
        rr->disable_debanding = true;
        return DEBAND_NOOP;
    }

    // The debanding shader can replace direct GPU sampling
    bool deband_scales = sample_src_info(pass, psrc).type == SAMPLER_DIRECT;

    pl_shader sh = psh;
    struct pl_sample_src *src = psrc;
    struct pl_sample_src fixed;
    if (!deband_scales) {
        // Only sample/deband the relevant cut-out, but round it to the nearest
        // integer to avoid doing fractional scaling
        fixed = *src;
        fixed.rect.x0 = floorf(fixed.rect.x0);
        fixed.rect.y0 = floorf(fixed.rect.y0);
        fixed.rect.x1 = ceilf(fixed.rect.x1);
        fixed.rect.y1 = ceilf(fixed.rect.y1);
        fixed.new_w = pl_rect_w(fixed.rect);
        fixed.new_h = pl_rect_h(fixed.rect);
        src = &fixed;

        if (fixed.new_w == psrc->new_w &&
            fixed.new_h == psrc->new_h &&
            pl_rect2d_eq(fixed.rect, psrc->rect))
        {
            // If there's nothing left to be done (i.e. we're already rendering
            // an exact integer crop without scaling), also skip the scalers
            deband_scales = true;
        } else {
            // Deband into a separate shader so the result can be dispatched
            // to an intermediate texture before the real scaler runs
            sh = pl_dispatch_begin_ex(rr->dp, true);
        }
    }

    // Divide the deband grain scale by the effective current colorspace nominal
    // peak, to make sure the output intensity of the grain is as independent
    // of the source as possible, even though it happens this early in the
    // process (well before any linearization / output adaptation)
    struct pl_deband_params dparams = *params->deband_params;
    float scale = pl_color_transfer_nominal_peak(image->color.transfer)
                * image->color.sig_scale;
    dparams.grain /= scale;

    pl_shader_deband(sh, src, &dparams);

    if (deband_scales)
        return DEBAND_SCALED;

    // Dispatch the debanding pass into an intermediate texture
    struct img img = {
        .sh = sh,
        .w = src->new_w,
        .h = src->new_h,
        .comps = src->components,
    };

    pl_tex new = img_tex(pass, &img);
    if (!new) {
        PL_ERR(rr, "Failed dispatching debanding shader.. disabling debanding!");
        rr->disable_debanding = true;
        return DEBAND_NOOP;
    }

    // Update the original pl_sample_src to point to the new texture,
    // shifting the rect into the cut-out's coordinate system
    psrc->tex = new;
    psrc->rect.x0 -= src->rect.x0;
    psrc->rect.y0 -= src->rect.y0;
    psrc->rect.x1 -= src->rect.x0;
    psrc->rect.y1 -= src->rect.y0;
    psrc->scale = 1.0;
    return DEBAND_NORMAL;
}
1081
hdr_update_peak(struct pass_state * pass)1082 static void hdr_update_peak(struct pass_state *pass)
1083 {
1084 const struct pl_render_params *params = pass->params;
1085 pl_renderer rr = pass->rr;
1086 if (!params->peak_detect_params || !pl_color_space_is_hdr(pass->img.color))
1087 goto cleanup;
1088
1089 if (rr->disable_compute || rr->disable_peak_detect)
1090 goto cleanup;
1091
1092 float src_peak = pass->img.color.sig_peak * pass->img.color.sig_scale;
1093 float dst_peak = pass->target.color.sig_peak * pass->target.color.sig_scale;
1094 if (src_peak <= dst_peak + 1e-6)
1095 goto cleanup; // no adaptation needed
1096
1097 if (params->lut && params->lut_type == PL_LUT_CONVERSION)
1098 goto cleanup; // LUT handles tone mapping
1099
1100 if (!FBOFMT(4) && !params->allow_delayed_peak_detect) {
1101 PL_WARN(rr, "Disabling peak detection because "
1102 "`allow_delayed_peak_detect` is false, but lack of FBOs "
1103 "forces the result to be delayed.");
1104 rr->disable_peak_detect = true;
1105 goto cleanup;
1106 }
1107
1108 bool ok = pl_shader_detect_peak(img_sh(pass, &pass->img), pass->img.color,
1109 &rr->peak_detect_state,
1110 params->peak_detect_params);
1111 if (!ok) {
1112 PL_WARN(rr, "Failed creating HDR peak detection shader.. disabling");
1113 rr->disable_peak_detect = true;
1114 goto cleanup;
1115 }
1116
1117 return;
1118
1119 cleanup:
1120 // No peak detection required or supported, so clean up the state to avoid
1121 // confusing it with later frames where peak detection is enabled again
1122 pl_shader_obj_destroy(&rr->peak_detect_state);
1123 }
1124
// Per-plane processing state used while reading/merging the source image
struct plane_state {
    enum plane_type type;
    struct pl_plane plane;
    struct img img; // for per-plane shaders
};
1130
// Human-readable names for `enum plane_type`, used for trace logging
static const char *plane_type_names[] = {
    [PLANE_INVALID] = "invalid",
    [PLANE_ALPHA]   = "alpha",
    [PLANE_CHROMA]  = "chroma",
    [PLANE_LUMA]    = "luma",
    [PLANE_RGB]     = "rgb",
    [PLANE_XYZ]     = "xyz",
};
1139
// Trace-log the type, component mapping, sampling rect and bit depth
// information of a single plane (debugging aid only)
static void log_plane_info(pl_renderer rr, const struct plane_state *st)
{
    const struct pl_plane *plane = &st->plane;
    PL_TRACE(rr, " Type: %s", plane_type_names[st->type]);

    const int *m = plane->component_mapping;
    if (plane->components == 0) {
        PL_TRACE(rr, " Components: (none)");
    } else if (plane->components == 1) {
        PL_TRACE(rr, " Components: {%d}", m[0]);
    } else if (plane->components == 2) {
        PL_TRACE(rr, " Components: {%d %d}", m[0], m[1]);
    } else if (plane->components == 3) {
        PL_TRACE(rr, " Components: {%d %d %d}", m[0], m[1], m[2]);
    } else if (plane->components == 4) {
        PL_TRACE(rr, " Components: {%d %d %d %d}", m[0], m[1], m[2], m[3]);
    }

    PL_TRACE(rr, " Rect: {%f %f} -> {%f %f}",
             st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1);

    PL_TRACE(rr, " Bits: %d (used) / %d (sampled), shift %d",
             st->img.repr.bits.color_depth,
             st->img.repr.bits.sample_depth,
             st->img.repr.bits.bit_shift);
}
1181
1182 // Returns true if grain was applied
// Applies AV1 film grain synthesis to a single plane, if required.
// Returns true if grain was applied (in which case `st->img` now refers to
// the grained intermediate); false if grain was unnecessary, disabled, or
// failed (failures also set `rr->disable_grain` for subsequent frames).
static bool plane_av1_grain(struct pass_state *pass, int plane_idx,
                            struct plane_state *st,
                            const struct plane_state *ref,
                            const struct pl_frame *image)
{
    const struct pl_render_params *params = pass->params;
    pl_renderer rr = pass->rr;
    if (rr->disable_grain)
        return false;

    struct img *img = &st->img;
    struct pl_plane *plane = &st->plane;
    // Local copy of the repr; passed by pointer to the grain shader, which
    // may update it — only committed back to `img` on success
    struct pl_color_repr repr = st->img.repr;
    struct pl_av1_grain_params grain_params = {
        .data = image->av1_grain,
        .luma_tex = ref->plane.texture,
        .repr = &repr,
        .components = plane->components,
    };

    for (int c = 0; c < plane->components; c++)
        grain_params.component_mapping[c] = plane->component_mapping[c];

    // Locate which component of the reference plane carries luma
    for (int c = 0; c < ref->plane.components; c++) {
        if (ref->plane.component_mapping[c] == PL_CHANNEL_Y)
            grain_params.luma_comp = c;
    }

    if (!pl_needs_av1_grain(&grain_params))
        return false;

    if (!FBOFMT(plane->components)) {
        PL_ERR(rr, "AV1 grain required but no renderable format available.. "
               "disabling!");
        rr->disable_grain = true;
        return false;
    }

    // Realize the current plane shader chain into a texture, since the grain
    // shader needs to sample its input as a texture
    grain_params.tex = img_tex(pass, img);
    if (!grain_params.tex)
        return false;

    img->sh = pl_dispatch_begin_ex(rr->dp, true);
    if (!pl_shader_av1_grain(img->sh, &rr->grain_state[plane_idx], &grain_params)) {
        pl_dispatch_abort(rr->dp, &img->sh);
        rr->disable_grain = true;
        return false;
    }

    // Immediately realize the grain shader to a fresh texture
    img->tex = NULL;
    if (!img_tex(pass, img)) {
        PL_ERR(rr, "Failed applying AV1 grain.. disabling!");
        pl_dispatch_abort(rr->dp, &img->sh);
        img->tex = grain_params.tex; // roll back to the pre-grain texture
        rr->disable_grain = true;
        return false;
    }

    img->repr = repr; // commit the (possibly updated) repr
    return true;
}
1244
// Maps each plane type to the corresponding per-plane user hook stage
static const enum pl_hook_stage plane_hook_stages[] = {
    [PLANE_ALPHA]  = PL_HOOK_ALPHA_INPUT,
    [PLANE_CHROMA] = PL_HOOK_CHROMA_INPUT,
    [PLANE_LUMA]   = PL_HOOK_LUMA_INPUT,
    [PLANE_RGB]    = PL_HOOK_RGB_INPUT,
    [PLANE_XYZ]    = PL_HOOK_XYZ_INPUT,
};
1252
guess_frame_lut_type(const struct pl_frame * frame,bool reversed)1253 static enum pl_lut_type guess_frame_lut_type(const struct pl_frame *frame,
1254 bool reversed)
1255 {
1256 if (!frame->lut)
1257 return PL_LUT_UNKNOWN;
1258 if (frame->lut_type)
1259 return frame->lut_type;
1260
1261 enum pl_color_system sys_in = frame->lut->repr_in.sys;
1262 enum pl_color_system sys_out = frame->lut->repr_out.sys;
1263 if (reversed)
1264 PL_SWAP(sys_in, sys_out);
1265
1266 if (sys_in == PL_COLOR_SYSTEM_RGB && sys_out == sys_in)
1267 return PL_LUT_NORMALIZED;
1268
1269 if (sys_in == frame->repr.sys && sys_out == PL_COLOR_SYSTEM_RGB)
1270 return PL_LUT_CONVERSION;
1271
1272 // Unknown, just fall back to the default
1273 return PL_LUT_NATIVE;
1274 }
1275
merge_fmt(pl_renderer rr,const struct img * a,const struct img * b)1276 static pl_fmt merge_fmt(pl_renderer rr,
1277 const struct img *a, const struct img *b)
1278 {
1279 pl_fmt fmta = a->tex ? a->tex->params.format : a->fmt;
1280 pl_fmt fmtb = b->tex->params.format;
1281 pl_assert(fmta && fmtb);
1282 if (fmta->type != fmtb->type)
1283 return NULL;
1284
1285 int num_comps = PL_MIN(4, a->comps + b->comps);
1286 int min_depth = PL_MAX(a->repr.bits.sample_depth, b->repr.bits.sample_depth);
1287
1288 // Only return formats that support all relevant caps of both formats
1289 const enum pl_fmt_caps mask = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR;
1290 enum pl_fmt_caps req_caps = (fmta->caps & mask) | (fmtb->caps & mask);
1291
1292 return pl_find_fmt(rr->gpu, fmta->type, num_comps, min_depth, 0, req_caps);
1293 }
1294
1295 // Applies a series of rough heuristics to figure out whether we expect any
1296 // performance gains from plane merging. This is basically a series of checks
1297 // for operations that we *know* benefit from merged planes
want_merge(struct pass_state * pass,const struct plane_state * st,const struct plane_state * ref)1298 static bool want_merge(struct pass_state *pass,
1299 const struct plane_state *st,
1300 const struct plane_state *ref)
1301 {
1302 const struct pl_render_params *params = pass->params;
1303 const pl_renderer rr = pass->rr;
1304 if (!rr->fbofmt[4])
1305 return false;
1306
1307 // Debanding
1308 if (!rr->disable_debanding && params->deband_params)
1309 return true;
1310
1311 // Other plane hooks, which are generally nontrivial
1312 enum pl_hook_stage stage = plane_hook_stages[st->type];
1313 for (int i = 0; i < params->num_hooks; i++) {
1314 if (params->hooks[i]->stages & stage)
1315 return true;
1316 }
1317
1318 // Non-trivial scaling
1319 struct pl_sample_src src = {
1320 .new_w = ref->img.w,
1321 .new_h = ref->img.h,
1322 .rect = {
1323 .x1 = st->img.w,
1324 .y1 = st->img.h,
1325 },
1326 };
1327
1328 struct sampler_info info = sample_src_info(pass, &src);
1329 if (info.type == SAMPLER_COMPLEX)
1330 return true;
1331
1332 // AV1 grain synthesis, can be merged for compatible channels, saving on
1333 // redundant sampling of the grain/offset textures
1334 struct pl_av1_grain_params grain_params = {
1335 .data = pass->image.av1_grain,
1336 .repr = (struct pl_color_repr *) &st->img.repr,
1337 .components = st->plane.components,
1338 };
1339
1340 for (int c = 0; c < st->plane.components; c++)
1341 grain_params.component_mapping[c] = st->plane.component_mapping[c];
1342
1343 if (!rr->disable_grain && pl_needs_av1_grain(&grain_params))
1344 return true;
1345
1346 return false;
1347 }
1348
1349 // This scales and merges all of the source images, and initializes pass->img.
static bool pass_read_image(struct pass_state *pass)
{
    const struct pl_render_params *params = pass->params;
    struct pl_frame *image = &pass->image;
    pl_renderer rr = pass->rr;

    struct plane_state planes[4];
    struct plane_state *ref = &planes[pass->src_ref];

    // Initialize per-plane state from the source frame's planes
    for (int i = 0; i < image->num_planes; i++) {
        planes[i] = (struct plane_state) {
            .type = pass->src_type[i],
            .plane = image->planes[i],
            .img = {
                .w = image->planes[i].texture->params.w,
                .h = image->planes[i].texture->params.h,
                .tex = image->planes[i].texture,
                .repr = image->repr,
                .color = image->color,
                .comps = image->planes[i].components,
            },
        };
    }

    // Original ref texture, even after preprocessing
    pl_tex ref_tex = ref->plane.texture;

    // Merge all compatible planes into 'combined' shaders
    for (int i = 0; i < image->num_planes; i++) {
        struct plane_state *sti = &planes[i];
        if (!sti->type)
            continue;
        if (!want_merge(pass, sti, ref))
            continue;

        for (int j = i+1; j < image->num_planes; j++) {
            struct plane_state *stj = &planes[j];
            // Only planes with matching type, dimensions and subpixel shift
            // can be merged without changing sampling semantics
            bool merge = sti->type == stj->type &&
                         sti->img.w == stj->img.w &&
                         sti->img.h == stj->img.h &&
                         sti->plane.shift_x == stj->plane.shift_x &&
                         sti->plane.shift_y == stj->plane.shift_y;
            if (!merge)
                continue;

            pl_fmt fmt = merge_fmt(rr, &sti->img, &stj->img);
            if (!fmt)
                continue;

            PL_TRACE(rr, "Merging plane %d into plane %d", j, i);
            pl_shader sh = sti->img.sh;
            if (!sh) {
                // First merge into plane i: start a shader that samples it
                sh = sti->img.sh = pl_dispatch_begin_ex(pass->rr->dp, true);
                sh_describe(sh, "merging planes");
                GLSL("vec4 tmp; \n");
                pl_shader_sample_direct(sh, &(struct pl_sample_src) {
                    .tex = sti->img.tex,
                });
                sti->img.tex = NULL;
            }

            pl_shader psh = pl_dispatch_begin_ex(pass->rr->dp, true);
            pl_shader_sample_direct(psh, &(struct pl_sample_src) {
                .tex = stj->img.tex,
            });

            ident_t sub = sh_subpass(sh, psh);
            pl_dispatch_abort(rr->dp, &psh);
            if (!sub)
                break; // skip merging

            // Append plane j's components after plane i's components
            GLSL("tmp = %s(); \n", sub);
            for (int jc = 0; jc < stj->img.comps; jc++) {
                int map = stj->plane.component_mapping[jc];
                if (!map)
                    continue;
                int ic = sti->img.comps++;
                pl_assert(ic < 4);
                GLSL("color[%d] = tmp[%d]; \n", ic, jc);
                sti->plane.components = sti->img.comps;
                sti->plane.component_mapping[ic] = map;
            }

            sti->img.fmt = fmt;
            *stj = (struct plane_state) {0}; // plane j is fully consumed
        }

        if (!img_tex(pass, &sti->img)) {
            PL_ERR(rr, "Failed dispatching plane merging shader, disabling FBOs!");
            memset(rr->fbofmt, 0, sizeof(rr->fbofmt));
            return false;
        }
    }

    // Compute the sampling rc of each plane
    for (int i = 0; i < image->num_planes; i++) {
        struct plane_state *st = &planes[i];
        if (!st->type)
            continue;

        float rx = (float) ref_tex->params.w / st->plane.texture->params.w,
              ry = (float) ref_tex->params.h / st->plane.texture->params.h;

        // Only accept integer scaling ratios. This accounts for the fact that
        // fractionally subsampled planes get rounded up to the nearest integer
        // size, which we want to discard.
        float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
              rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);

        float sx = st->plane.shift_x,
              sy = st->plane.shift_y;

        // Map the image crop into this plane's (possibly subsampled and
        // shifted) coordinate space
        st->img.rect = (struct pl_rect2df) {
            .x0 = (image->crop.x0 - sx) / rrx,
            .y0 = (image->crop.y0 - sy) / rry,
            .x1 = (image->crop.x1 - sx) / rrx,
            .y1 = (image->crop.y1 - sy) / rry,
        };

        PL_TRACE(rr, "Plane %d:", i);
        log_plane_info(rr, st);

        // Perform AV1 grain synthesis if needed. Do this first because it
        // requires unmodified plane sizes, and also because it's closer to the
        // intent of the spec (which is to apply synthesis effectively during
        // decoding)

        if (plane_av1_grain(pass, i, st, ref, image)) {
            PL_TRACE(rr, "After AV1 grain:");
            log_plane_info(rr, st);
        }

        if (pass_hook(pass, &st->img, plane_hook_stages[st->type])) {
            PL_TRACE(rr, "After user hooks:");
            log_plane_info(rr, st);
        }

        // Update the conceptual width/height after applying plane shaders
        st->img.w = roundf(pl_rect_w(st->img.rect));
        st->img.h = roundf(pl_rect_h(st->img.rect));
    }

    pl_shader sh = pl_dispatch_begin_ex(rr->dp, true);
    sh_require(sh, PL_SHADER_SIG_NONE, 0, 0);

    // Initialize the color to black (or YCbCr neutral for YCbCr-like systems)
    const char *neutral = "0.0, 0.0, 0.0";
    if (pl_color_system_is_ycbcr_like(image->repr.sys))
        neutral = "0.0, 0.5, 0.5";

    GLSL("vec4 color = vec4(%s, 1.0); \n"
         "// pass_read_image \n"
         "{ \n"
         "vec4 tmp; \n",
         neutral);

    // For quality reasons, explicitly drop subpixel offsets from the ref rect
    // and re-add them as part of `pass->img.rect`, always rounding towards 0.
    // Additionally, drop anamorphic subpixel mismatches.
    float off_x = ref->img.rect.x0 - truncf(ref->img.rect.x0),
          off_y = ref->img.rect.y0 - truncf(ref->img.rect.y0),
          stretch_x = roundf(pl_rect_w(ref->img.rect)) / pl_rect_w(ref->img.rect),
          stretch_y = roundf(pl_rect_h(ref->img.rect)) / pl_rect_h(ref->img.rect);

    bool has_alpha = false;
    for (int i = 0; i < image->num_planes; i++) {
        struct plane_state *st = &planes[i];
        const struct pl_plane *plane = &st->plane;
        if (!st->type)
            continue;

        // Align this plane's rect onto the ref plane's (integer) pixel grid
        float scale_x = pl_rect_w(st->img.rect) / pl_rect_w(ref->img.rect),
              scale_y = pl_rect_h(st->img.rect) / pl_rect_h(ref->img.rect),
              base_x = st->img.rect.x0 - scale_x * off_x,
              base_y = st->img.rect.y0 - scale_y * off_y;

        struct pl_sample_src src = {
            .tex = st->img.tex,
            .components = plane->components,
            .address_mode = plane->address_mode,
            .scale = pl_color_repr_normalize(&st->img.repr),
            .new_w = ref->img.w,
            .new_h = ref->img.h,
            .rect = {
                base_x,
                base_y,
                base_x + stretch_x * pl_rect_w(st->img.rect),
                base_y + stretch_y * pl_rect_h(st->img.rect),
            },
        };

        PL_TRACE(rr, "Aligning plane %d: {%f %f %f %f} -> {%f %f %f %f}",
                 i, st->img.rect.x0, st->img.rect.y0,
                 st->img.rect.x1, st->img.rect.y1,
                 src.rect.x0, src.rect.y0,
                 src.rect.x1, src.rect.y1);

        pl_shader psh = pl_dispatch_begin_ex(rr->dp, true);
        // Deband and/or sample the plane; DEBAND_SCALED means the debanding
        // shader already performed the sampling step itself
        if (deband_src(pass, psh, &src) != DEBAND_SCALED)
            dispatch_sampler(pass, psh, &rr->samplers_src[i], false, &src);

        ident_t sub = sh_subpass(sh, psh);
        if (!sub) {
            // Can't merge shaders, so instead force FBO indirection here
            struct img inter_img = {
                .sh = psh,
                .w = ref->img.w,
                .h = ref->img.h,
                .comps = src.components,
            };

            pl_tex inter_tex = img_tex(pass, &inter_img);
            if (!inter_tex) {
                PL_ERR(rr, "Failed dispatching subpass for plane.. disabling "
                       "all plane shaders");
                rr->disable_sampling = true;
                rr->disable_debanding = true;
                rr->disable_grain = true;
                pl_dispatch_abort(rr->dp, &sh);
                return false;
            }

            psh = pl_dispatch_begin_ex(rr->dp, true);
            pl_shader_sample_direct(psh, &(struct pl_sample_src) {
                .tex = inter_tex,
            });

            sub = sh_subpass(sh, psh);
            pl_assert(sub);
        }

        // Scatter this plane's sampled components into the combined color
        GLSL("tmp = %s();\n", sub);
        for (int c = 0; c < src.components; c++) {
            if (plane->component_mapping[c] < 0)
                continue;
            GLSL("color[%d] = tmp[%d];\n", plane->component_mapping[c], c);

            has_alpha |= plane->component_mapping[c] == PL_CHANNEL_A;
        }

        // we don't need it anymore
        pl_dispatch_abort(rr->dp, &psh);
    }

    GLSL("}\n");

    pass->img = (struct img) {
        .sh = sh,
        .w = ref->img.w,
        .h = ref->img.h,
        .repr = ref->img.repr,
        .color = image->color,
        .comps = has_alpha ? 4 : 3,
        .rect = {
            off_x,
            off_y,
            off_x + pl_rect_w(ref->img.rect) / stretch_x,
            off_y + pl_rect_h(ref->img.rect) / stretch_y,
        },
    };

    // Update the reference rect to our adjusted image coordinates
    pass->ref_rect = pass->img.rect;

    pass_hook(pass, &pass->img, PL_HOOK_NATIVE);

    // Apply LUT logic and colorspace conversion
    enum pl_lut_type lut_type = guess_frame_lut_type(image, false);
    sh = img_sh(pass, &pass->img);
    bool needs_conversion = true;

    if (lut_type == PL_LUT_NATIVE || lut_type == PL_LUT_CONVERSION) {
        // Fix bit depth normalization before applying LUT
        float scale = pl_color_repr_normalize(&pass->img.repr);
        GLSL("color *= vec4(%s); \n", SH_FLOAT(scale));
        pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]);

        if (lut_type == PL_LUT_CONVERSION) {
            // The LUT's output is full-range RGB, so skip decode_color
            pass->img.repr.sys = PL_COLOR_SYSTEM_RGB;
            pass->img.repr.levels = PL_COLOR_LEVELS_FULL;
            needs_conversion = false;
        }
    }

    if (needs_conversion)
        pl_shader_decode_color(sh, &pass->img.repr, params->color_adjustment);
    if (lut_type == PL_LUT_NORMALIZED)
        pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]);

    pass_hook(pass, &pass->img, PL_HOOK_RGB);
    sh = NULL;

    // HDR peak detection, do this as early as possible
    hdr_update_peak(pass);
    return true;
}
1646
pass_scale_main(struct pass_state * pass)1647 static bool pass_scale_main(struct pass_state *pass)
1648 {
1649 const struct pl_render_params *params = pass->params;
1650 pl_renderer rr = pass->rr;
1651
1652 if (!FBOFMT(pass->img.comps)) {
1653 PL_TRACE(rr, "Skipping main scaler (no FBOs)");
1654 return true;
1655 }
1656
1657 struct img *img = &pass->img;
1658 struct pl_sample_src src = {
1659 .components = img->comps,
1660 .new_w = abs(pl_rect_w(pass->dst_rect)),
1661 .new_h = abs(pl_rect_h(pass->dst_rect)),
1662 .rect = img->rect,
1663 };
1664
1665 const struct pl_frame *image = &pass->image;
1666 bool need_fbo = image->num_overlays > 0;
1667 need_fbo |= rr->peak_detect_state && !params->allow_delayed_peak_detect;
1668
1669 // Force FBO indirection if this shader is non-resizable
1670 int out_w, out_h;
1671 if (img->sh && pl_shader_output_size(img->sh, &out_w, &out_h))
1672 need_fbo |= out_w != src.new_w || out_h != src.new_h;
1673
1674 struct sampler_info info = sample_src_info(pass, &src);
1675 bool use_sigmoid = info.dir == SAMPLER_UP && params->sigmoid_params;
1676 bool use_linear = use_sigmoid || info.dir == SAMPLER_DOWN;
1677
1678 // We need to enable the full rendering pipeline if there are any user
1679 // shaders / hooks that might depend on it.
1680 uint64_t scaling_hooks = PL_HOOK_PRE_OVERLAY | PL_HOOK_PRE_KERNEL |
1681 PL_HOOK_POST_KERNEL;
1682 uint64_t linear_hooks = PL_HOOK_LINEAR | PL_HOOK_SIGMOID;
1683
1684 for (int i = 0; i < params->num_hooks; i++) {
1685 if (params->hooks[i]->stages & (scaling_hooks | linear_hooks)) {
1686 need_fbo = true;
1687 if (params->hooks[i]->stages & linear_hooks)
1688 use_linear = true;
1689 if (params->hooks[i]->stages & PL_HOOK_SIGMOID)
1690 use_sigmoid = true;
1691 }
1692 }
1693
1694 if (info.dir == SAMPLER_NOOP && !need_fbo) {
1695 pl_assert(src.new_w == img->w && src.new_h == img->h);
1696 PL_TRACE(rr, "Skipping main scaler (would be no-op)");
1697 return true;
1698 }
1699
1700 if (info.type == SAMPLER_DIRECT && !need_fbo) {
1701 img->w = src.new_w;
1702 img->h = src.new_h;
1703 PL_TRACE(rr, "Skipping main scaler (free sampling)");
1704 return true;
1705 }
1706
1707 // Hard-disable both sigmoidization and linearization when required
1708 if (params->disable_linear_scaling || rr->disable_linear_sdr)
1709 use_sigmoid = use_linear = false;
1710
1711 // Avoid sigmoidization for HDR content because it clips to [0,1]
1712 if (pl_color_transfer_is_hdr(img->color.transfer)) {
1713 use_sigmoid = false;
1714 // Also disable linearization if necessary
1715 if (rr->disable_linear_hdr)
1716 use_linear = false;
1717 }
1718
1719 if (use_linear) {
1720 pl_shader_linearize(img_sh(pass, img), img->color);
1721 img->color.transfer = PL_COLOR_TRC_LINEAR;
1722 pass_hook(pass, img, PL_HOOK_LINEAR);
1723 }
1724
1725 if (use_sigmoid) {
1726 pl_shader_sigmoidize(img_sh(pass, img), params->sigmoid_params);
1727 pass_hook(pass, img, PL_HOOK_SIGMOID);
1728 }
1729
1730 pass_hook(pass, img, PL_HOOK_PRE_OVERLAY);
1731
1732 img->tex = img_tex(pass, img);
1733 if (!img->tex)
1734 return false;
1735
1736 // Draw overlay on top of the intermediate image if needed, accounting
1737 // for possible stretching needed due to mismatch between the ref and src
1738 struct pl_transform2x2 tf = pl_transform2x2_identity;
1739 if (!pl_rect2d_eq(img->rect, image->crop)) {
1740 float rx = pl_rect_w(img->rect) / pl_rect_w(image->crop),
1741 ry = pl_rect_w(img->rect) / pl_rect_w(image->crop);
1742
1743 tf = (struct pl_transform2x2) {
1744 .mat = {{{ rx, 0.0 }, { 0.0, ry }}},
1745 .c = {
1746 img->rect.x0 - image->crop.x0 * rx,
1747 img->rect.y0 - image->crop.y0 * ry
1748 },
1749 };
1750 }
1751
1752 draw_overlays(pass, img->tex, img->comps, NULL, image->overlays,
1753 image->num_overlays, img->color, img->repr, use_sigmoid, &tf);
1754
1755 pass_hook(pass, img, PL_HOOK_PRE_KERNEL);
1756
1757 src.tex = img_tex(pass, img);
1758 pl_shader sh = pl_dispatch_begin_ex(rr->dp, true);
1759 dispatch_sampler(pass, sh, &rr->sampler_main, false, &src);
1760 *img = (struct img) {
1761 .sh = sh,
1762 .w = src.new_w,
1763 .h = src.new_h,
1764 .repr = img->repr,
1765 .rect = { 0, 0, src.new_w, src.new_h },
1766 .color = img->color,
1767 .comps = img->comps,
1768 };
1769
1770 pass_hook(pass, img, PL_HOOK_POST_KERNEL);
1771
1772 if (use_sigmoid)
1773 pl_shader_unsigmoidize(img_sh(pass, img), params->sigmoid_params);
1774
1775 pass_hook(pass, img, PL_HOOK_SCALED);
1776 return true;
1777 }
1778
// Expands to a compound-literal float[4] RGBA clear color built from the
// render params' background color and transparency (alpha = 1 - transparency).
// Note: evaluates `params` multiple times; only pass a side-effect-free lvalue.
#define CLEAR_COL(params) \
    (float[4]) { \
        (params)->background_color[0], \
        (params)->background_color[1], \
        (params)->background_color[2], \
        1.0 - (params)->background_transparency, \
    }
1786
pass_output_target(struct pass_state * pass)1787 static bool pass_output_target(struct pass_state *pass)
1788 {
1789 const struct pl_render_params *params = pass->params;
1790 const struct pl_frame *image = &pass->image;
1791 const struct pl_frame *target = &pass->target;
1792 pl_renderer rr = pass->rr;
1793
1794 struct img *img = &pass->img;
1795 pl_shader sh = img_sh(pass, img);
1796
1797 // Color management
1798 bool prelinearized = false;
1799 bool need_conversion = true;
1800 assert(image->color.primaries == img->color.primaries);
1801 assert(image->color.light == img->color.light);
1802 if (img->color.transfer == PL_COLOR_TRC_LINEAR)
1803 prelinearized = true;
1804
1805 bool need_icc = !params->ignore_icc_profiles &&
1806 (image->profile.data || target->profile.data) &&
1807 !pl_icc_profile_equal(&image->profile, &target->profile);
1808
1809 if (params->force_icc_lut || params->force_3dlut)
1810 need_icc |= !pl_color_space_equal(&image->color, &target->color);
1811 need_icc &= !rr->disable_icc;
1812
1813 if (params->lut) {
1814 struct pl_color_space lut_in = params->lut->color_in;
1815 struct pl_color_space lut_out = params->lut->color_out;
1816 switch (params->lut_type) {
1817 case PL_LUT_UNKNOWN:
1818 case PL_LUT_NATIVE:
1819 pl_color_space_merge(&lut_in, &image->color);
1820 pl_color_space_merge(&lut_out, &image->color);
1821 break;
1822 case PL_LUT_CONVERSION:
1823 pl_color_space_merge(&lut_in, &image->color);
1824 pl_color_space_merge(&lut_out, &target->color);
1825 // Conversion LUT the highest priority
1826 need_icc = false;
1827 need_conversion = false;
1828 break;
1829 case PL_LUT_NORMALIZED:
1830 if (!prelinearized) {
1831 // PL_LUT_NORMALIZED wants linear input data
1832 pl_shader_linearize(sh, img->color);
1833 img->color.transfer = PL_COLOR_TRC_LINEAR;
1834 prelinearized = true;
1835 }
1836 pl_color_space_merge(&lut_in, &img->color);
1837 pl_color_space_merge(&lut_out, &img->color);
1838 break;
1839 }
1840
1841 pl_shader_color_map(sh, params->color_map_params, image->color, lut_in,
1842 NULL, prelinearized);
1843
1844 if (params->lut_type == PL_LUT_NORMALIZED) {
1845 GLSLF("color.rgb *= vec3(1.0/%s); \n",
1846 SH_FLOAT(pl_color_transfer_nominal_peak(lut_in.transfer)));
1847 }
1848
1849 pl_shader_custom_lut(sh, params->lut, &rr->lut_state[LUT_PARAMS]);
1850
1851 if (params->lut_type == PL_LUT_NORMALIZED) {
1852 GLSLF("color.rgb *= vec3(%s); \n",
1853 SH_FLOAT(pl_color_transfer_nominal_peak(lut_out.transfer)));
1854 }
1855
1856 if (params->lut_type != PL_LUT_CONVERSION) {
1857 pl_shader_color_map(sh, params->color_map_params, lut_out, img->color,
1858 NULL, false);
1859 }
1860 }
1861
1862 #ifdef PL_HAVE_LCMS
1863
1864 if (need_icc) {
1865 struct pl_icc_color_space src = {
1866 .color = image->color,
1867 .profile = image->profile,
1868 };
1869
1870 struct pl_icc_color_space dst = {
1871 .color = target->color,
1872 .profile = target->profile,
1873 };
1874
1875 if (params->ignore_icc_profiles)
1876 src.profile = dst.profile = (struct pl_icc_profile) {0};
1877
1878 struct pl_icc_result res;
1879 bool ok = pl_icc_update(sh, &src, &dst, &rr->icc_state, &res,
1880 PL_DEF(params->icc_params, params->lut3d_params));
1881 if (!ok) {
1882 rr->disable_icc = true;
1883 goto fallback;
1884 }
1885
1886 // current -> ICC in
1887 pl_shader_color_map(sh, params->color_map_params, image->color,
1888 res.src_color, &rr->peak_detect_state, prelinearized);
1889 // ICC in -> ICC out
1890 pl_icc_apply(sh, &rr->icc_state);
1891 // ICC out -> target
1892 pl_shader_color_map(sh, params->color_map_params, res.dst_color,
1893 target->color, NULL, false);
1894
1895 need_conversion = false;
1896 }
1897
1898 fallback:
1899
1900 #else // !PL_HAVE_LCMS
1901
1902 if (need_icc) {
1903 PL_WARN(rr, "An ICC profile was set, but libplacebo is built without "
1904 "support for LittleCMS! Disabling..");
1905 rr->disable_icc = true;
1906 }
1907
1908 #endif
1909
1910 if (need_conversion) {
1911 // current -> target
1912 pl_shader_color_map(sh, params->color_map_params, image->color,
1913 target->color, &rr->peak_detect_state, prelinearized);
1914 }
1915
1916 // Apply color blindness simulation if requested
1917 if (params->cone_params)
1918 pl_shader_cone_distort(sh, target->color, params->cone_params);
1919
1920 enum pl_lut_type lut_type = guess_frame_lut_type(target, true);
1921 if (lut_type == PL_LUT_NORMALIZED || lut_type == PL_LUT_CONVERSION)
1922 pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]);
1923
1924 // Apply the color scale separately, after encoding is done, to make sure
1925 // that the intermediate FBO (if any) has the correct precision.
1926 struct pl_color_repr repr = target->repr;
1927 float scale = pl_color_repr_normalize(&repr);
1928 if (lut_type != PL_LUT_CONVERSION)
1929 pl_shader_encode_color(sh, &repr);
1930 if (lut_type == PL_LUT_NATIVE)
1931 pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]);
1932 pass_hook(pass, img, PL_HOOK_OUTPUT);
1933 sh = NULL;
1934
1935 const struct pl_plane *ref = &target->planes[pass->dst_ref];
1936 bool flipped_x = pass->dst_rect.x1 < pass->dst_rect.x0,
1937 flipped_y = pass->dst_rect.y1 < pass->dst_rect.y0;
1938
1939 if (!params->skip_target_clearing && pl_frame_is_cropped(target))
1940 pl_frame_clear_rgba(rr->gpu, target, CLEAR_COL(params));
1941
1942 for (int p = 0; p < target->num_planes; p++) {
1943 const struct pl_plane *plane = &target->planes[p];
1944 float rx = (float) plane->texture->params.w / ref->texture->params.w,
1945 ry = (float) plane->texture->params.h / ref->texture->params.h;
1946
1947 // Only accept integer scaling ratios. This accounts for the fact
1948 // that fractionally subsampled planes get rounded up to the
1949 // nearest integer size, which we want to over-render.
1950 float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
1951 rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
1952 float sx = plane->shift_x, sy = plane->shift_y;
1953
1954 struct pl_rect2df dst_rectf = {
1955 .x0 = (pass->dst_rect.x0 - sx) * rrx,
1956 .y0 = (pass->dst_rect.y0 - sy) * rry,
1957 .x1 = (pass->dst_rect.x1 - sx) * rrx,
1958 .y1 = (pass->dst_rect.y1 - sy) * rry,
1959 };
1960
1961 // Normalize to make the math easier
1962 pl_rect2df_normalize(&dst_rectf);
1963
1964 // Round the output rect
1965 int rx0 = floorf(dst_rectf.x0), ry0 = floorf(dst_rectf.y0),
1966 rx1 = ceilf(dst_rectf.x1), ry1 = ceilf(dst_rectf.y1);
1967
1968 PL_TRACE(rr, "Subsampled target %d: {%f %f %f %f} -> {%d %d %d %d}",
1969 p, dst_rectf.x0, dst_rectf.y0,
1970 dst_rectf.x1, dst_rectf.y1,
1971 rx0, ry0, rx1, ry1);
1972
1973 if (target->num_planes > 1) {
1974
1975 // Planar input, so we need to sample from an intermediate FBO
1976 struct pl_sample_src src = {
1977 .tex = img_tex(pass, img),
1978 .new_w = rx1 - rx0,
1979 .new_h = ry1 - ry0,
1980 .rect = {
1981 .x0 = (rx0 - dst_rectf.x0) / rrx,
1982 .x1 = (rx1 - dst_rectf.x0) / rrx,
1983 .y0 = (ry0 - dst_rectf.y0) / rry,
1984 .y1 = (ry1 - dst_rectf.y0) / rry,
1985 },
1986 };
1987
1988 if (!src.tex) {
1989 PL_ERR(rr, "Output requires multiple planes, but FBOs are "
1990 "unavailable. This combination is unsupported.");
1991 return false;
1992 }
1993
1994 PL_TRACE(rr, "Sampling %dx%d img aligned from {%f %f %f %f}",
1995 pass->img.w, pass->img.h,
1996 src.rect.x0, src.rect.y0,
1997 src.rect.x1, src.rect.y1);
1998
1999 for (int c = 0; c < plane->components; c++) {
2000 if (plane->component_mapping[c] < 0)
2001 continue;
2002 src.component_mask |= 1 << plane->component_mapping[c];
2003 }
2004
2005 sh = pl_dispatch_begin(rr->dp);
2006 dispatch_sampler(pass, sh, &rr->samplers_dst[p],
2007 !plane->texture->params.storable, &src);
2008
2009 } else {
2010
2011 // Single plane, so we can directly re-use the img shader unless
2012 // it's incompatible with the FBO capabilities
2013 bool is_comp = pl_shader_is_compute(img_sh(pass, img));
2014 if (is_comp && !plane->texture->params.storable) {
2015 if (!img_tex(pass, img)) {
2016 PL_ERR(rr, "Rendering requires compute shaders, but output "
2017 "is not storable, and FBOs are unavailable. This "
2018 "combination is unsupported.");
2019 return false;
2020 }
2021 }
2022
2023 sh = img_sh(pass, img);
2024 img->sh = NULL;
2025
2026 }
2027
2028 GLSL("color *= vec4(1.0 / %s); \n", SH_FLOAT(scale));
2029 swizzle_color(sh, plane->components, plane->component_mapping, false);
2030
2031 if (params->dither_params) {
2032 // Ignore dithering for > 16-bit FBOs by default, since it makes
2033 // little sense to do so (and probably just adds errors)
2034 int depth = repr.bits.sample_depth;
2035 if (depth && (depth <= 16 || params->force_dither))
2036 pl_shader_dither(sh, depth, &rr->dither_state, params->dither_params);
2037 }
2038
2039 bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
2040 .shader = &sh,
2041 .target = plane->texture,
2042 .blend_params = params->blend_params,
2043 .rect = {
2044 .x0 = flipped_x ? rx1 : rx0,
2045 .y0 = flipped_y ? ry1 : ry0,
2046 .x1 = flipped_x ? rx0 : rx1,
2047 .y1 = flipped_y ? ry0 : ry1,
2048 },
2049 });
2050
2051 if (!ok)
2052 return false;
2053
2054 // Render any overlays, including overlays that need to be rendered
2055 // from the `image` itself, but which couldn't be rendered as
2056 // part of the intermediate scaling pass due to missing FBOs.
2057 if (image->num_overlays > 0 && !FBOFMT(img->comps)) {
2058 // The original image dimensions need to be scaled by the effective
2059 // end-to-end scaling ratio to compensate for the mismatch in
2060 // pixel coordinates between the image and target.
2061 float scale_x = pl_rect_w(dst_rectf) / pl_rect_w(image->crop),
2062 scale_y = pl_rect_h(dst_rectf) / pl_rect_h(image->crop);
2063
2064 struct pl_transform2x2 iscale = {
2065 .mat = {{{ scale_x, 0.0 }, { 0.0, scale_y }}},
2066 .c = {
2067 // If the image was rendered with an offset relative to the
2068 // target crop, we also need to shift the overlays.
2069 dst_rectf.x0 - image->crop.x0 * scale_x,
2070 dst_rectf.y0 - image->crop.y0 * scale_y,
2071 },
2072 };
2073
2074 draw_overlays(pass, plane->texture, plane->components,
2075 plane->component_mapping, image->overlays,
2076 image->num_overlays, target->color, target->repr,
2077 false, &iscale);
2078 }
2079
2080 struct pl_transform2x2 tscale = {
2081 .mat = {{{ rrx, 0.0 }, { 0.0, rry }}},
2082 .c = { -sx, -sy },
2083 };
2084
2085 draw_overlays(pass, plane->texture, plane->components,
2086 plane->component_mapping, target->overlays,
2087 target->num_overlays, target->color, target->repr,
2088 false, &tscale);
2089 }
2090
2091 *img = (struct img) {0};
2092 return true;
2093 }
2094
// Validation helper: if `expr` is false, log the failing expression together
// with its source location and make the *enclosing function* return false.
// Requires a variable `rr` (the pl_renderer, used for logging) in scope.
#define require(expr)                                                           \
  do {                                                                          \
      if (!(expr)) {                                                            \
          PL_ERR(rr, "Validation failed: %s (%s:%d)",                           \
                 #expr, __FILE__, __LINE__);                                    \
          return false;                                                         \
      }                                                                         \
  } while (0)
2103
// Sanity-check a single pl_plane: a texture must be present, the texture must
// have the given capability flag (`param` is a member of pl_tex_params, e.g.
// `sampleable` or `renderable`), the component count must be 1..4, and every
// component mapping must be a valid pl_channel (or PL_CHANNEL_NONE).
// Expands `require`, so this also returns false from the enclosing function.
#define validate_plane(plane, param)                                            \
  do {                                                                          \
      require((plane).texture);                                                 \
      require((plane).texture->params.param);                                   \
      require((plane).components > 0 && (plane).components <= 4);               \
      for (int c = 0; c < (plane).components; c++) {                            \
          require((plane).component_mapping[c] >= PL_CHANNEL_NONE &&            \
                  (plane).component_mapping[c] <= PL_CHANNEL_A);                \
      }                                                                         \
  } while (0)
2114
// Sanity-check a single pl_overlay. Exactly one of `tex` (new-style, with
// parts) or `plane.texture` (legacy single-plane style) must be set:
//  - new style: the texture is sampleable and every part has a nonempty dst
//  - legacy style: no parts allowed, plane texture sampleable, nonempty rect
// Expands `require`, so this also returns false from the enclosing function.
#define validate_overlay(overlay)                                               \
  do {                                                                          \
      require(!(overlay).tex ^ !(overlay).plane.texture);                       \
      if ((overlay).tex) {                                                      \
          require((overlay).tex->params.sampleable);                            \
          require((overlay).num_parts >= 0);                                    \
          for (int n = 0; n < (overlay).num_parts; n++) {                       \
              const struct pl_overlay_part *p = &(overlay).parts[n];            \
              require(pl_rect_w(p->dst) && pl_rect_h(p->dst));                  \
          }                                                                     \
      } else {                                                                  \
          require((overlay).num_parts == 0);                                    \
          require((overlay).plane.texture->params.sampleable);                  \
          require(pl_rect_w((overlay).rect) && pl_rect_h((overlay).rect));      \
      }                                                                         \
  } while (0)
2131
2132 // Perform some basic validity checks on incoming structs to help catch invalid
2133 // API usage. This is not an exhaustive check. In particular, enums are not
2134 // bounds checked. This is because most functions accepting enums already
2135 // abort() in the default case, and because it's not the intent of this check
2136 // to catch all instances of memory corruption - just common logic bugs.
validate_structs(pl_renderer rr,const struct pl_frame * image,const struct pl_frame * target)2137 static bool validate_structs(pl_renderer rr,
2138 const struct pl_frame *image,
2139 const struct pl_frame *target)
2140 {
2141 // Rendering to/from a frame with no planes is technically allowed, but so
2142 // pointless that it's more likely to be a user error worth catching.
2143 require(image->num_planes > 0 && image->num_planes <= PL_MAX_PLANES);
2144 require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES);
2145 for (int i = 0; i < image->num_planes; i++)
2146 validate_plane(image->planes[i], sampleable);
2147 for (int i = 0; i < target->num_planes; i++)
2148 validate_plane(target->planes[i], renderable);
2149
2150 float src_w = pl_rect_w(image->crop), src_h = pl_rect_h(image->crop);
2151 float dst_w = pl_rect_w(target->crop), dst_h = pl_rect_h(target->crop);
2152 require(!src_w == !src_h);
2153 require(!dst_w == !dst_h);
2154
2155 require(image->num_overlays >= 0);
2156 require(target->num_overlays >= 0);
2157 for (int i = 0; i < image->num_overlays; i++)
2158 validate_overlay(image->overlays[i]);
2159 for (int i = 0; i < target->num_overlays; i++)
2160 validate_overlay(target->overlays[i]);
2161
2162 return true;
2163 }
2164
detect_plane_type(const struct pl_plane * plane,const struct pl_color_repr * repr)2165 static inline enum plane_type detect_plane_type(const struct pl_plane *plane,
2166 const struct pl_color_repr *repr)
2167 {
2168 if (pl_color_system_is_ycbcr_like(repr->sys)) {
2169 int t = PLANE_INVALID;
2170 for (int c = 0; c < plane->components; c++) {
2171 switch (plane->component_mapping[c]) {
2172 case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue;
2173 case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue;
2174
2175 case PL_CHANNEL_CB:
2176 case PL_CHANNEL_CR:
2177 t = PL_MAX(t, PLANE_CHROMA);
2178 continue;
2179
2180 default: continue;
2181 }
2182 }
2183
2184 pl_assert(t);
2185 return t;
2186 }
2187
2188 // Extra test for exclusive / separated alpha plane
2189 if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A)
2190 return PLANE_ALPHA;
2191
2192 switch (repr->sys) {
2193 case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB
2194 case PL_COLOR_SYSTEM_RGB: return PLANE_RGB;
2195 case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ;
2196
2197 // For the switch completeness check
2198 case PL_COLOR_SYSTEM_BT_601:
2199 case PL_COLOR_SYSTEM_BT_709:
2200 case PL_COLOR_SYSTEM_SMPTE_240M:
2201 case PL_COLOR_SYSTEM_BT_2020_NC:
2202 case PL_COLOR_SYSTEM_BT_2020_C:
2203 case PL_COLOR_SYSTEM_BT_2100_PQ:
2204 case PL_COLOR_SYSTEM_BT_2100_HLG:
2205 case PL_COLOR_SYSTEM_YCGCO:
2206 case PL_COLOR_SYSTEM_COUNT:
2207 break;
2208 }
2209
2210 pl_unreachable();
2211 }
2212
default_rect(struct pl_rect2df * rc,const struct pl_rect2df * backup)2213 static inline void default_rect(struct pl_rect2df *rc,
2214 const struct pl_rect2df *backup)
2215 {
2216 if (!rc->x0 && !rc->y0 && !rc->x1 && !rc->y1)
2217 *rc = *backup;
2218 }
2219
fix_refs_and_rects(struct pass_state * pass)2220 static void fix_refs_and_rects(struct pass_state *pass)
2221 {
2222 struct pl_frame *image = &pass->image;
2223 struct pl_frame *target = &pass->target;
2224
2225 // Find the ref planes
2226 for (int i = 0; i < image->num_planes; i++) {
2227 pass->src_type[i] = detect_plane_type(&image->planes[i], &image->repr);
2228 switch (pass->src_type[i]) {
2229 case PLANE_RGB:
2230 case PLANE_LUMA:
2231 case PLANE_XYZ:
2232 pass->src_ref = i;
2233 break;
2234 case PLANE_CHROMA:
2235 case PLANE_ALPHA:
2236 break;
2237 case PLANE_INVALID:
2238 pl_unreachable();
2239 }
2240 }
2241
2242 for (int i = 0; i < target->num_planes; i++) {
2243 pass->dst_type[i] = detect_plane_type(&target->planes[i], &target->repr);
2244 switch (pass->dst_type[i]) {
2245 case PLANE_RGB:
2246 case PLANE_LUMA:
2247 case PLANE_XYZ:
2248 pass->dst_ref = i;
2249 break;
2250 case PLANE_CHROMA:
2251 case PLANE_ALPHA:
2252 break;
2253 case PLANE_INVALID:
2254 pl_unreachable();
2255 }
2256 }
2257
2258 // Fix the rendering rects
2259 struct pl_rect2df *src = &image->crop, *dst = &target->crop;
2260 pl_tex src_ref = pass->image.planes[pass->src_ref].texture;
2261 pl_tex dst_ref = pass->target.planes[pass->dst_ref].texture;
2262
2263 if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) {
2264 src->x1 = src_ref->params.w;
2265 src->y1 = src_ref->params.h;
2266 };
2267
2268 if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) {
2269 dst->x1 = dst_ref->params.w;
2270 dst->y1 = dst_ref->params.h;
2271 }
2272
2273 // Keep track of whether the end-to-end rendering is flipped
2274 bool flipped_x = (src->x0 > src->x1) != (dst->x0 > dst->x1),
2275 flipped_y = (src->y0 > src->y1) != (dst->y0 > dst->y1);
2276
2277 // Normalize both rects to make the math easier
2278 pl_rect2df_normalize(src);
2279 pl_rect2df_normalize(dst);
2280
2281 // Round the output rect and clip it to the framebuffer dimensions
2282 float rx0 = roundf(PL_MAX(dst->x0, 0.0)),
2283 ry0 = roundf(PL_MAX(dst->y0, 0.0)),
2284 rx1 = roundf(PL_MIN(dst->x1, dst_ref->params.w)),
2285 ry1 = roundf(PL_MIN(dst->y1, dst_ref->params.h));
2286
2287 // Adjust the src rect corresponding to the rounded crop
2288 float scale_x = pl_rect_w(*src) / pl_rect_w(*dst),
2289 scale_y = pl_rect_h(*src) / pl_rect_h(*dst),
2290 base_x = src->x0,
2291 base_y = src->y0;
2292
2293 src->x0 = base_x + (rx0 - dst->x0) * scale_x;
2294 src->x1 = base_x + (rx1 - dst->x0) * scale_x;
2295 src->y0 = base_y + (ry0 - dst->y0) * scale_y;
2296 src->y1 = base_y + (ry1 - dst->y0) * scale_y;
2297
2298 // Update dst_rect to the rounded values and re-apply flip if needed. We
2299 // always do this in the `dst` rather than the `src`` because this allows
2300 // e.g. polar sampling compute shaders to work.
2301 *dst = (struct pl_rect2df) {
2302 .x0 = flipped_x ? rx1 : rx0,
2303 .y0 = flipped_y ? ry1 : ry0,
2304 .x1 = flipped_x ? rx0 : rx1,
2305 .y1 = flipped_y ? ry0 : ry1,
2306 };
2307
2308 // Copies of the above, for convenience
2309 pass->ref_rect = *src;
2310 pass->dst_rect = (struct pl_rect2d) {
2311 dst->x0, dst->y0, dst->x1, dst->y1,
2312 };
2313 }
2314
frame_ref(const struct pl_frame * frame)2315 static pl_tex frame_ref(const struct pl_frame *frame)
2316 {
2317 pl_assert(frame->num_planes);
2318 for (int i = 0; i < frame->num_planes; i++) {
2319 switch (detect_plane_type(&frame->planes[i], &frame->repr)) {
2320 case PLANE_RGB:
2321 case PLANE_LUMA:
2322 case PLANE_XYZ:
2323 return frame->planes[i].texture;
2324 case PLANE_CHROMA:
2325 case PLANE_ALPHA:
2326 continue;
2327 case PLANE_INVALID:
2328 pl_unreachable();
2329 }
2330 }
2331
2332 return frame->planes[0].texture;
2333 }
2334
fix_color_space(struct pl_frame * frame)2335 static void fix_color_space(struct pl_frame *frame)
2336 {
2337 pl_tex tex = frame_ref(frame);
2338
2339 // If the primaries are not known, guess them based on the resolution
2340 if (!frame->color.primaries)
2341 frame->color.primaries = pl_color_primaries_guess(tex->params.w, tex->params.h);
2342
2343 pl_color_space_infer(&frame->color);
2344
2345 // For UNORM formats, we can infer the sampled bit depth from the texture
2346 // itself. This is ignored for other format types, because the logic
2347 // doesn't really work out for them anyways, and it's best not to do
2348 // anything too crazy unless the user provides explicit details.
2349 struct pl_bit_encoding *bits = &frame->repr.bits;
2350 if (!bits->sample_depth && tex->params.format->type == PL_FMT_UNORM) {
2351 // Just assume the first component's depth is canonical. This works in
2352 // practice, since for cases like rgb565 we want to use the lower depth
2353 // anyway. Plus, every format has at least one component.
2354 bits->sample_depth = tex->params.format->component_depth[0];
2355
2356 // If we don't know the color depth, assume it spans the full range of
2357 // the texture. Otherwise, clamp it to the texture depth.
2358 bits->color_depth = PL_DEF(bits->color_depth, bits->sample_depth);
2359 bits->color_depth = PL_MIN(bits->color_depth, bits->sample_depth);
2360
2361 // If the texture depth is higher than the known color depth, assume
2362 // the colors were left-shifted.
2363 bits->bit_shift += bits->sample_depth - bits->color_depth;
2364 }
2365 }
2366
pass_infer_state(struct pass_state * pass)2367 static bool pass_infer_state(struct pass_state *pass)
2368 {
2369 // Backwards compatibility hacks
2370 struct pl_frame *image = &pass->image;
2371 struct pl_frame *target = &pass->target;
2372 default_rect(&image->crop, &image->src_rect);
2373 default_rect(&target->crop, &target->dst_rect);
2374
2375 if (!target->num_planes && target->fbo) {
2376 target->num_planes = 1;
2377 target->planes[0] = (struct pl_plane) {
2378 .texture = target->fbo,
2379 .components = target->fbo->params.format->num_components,
2380 .component_mapping = {0, 1, 2, 3},
2381 };
2382 }
2383
2384 if (!validate_structs(pass->rr, image, target))
2385 return false;
2386
2387 fix_refs_and_rects(pass);
2388 fix_color_space(image);
2389
2390 // Infer the target color space info based on the image's
2391 pl_color_space_infer_ref(&target->color, &image->color);
2392 fix_color_space(target);
2393 return true;
2394 }
2395
draw_empty_overlays(pl_renderer rr,const struct pl_frame * ptarget,const struct pl_render_params * params)2396 static bool draw_empty_overlays(pl_renderer rr,
2397 const struct pl_frame *ptarget,
2398 const struct pl_render_params *params)
2399 {
2400 if (!params->skip_target_clearing)
2401 pl_frame_clear_rgba(rr->gpu, ptarget, CLEAR_COL(params));
2402
2403 if (!ptarget->num_overlays)
2404 return true;
2405
2406 struct pass_state pass = {
2407 .rr = rr,
2408 .params = params,
2409 .target = *ptarget,
2410 .info.stage = PL_RENDER_STAGE_FRAME,
2411 };
2412
2413 struct pl_frame *target = &pass.target;
2414 require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES);
2415 for (int i = 0; i < target->num_planes; i++)
2416 validate_plane(target->planes[i], renderable);
2417 require(target->num_overlays >= 0);
2418 for (int i = 0; i < target->num_overlays; i++)
2419 validate_overlay(target->overlays[i]);
2420 fix_color_space(target);
2421
2422 pl_dispatch_callback(rr->dp, &pass, info_callback);
2423 pl_dispatch_reset_frame(rr->dp);
2424
2425 pl_tex ref = frame_ref(target);
2426 for (int p = 0; p < target->num_planes; p++) {
2427 const struct pl_plane *plane = &target->planes[p];
2428 // Math replicated from `pass_output_target`
2429 float rx = (float) plane->texture->params.w / ref->params.w,
2430 ry = (float) plane->texture->params.h / ref->params.h;
2431 float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
2432 rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
2433 float sx = plane->shift_x, sy = plane->shift_y;
2434
2435 struct pl_transform2x2 tscale = {
2436 .mat = {{{ rrx, 0.0 }, { 0.0, rry }}},
2437 .c = { -sx, -sy },
2438 };
2439
2440 draw_overlays(&pass, plane->texture, plane->components,
2441 plane->component_mapping, target->overlays,
2442 target->num_overlays, target->color, target->repr,
2443 false, &tscale);
2444 }
2445
2446 return true;
2447 }
2448
pl_render_image(pl_renderer rr,const struct pl_frame * pimage,const struct pl_frame * ptarget,const struct pl_render_params * params)2449 bool pl_render_image(pl_renderer rr, const struct pl_frame *pimage,
2450 const struct pl_frame *ptarget,
2451 const struct pl_render_params *params)
2452 {
2453 params = PL_DEF(params, &pl_render_default_params);
2454 pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants);
2455 if (!pimage)
2456 return draw_empty_overlays(rr, ptarget, params);
2457
2458 struct pass_state pass = {
2459 .rr = rr,
2460 .params = params,
2461 .image = *pimage,
2462 .target = *ptarget,
2463 .info.stage = PL_RENDER_STAGE_FRAME,
2464 };
2465
2466 if (!pass_infer_state(&pass))
2467 return false;
2468
2469 pass.tmp = pl_tmp(NULL),
2470 pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool));
2471
2472 pl_dispatch_callback(rr->dp, &pass, info_callback);
2473 pl_dispatch_reset_frame(rr->dp);
2474
2475 for (int i = 0; i < params->num_hooks; i++) {
2476 if (params->hooks[i]->reset)
2477 params->hooks[i]->reset(params->hooks[i]->priv);
2478 }
2479
2480 if (!pass_read_image(&pass))
2481 goto error;
2482
2483 if (!pass_scale_main(&pass))
2484 goto error;
2485
2486 if (!pass_output_target(&pass))
2487 goto error;
2488
2489 pl_free(pass.tmp);
2490 return true;
2491
2492 error:
2493 pl_dispatch_abort(rr->dp, &pass.img.sh);
2494 pl_free(pass.tmp);
2495 PL_ERR(rr, "Failed rendering image!");
2496 return false;
2497 }
2498
render_params_hash(const struct pl_render_params * params_orig)2499 static uint64_t render_params_hash(const struct pl_render_params *params_orig)
2500 {
2501 struct pl_render_params params = *params_orig;
2502 uint64_t hash = 0;
2503
2504 #define HASH_PTR(ptr) \
2505 do { \
2506 if (ptr) { \
2507 pl_hash_merge(&hash, pl_mem_hash(ptr, sizeof(*ptr))); \
2508 ptr = NULL; \
2509 } \
2510 } while (0)
2511
2512 #define HASH_FILTER(scaler) \
2513 do { \
2514 if (scaler) { \
2515 struct pl_filter_config filter = *scaler; \
2516 HASH_PTR(filter.kernel); \
2517 HASH_PTR(filter.window); \
2518 pl_hash_merge(&hash, pl_mem_hash(&filter, sizeof(filter))); \
2519 scaler = NULL; \
2520 } \
2521 } while (0)
2522
2523 HASH_FILTER(params.upscaler);
2524 HASH_FILTER(params.downscaler);
2525 HASH_FILTER(params.frame_mixer);
2526
2527 HASH_PTR(params.deband_params);
2528 HASH_PTR(params.sigmoid_params);
2529 HASH_PTR(params.color_adjustment);
2530 HASH_PTR(params.peak_detect_params);
2531 HASH_PTR(params.color_map_params);
2532 HASH_PTR(params.dither_params);
2533 HASH_PTR(params.cone_params);
2534 HASH_PTR(params.blend_params);
2535
2536 #ifdef PL_HAVE_LCMS
2537 HASH_PTR(params.icc_params);
2538 HASH_PTR(params.lut3d_params);
2539 #endif
2540
2541 // Hash all hooks
2542 for (int i = 0; i < params.num_hooks; i++)
2543 pl_hash_merge(&hash, pl_mem_hash(¶ms.hooks[i], sizeof(params.hooks[i])));
2544 params.hooks = NULL;
2545
2546 // Hash the LUT by only looking at the signature
2547 if (params.lut) {
2548 pl_hash_merge(&hash, params.lut->signature);
2549 params.lut = NULL;
2550 }
2551
2552 pl_hash_merge(&hash, pl_mem_hash(¶ms, sizeof(params)));
2553 return hash;
2554 }
2555
// Upper bound on the number of frames that may contribute to a single
// frame-mixing pass (sizes the stack arrays in pl_render_image_mix)
#define MAX_MIX_FRAMES 16
2557
pl_render_image_mix(pl_renderer rr,const struct pl_frame_mix * images,const struct pl_frame * ptarget,const struct pl_render_params * params)2558 bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images,
2559 const struct pl_frame *ptarget,
2560 const struct pl_render_params *params)
2561 {
2562 if (!images->num_frames)
2563 return pl_render_image(rr, NULL, ptarget, params);
2564
2565 params = PL_DEF(params, &pl_render_default_params);
2566 uint64_t params_hash = render_params_hash(params);
2567 pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants);
2568
2569 require(images->num_frames >= 1);
2570 for (int i = 0; i < images->num_frames - 1; i++)
2571 require(images->timestamps[i] <= images->timestamps[i+1]);
2572
2573 // As the canonical reference, find the nearest neighbour frame
2574 const struct pl_image *refimg = images->frames[0];
2575 float best = fabs(images->timestamps[0]);
2576 for (int i = 1; i < images->num_frames; i++) {
2577 float dist = fabs(images->timestamps[i]);
2578 if (dist < best) {
2579 refimg = images->frames[i];
2580 best = dist;
2581 continue;
2582 } else {
2583 break;
2584 }
2585 }
2586
2587 struct pass_state pass = {
2588 .rr = rr,
2589 .params = params,
2590 .image = *refimg,
2591 .target = *ptarget,
2592 .info.stage = PL_RENDER_STAGE_BLEND,
2593 };
2594
2595 if (!params->frame_mixer || rr->disable_mixing || !FBOFMT(4))
2596 goto fallback;
2597
2598 // Can't reasonably interpolate a single image, so just directly render it
2599 if (images->num_frames == 1)
2600 goto fallback;
2601
2602 if (!pass_infer_state(&pass))
2603 return false;
2604
2605 int out_w = abs(pl_rect_w(pass.dst_rect)),
2606 out_h = abs(pl_rect_h(pass.dst_rect));
2607
2608 int fidx = 0;
2609 struct cached_frame frames[MAX_MIX_FRAMES];
2610 float weights[MAX_MIX_FRAMES];
2611 float wsum = 0.0;
2612 pass.tmp = pl_tmp(NULL);
2613
2614 // Garbage collect the cache by evicting all frames from the cache that are
2615 // not determined to still be required
2616 for (int i = 0; i < rr->frames.num; i++)
2617 rr->frames.elem[i].evict = true;
2618
2619 // Traverse the input frames and determine/prepare the ones we need
2620 for (int i = 0; i < images->num_frames; i++) {
2621 uint64_t sig = images->signatures[i];
2622 float pts = images->timestamps[i];
2623 PL_TRACE(rr, "Considering image with signature 0x%llx, pts %f",
2624 (unsigned long long) sig, pts);
2625
2626 float weight;
2627
2628 // For backwards compatibility, treat !kernel as oversample
2629 const struct pl_filter_function *kernel = params->frame_mixer->kernel;
2630 kernel = PL_DEF(kernel, &oversample_kernel);
2631 if (kernel->weight == oversample) {
2632
2633 // Compute the visible interval [pts, end] of this frame
2634 float end = i+1 < images->num_frames ? images->timestamps[i+1] : INFINITY;
2635 if (pts > images->vsync_duration || end < 0.0) {
2636 PL_TRACE(rr, " -> Skipping: no intersection with vsync");
2637 continue;
2638 } else {
2639 pts = PL_MAX(pts, 0.0);
2640 end = PL_MIN(end, images->vsync_duration);
2641 pl_assert(end >= pts);
2642 }
2643
2644 // Weight is the fraction of vsync interval that frame is visible
2645 weight = (end - pts) / images->vsync_duration;
2646 PL_TRACE(rr, " -> Frame [%f, %f] intersects [%f, %f] = weight %f",
2647 pts, end, 0.0, images->vsync_duration, weight);
2648
2649 if (weight < kernel->params[0]) {
2650 PL_TRACE(rr, " (culling due to threshold)");
2651 weight = 0.0;
2652 }
2653
2654 } else {
2655
2656 if (fabs(pts) >= kernel->radius) {
2657 PL_TRACE(rr, " -> Skipping: outside filter radius (%f)",
2658 kernel->radius);
2659 continue;
2660 }
2661
2662 // Weight is directly sampled from the filter
2663 weight = pl_filter_sample(params->frame_mixer, pts);
2664 PL_TRACE(rr, " -> Filter offset %f = weight %f", pts, weight);
2665
2666 }
2667
2668 struct cached_frame *f = NULL;
2669 for (int j = 0; j < rr->frames.num; j++) {
2670 if (rr->frames.elem[j].signature == sig) {
2671 f = &rr->frames.elem[j];
2672 f->evict = false;
2673 break;
2674 }
2675 }
2676
2677 // Skip frames with negligible contributions. Do this after the loop
2678 // above to make sure these frames don't get evicted just yet.
2679 const float cutoff = 1e-3;
2680 if (fabs(weight) <= cutoff) {
2681 PL_TRACE(rr, " -> Skipping: weight (%f) below threshold (%f)",
2682 weight, cutoff);
2683 continue;
2684 }
2685
2686 if (!f) {
2687 // Signature does not exist in the cache at all yet,
2688 // so grow the cache by this entry.
2689 PL_ARRAY_GROW(rr, rr->frames);
2690 f = &rr->frames.elem[rr->frames.num++];
2691 *f = (struct cached_frame) {
2692 .signature = sig,
2693 .color = images->frames[i]->color,
2694 .profile = images->frames[i]->profile,
2695 };
2696 }
2697
2698 // Check to see if we can blindly reuse this cache entry. This is the
2699 // case if either the params are compatible, or the user doesn't care
2700 bool can_reuse = f->tex;
2701 if (can_reuse && !params->preserve_mixing_cache) {
2702 can_reuse = f->tex->params.w == out_w &&
2703 f->tex->params.h == out_h &&
2704 f->params_hash == params_hash;
2705 }
2706
2707 if (!can_reuse) {
2708 // If we can't reuse the entry, we need to re-render this frame
2709 PL_TRACE(rr, " -> Cached texture missing or invalid.. (re)creating");
2710 if (!f->tex) {
2711 if (PL_ARRAY_POP(rr->frame_fbos, &f->tex))
2712 pl_tex_invalidate(rr->gpu, f->tex);
2713 }
2714
2715 bool ok = pl_tex_recreate(rr->gpu, &f->tex, &(struct pl_tex_params) {
2716 .w = out_w,
2717 .h = out_h,
2718 .format = rr->fbofmt[4],
2719 .sampleable = true,
2720 .renderable = true,
2721 .storable = rr->fbofmt[4]->caps & PL_FMT_CAP_STORABLE,
2722 });
2723
2724 if (!ok) {
2725 PL_ERR(rr, "Could not create intermediate texture for "
2726 "frame mixing.. disabling!");
2727 rr->disable_mixing = true;
2728 goto fallback;
2729 }
2730
2731 struct pass_state inter_pass = {
2732 .rr = rr,
2733 .tmp = pass.tmp,
2734 .params = pass.params,
2735 .fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool)),
2736 .image = *images->frames[i],
2737 .target = pass.target,
2738 .info.stage = PL_RENDER_STAGE_FRAME,
2739 };
2740
2741 // Render a single frame up to `pass_output_target`
2742 if (!pass_infer_state(&inter_pass))
2743 goto error;
2744
2745 pl_dispatch_callback(rr->dp, &inter_pass, info_callback);
2746 pl_dispatch_reset_frame(rr->dp);
2747 for (int n = 0; n < params->num_hooks; n++) {
2748 if (params->hooks[n]->reset)
2749 params->hooks[n]->reset(params->hooks[n]->priv);
2750 }
2751
2752 if (!pass_read_image(&inter_pass))
2753 goto error;
2754 if (!pass_scale_main(&inter_pass))
2755 goto error;
2756
2757 pl_assert(inter_pass.img.w == out_w &&
2758 inter_pass.img.h == out_h);
2759
2760 ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
2761 .shader = &inter_pass.img.sh,
2762 .target = f->tex,
2763 });
2764 if (!ok)
2765 goto error;
2766
2767 f->params_hash = params_hash;
2768 f->color = inter_pass.img.color;
2769 }
2770
2771 pl_assert(fidx < MAX_MIX_FRAMES);
2772 frames[fidx] = *f;
2773 weights[fidx] = weight;
2774 wsum += weight;
2775 fidx++;
2776 }
2777
2778 // Evict the frames we *don't* need
2779 for (int i = 0; i < rr->frames.num; ) {
2780 if (rr->frames.elem[i].evict) {
2781 PL_TRACE(rr, "Evicting frame with signature %llx from cache",
2782 (unsigned long long) rr->frames.elem[i].signature);
2783 PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex);
2784 PL_ARRAY_REMOVE_AT(rr->frames, i);
2785 continue;
2786 } else {
2787 i++;
2788 }
2789 }
2790
2791 // Sample and mix the output color
2792 pl_dispatch_callback(rr->dp, &pass, info_callback);
2793 pl_dispatch_reset_frame(rr->dp);
2794 pass.info.index = fidx;
2795
2796 pl_shader sh = pl_dispatch_begin(rr->dp);
2797 sh_describe(sh, "frame mixing");
2798 sh->res.output = PL_SHADER_SIG_COLOR;
2799 sh->output_w = out_w;
2800 sh->output_h = out_h;
2801
2802 // The color space to mix the frames in. Arbitrarily use the newest frame's
2803 // color, since this is unlikely to change very often mid-playback.
2804 pl_assert(fidx > 0);
2805 const struct pl_color_space mix_color = frames[fidx - 1].color;
2806
2807 GLSL("vec4 color; \n"
2808 "// pl_render_image_mix \n"
2809 "{ \n"
2810 "vec4 mix_color = vec4(0.0); \n");
2811
2812 for (int i = 0; i < fidx; i++) {
2813 const struct pl_tex_params *tpars = &frames[i].tex->params;
2814
2815 // Use linear sampling if desired and possible
2816 enum pl_tex_sample_mode sample_mode = PL_TEX_SAMPLE_NEAREST;
2817 if ((tpars->w != out_w || tpars->h != out_h) &&
2818 (tpars->format->caps & PL_FMT_CAP_LINEAR))
2819 {
2820 sample_mode = PL_TEX_SAMPLE_LINEAR;
2821 }
2822
2823 ident_t pos, tex = sh_bind(sh, frames[i].tex, PL_TEX_ADDRESS_CLAMP,
2824 sample_mode, "frame", NULL, &pos, NULL, NULL);
2825
2826 GLSL("color = %s(%s, %s); \n", sh_tex_fn(sh, *tpars), tex, pos);
2827
2828 // Note: This ignores differences in ICC profile, which we decide to
2829 // just simply not care about. Doing that properly would require
2830 // converting between different image profiles, and the headache of
2831 // finagling that state is just not worth it because this is an
2832 // exceptionally unlikely hypothetical.
2833 pl_shader_color_map(sh, NULL, frames[i].color, mix_color, NULL, false);
2834
2835 ident_t weight = sh_var(sh, (struct pl_shader_var) {
2836 .var = pl_var_float("weight"),
2837 .data = &(float){ weights[i] / wsum },
2838 .dynamic = true,
2839 });
2840
2841 GLSL("mix_color += %s * color; \n", weight);
2842 }
2843
2844 GLSL("color = mix_color; \n"
2845 "} \n");
2846
2847 // Dispatch this to the destination
2848 pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool));
2849 pass.img = (struct img) {
2850 .sh = sh,
2851 .w = out_w,
2852 .h = out_h,
2853 .comps = 4,
2854 .color = mix_color,
2855 .repr = {
2856 .sys = PL_COLOR_SYSTEM_RGB,
2857 .levels = PL_COLOR_LEVELS_PC,
2858 .alpha = PL_ALPHA_PREMULTIPLIED,
2859 },
2860 };
2861
2862 for (int i = 0; i < params->num_hooks; i++) {
2863 if (params->hooks[i]->reset)
2864 params->hooks[i]->reset(params->hooks[i]->priv);
2865 }
2866
2867 if (!pass_output_target(&pass))
2868 goto fallback;
2869
2870 pl_free(pass.tmp);
2871 return true;
2872
2873 error:
2874 PL_ERR(rr, "Could not render image for frame mixing.. disabling!");
2875 rr->disable_mixing = true;
2876 // fall through
2877
2878 fallback:
2879 pl_free(pass.tmp);
2880 return pl_render_image(rr, refimg, ptarget, params);
2881
2882
2883 }
2884
pl_frame_set_chroma_location(struct pl_frame * frame,enum pl_chroma_location chroma_loc)2885 void pl_frame_set_chroma_location(struct pl_frame *frame,
2886 enum pl_chroma_location chroma_loc)
2887 {
2888 pl_tex ref = frame_ref(frame);
2889
2890 if (ref) {
2891 // Texture dimensions are already known, so apply the chroma location
2892 // only to subsampled planes
2893 int ref_w = ref->params.w, ref_h = ref->params.h;
2894
2895 for (int i = 0; i < frame->num_planes; i++) {
2896 struct pl_plane *plane = &frame->planes[i];
2897 pl_tex tex = plane->texture;
2898 bool subsampled = tex->params.w < ref_w || tex->params.h < ref_h;
2899 if (subsampled)
2900 pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y);
2901 }
2902 } else {
2903 // Texture dimensions are not yet known, so apply the chroma location
2904 // to all chroma planes, regardless of subsampling
2905 for (int i = 0; i < frame->num_planes; i++) {
2906 struct pl_plane *plane = &frame->planes[i];
2907 if (detect_plane_type(plane, &frame->repr) == PLANE_CHROMA)
2908 pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y);
2909 }
2910 }
2911 }
2912
pl_frame_from_swapchain(struct pl_frame * out_frame,const struct pl_swapchain_frame * frame)2913 void pl_frame_from_swapchain(struct pl_frame *out_frame,
2914 const struct pl_swapchain_frame *frame)
2915 {
2916 pl_tex fbo = frame->fbo;
2917 *out_frame = (struct pl_frame) {
2918 .num_planes = 1,
2919 .planes = {{
2920 .texture = fbo,
2921 .components = fbo->params.format->num_components,
2922 .component_mapping = {0, 1, 2, 3},
2923 }},
2924 .crop = { 0, 0, fbo->params.w, fbo->params.h },
2925 .repr = frame->color_repr,
2926 .color = frame->color_space,
2927 };
2928
2929 if (frame->flipped)
2930 PL_SWAP(out_frame->crop.y0, out_frame->crop.y1);
2931 }
2932
pl_frame_is_cropped(const struct pl_frame * frame)2933 bool pl_frame_is_cropped(const struct pl_frame *frame)
2934 {
2935 int x0 = roundf(PL_MIN(frame->crop.x0, frame->crop.x1)),
2936 y0 = roundf(PL_MIN(frame->crop.y0, frame->crop.y1)),
2937 x1 = roundf(PL_MAX(frame->crop.x0, frame->crop.x1)),
2938 y1 = roundf(PL_MAX(frame->crop.y0, frame->crop.y1));
2939
2940 pl_tex ref = frame_ref(frame);
2941 pl_assert(ref);
2942
2943 if (!x0 && !x1)
2944 x1 = ref->params.w;
2945 if (!y0 && !y1)
2946 y1 = ref->params.h;
2947
2948 return x0 > 0 || y0 > 0 || x1 < ref->params.w || y1 < ref->params.h;
2949 }
2950
pl_frame_clear_rgba(pl_gpu gpu,const struct pl_frame * frame,const float rgba[4])2951 void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame,
2952 const float rgba[4])
2953 {
2954 struct pl_color_repr repr = frame->repr;
2955 struct pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL);
2956 pl_transform3x3_invert(&tr);
2957
2958 float encoded[3] = { rgba[0], rgba[1], rgba[2] };
2959 pl_transform3x3_apply(&tr, encoded);
2960
2961 float mult = frame->repr.alpha == PL_ALPHA_INDEPENDENT ? 1.0 : rgba[3];
2962 for (int p = 0; p < frame->num_planes; p++) {
2963 const struct pl_plane *plane = &frame->planes[p];
2964 float clear[4] = { 0.0, 0.0, 0.0, rgba[3] };
2965 for (int c = 0; c < plane->components; c++) {
2966 if (plane->component_mapping[c] >= 0)
2967 clear[c] = mult * encoded[plane->component_mapping[c]];
2968 }
2969
2970 pl_tex_clear(gpu, plane->texture, clear);
2971 }
2972 }
2973