1 /*
2 * This file is part of mpv.
3 *
4 * mpv is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * mpv is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <assert.h>
19 #include <float.h>
20 #include <math.h>
21 #include <stdarg.h>
22 #include <stdbool.h>
23 #include <string.h>
24
25 #include <libavutil/common.h>
26 #include <libavutil/lfg.h>
27
28 #include "video.h"
29
30 #include "misc/bstr.h"
31 #include "options/m_config.h"
32 #include "options/path.h"
33 #include "common/global.h"
34 #include "options/options.h"
35 #include "utils.h"
36 #include "hwdec.h"
37 #include "osd.h"
38 #include "ra.h"
39 #include "stream/stream.h"
40 #include "video_shaders.h"
41 #include "user_shaders.h"
42 #include "error_diffusion.h"
43 #include "video/out/filter_kernels.h"
44 #include "video/out/aspect.h"
45 #include "video/out/dither.h"
46 #include "video/out/vo.h"
47
48 // scale/cscale arguments that map directly to shader filter routines.
49 // Note that the convolution filters are not included in this list.
static const char *const fixed_scale_filters[] = {
    "bilinear",
    "bicubic_fast",
    "oversample",
    NULL
};
// tscale (temporal interpolation) filters handled by dedicated code paths
// rather than the generic convolution sampler.
static const char *const fixed_tscale_filters[] = {
    "oversample",
    "linear",
    NULL
};
61
// Allowed convolution filter sizes (number of taps) for the scaler LUTs.
// must be sorted, and terminated with 0
int filter_sizes[] =
    {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0};
// Same, but for the temporal (tscale) filter, which blends few frames only.
int tscale_sizes[] = {2, 4, 6, 8, 0};
66
// A single 2D vertex attribute value (position or texture coordinate).
struct vertex_pt {
    float x, y;
};
70
// One uploaded plane of the current video frame.
struct texplane {
    struct ra_tex *tex;
    int w, h;           // logical plane size (tex may be padded beyond this)
    bool flipped;       // plane data is stored bottom-up
};
76
// The current source frame and its per-plane textures.
struct video_image {
    struct texplane planes[4];
    struct mp_image *mpi;       // original input image
    uint64_t id;                // unique ID identifying mpi contents
    bool hwdec_mapped;          // planes[] are borrowed from the hwdec mapper
};
83
// Semantic classification of a plane's contents; used to decide which
// processing (e.g. chroma scaling) applies to it.
enum plane_type {
    PLANE_NONE = 0,
    PLANE_RGB,
    PLANE_LUMA,
    PLANE_CHROMA,
    PLANE_ALPHA,
    PLANE_XYZ,
};

// Human-readable names for enum plane_type, indexed by the enum value.
static const char *plane_names[] = {
    [PLANE_NONE] = "unknown",
    [PLANE_RGB] = "rgb",
    [PLANE_LUMA] = "luma",
    [PLANE_CHROMA] = "chroma",
    [PLANE_ALPHA] = "alpha",
    [PLANE_XYZ] = "xyz",
};
101
// A self-contained description of a source image which can be bound to a
// texture unit and sampled from. Contains metadata about how it's to be used
struct image {
    enum plane_type type; // must be set to something non-zero
    int components; // number of relevant coordinates
    float multiplier; // multiplier to be used when sampling
    struct ra_tex *tex;
    int w, h; // logical size (after transformation)
    struct gl_transform transform; // rendering transformation
    int padding; // number of leading padding components (e.g. 2 = rg is padding)
};

// A named image, for user scripting purposes
struct saved_img {
    const char *name;
    struct image img;
};
119
// A texture hook. This is some operation that transforms a named texture as
// soon as it's generated
struct tex_hook {
    const char *save_tex;                   // name to store the result under
    const char *hook_tex[SHADER_MAX_HOOKS]; // texture names this hook fires on
    const char *bind_tex[SHADER_MAX_BINDS]; // extra textures to bind
    int components; // how many components are relevant (0 = same as input)
    bool align_offset; // whether to align hooked tex with reference.
    void *priv; // this gets talloc_freed when the tex_hook is removed
    void (*hook)(struct gl_video *p, struct image img, // generates GLSL
                 struct gl_transform *trans, void *priv);
    // Optional predicate; hook is skipped if this returns false.
    bool (*cond)(struct gl_video *p, struct image img, void *priv);
};
133
// A rendered output frame kept for temporal interpolation.
struct surface {
    struct ra_tex *tex;
    uint64_t id;    // source frame ID this surface was rendered from
    double pts;     // presentation timestamp of the source frame
};

#define SURFACES_MAX 10

// A file loaded through load_cached_file(), kept to avoid re-reading.
struct cached_file {
    char *path;
    struct bstr body;
};

// Per-pass description and performance measurement.
struct pass_info {
    struct bstr desc;
    struct mp_pass_perf perf;
};

// A direct-rendering buffer: GPU-accessible memory video decoders write into.
struct dr_buffer {
    struct ra_buf *buf;
    // The mpi reference will keep the data from being recycled (or from other
    // references gaining write access) while the GPU is accessing the buffer.
    struct mp_image *mpi;
};
158
// Central renderer state. One instance per VO; holds all GPU resources,
// option state, and per-frame scratch data for the GPU rendering path.
struct gl_video {
    struct ra *ra;

    struct mpv_global *global;
    struct mp_log *log;
    struct gl_video_opts opts;
    struct m_config_cache *opts_cache;
    struct gl_lcms *cms;

    int fb_depth;               // actual bits available in GL main framebuffer
    struct m_color clear_color;
    bool force_clear_color;

    struct gl_shader_cache *sc;

    struct osd_state *osd_state;
    struct mpgl_osd *osd;
    double osd_pts;

    struct ra_tex *lut_3d_texture;
    bool use_lut_3d;
    int lut_3d_size[3];

    struct ra_tex *dither_texture;

    struct mp_image_params real_image_params;   // configured format
    struct mp_image_params image_params;        // texture format (mind hwdec case)
    struct ra_imgfmt_desc ra_format;            // texture format
    int plane_count;

    bool is_gray;
    bool has_alpha;
    char color_swizzle[5];
    bool use_integer_conversion;

    struct video_image image;

    struct dr_buffer *dr_buffers;
    int num_dr_buffers;

    bool using_dr_path;

    bool dumb_mode;
    bool forced_dumb_mode;

    // Cached vertex array, to avoid re-allocation per frame. For simplicity,
    // our vertex format is simply a list of `vertex_pt`s, since this greatly
    // simplifies offset calculation at the cost of (unneeded) flexibility.
    struct vertex_pt *tmp_vertex;
    struct ra_renderpass_input *vao;
    int vao_len;

    const struct ra_format *fbo_format;
    struct ra_tex *merge_tex[4];
    struct ra_tex *scale_tex[4];
    struct ra_tex *integer_tex[4];
    struct ra_tex *indirect_tex;
    struct ra_tex *blend_subs_tex;
    struct ra_tex *error_diffusion_tex[2];
    struct ra_tex *screen_tex;
    struct ra_tex *output_tex;
    struct ra_tex **hook_textures;
    int num_hook_textures;
    int idx_hook_textures;

    struct ra_buf *hdr_peak_ssbo;
    struct surface surfaces[SURFACES_MAX];

    // user pass descriptions and textures
    struct tex_hook *tex_hooks;
    int num_tex_hooks;
    struct gl_user_shader_tex *user_textures;
    int num_user_textures;

    int surface_idx;
    int surface_now;
    int frames_drawn;
    bool is_interpolated;
    bool output_tex_valid;

    // state for configured scalers
    struct scaler scaler[SCALER_COUNT];

    struct mp_csp_equalizer_state *video_eq;

    struct mp_rect src_rect;    // displayed part of the source video
    struct mp_rect dst_rect;    // video rectangle on output window
    struct mp_osd_res osd_rect; // OSD size/margins

    // temporary during rendering
    struct compute_info pass_compute; // compute shader metadata for this pass
    struct image *pass_imgs;          // bound images for this pass
    int num_pass_imgs;
    struct saved_img *saved_imgs;     // saved (named) images for this frame
    int num_saved_imgs;

    // effective current texture metadata - this will essentially affect the
    // next render pass target, as well as implicitly tracking what needs to
    // be done with the image
    int texture_w, texture_h;
    struct gl_transform texture_offset; // texture transform without rotation
    int components;
    bool use_linear;
    float user_gamma;

    // pass info / metrics
    struct pass_info pass_fresh[VO_PASS_PERF_MAX];
    struct pass_info pass_redraw[VO_PASS_PERF_MAX];
    struct pass_info *pass;
    int pass_idx;
    struct timer_pool *upload_timer;
    struct timer_pool *blit_timer;
    struct timer_pool *osd_timer;

    int frames_uploaded;
    int frames_rendered;
    AVLFG lfg;

    // Cached because computing it can take relatively long
    int last_dither_matrix_size;
    float *last_dither_matrix;

    struct cached_file *files;
    int num_files;

    bool hwdec_interop_loading_done;
    struct ra_hwdec **hwdecs;
    int num_hwdecs;

    struct ra_hwdec_mapper *hwdec_mapper;
    struct ra_hwdec *hwdec_overlay;
    bool hwdec_active;

    bool dsi_warned;
    bool broken_frame; // temporary error state

    bool colorspace_override_warned;
    bool correct_downscaling_warned;
};
298
// Default values for all renderer options (everything not listed is zero).
static const struct gl_video_opts gl_video_opts_def = {
    .dither_algo = DITHER_FRUIT,
    .dither_depth = -1,
    .dither_size = 6,
    .temporal_dither_period = 1,
    .error_diffusion = "sierra-lite",
    .fbo_format = "auto",
    .sigmoid_center = 0.75,
    .sigmoid_slope = 6.5,
    // Per-scaler defaults; NAN means "use the kernel's own default param".
    .scaler = {
        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // scale
        {{NULL,       .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // dscale
        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // cscale
        {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .clamp = 1, }, // tscale
    },
    .scaler_resizes_only = 1,
    .scaler_lut_size = 6,
    .interpolation_threshold = 0.01,
    .alpha_mode = ALPHA_BLEND_TILES,
    .background = {0, 0, 0, 255},
    .gamma = 1.0f,
    .tone_map = {
        .curve = TONE_MAPPING_BT_2390,
        .curve_param = NAN,
        .max_boost = 1.0,
        .decay_rate = 100.0,
        .scene_threshold_low = 5.5,
        .scene_threshold_high = 10.0,
        .desat = 0.75,
        .desat_exp = 1.5,
        .gamut_clipping = 1,
    },
    .early_flush = -1,
    .hwdec_interop = "auto",
};
338
339 static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
340 struct bstr name, const char **value);
341
342 static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
343 struct bstr name, const char **value);
344
345 static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt,
346 struct bstr name, const char **value);
347
#define OPT_BASE_STRUCT struct gl_video_opts

// Use for options which use NAN for defaults.
#define OPT_FLOATDEF(field) \
    OPT_FLOAT(field), \
    .flags = M_OPT_DEFAULT_NAN

// Expands to the full set of per-scaler suboptions (param1/param2, blur,
// cutoff, taper, window settings, clamp, radius, antiring) for scaler slot i
// with option-name prefix n.
#define SCALER_OPTS(n, i) \
    {n, OPT_STRING_VALIDATE(scaler[i].kernel.name, validate_scaler_opt)}, \
    {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])},               \
    {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])},               \
    {n"-blur",   OPT_FLOAT(scaler[i].kernel.blur)},                       \
    {n"-cutoff", OPT_FLOAT(scaler[i].cutoff), M_RANGE(0.0, 1.0)},         \
    {n"-taper",  OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)},   \
    {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])},               \
    {n"-wblur",  OPT_FLOAT(scaler[i].window.blur)},                       \
    {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)},   \
    {n"-clamp",  OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)},          \
    {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)},        \
    {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)},     \
    {n"-window", OPT_STRING_VALIDATE(scaler[i].window.name, validate_window_opt)}
369
// Option table for all --gpu/--scale/--dither/... renderer options.
const struct m_sub_options gl_video_conf = {
    .opts = (const m_option_t[]) {
        {"gpu-dumb-mode", OPT_CHOICE(dumb_mode,
            {"auto", 0}, {"yes", 1}, {"no", -1})},
        {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0)},
        {"gamma-auto", OPT_FLAG(gamma_auto)},
        {"target-prim", OPT_CHOICE_C(target_prim, mp_csp_prim_names)},
        {"target-trc", OPT_CHOICE_C(target_trc, mp_csp_trc_names)},
        {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}),
            M_RANGE(10, 10000)},
        {"tone-mapping", OPT_CHOICE(tone_map.curve,
            {"clip",     TONE_MAPPING_CLIP},
            {"mobius",   TONE_MAPPING_MOBIUS},
            {"reinhard", TONE_MAPPING_REINHARD},
            {"hable",    TONE_MAPPING_HABLE},
            {"gamma",    TONE_MAPPING_GAMMA},
            {"linear",   TONE_MAPPING_LINEAR},
            {"bt.2390",  TONE_MAPPING_BT_2390})},
        {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak,
            {"auto", 0},
            {"yes", 1},
            {"no", -1})},
        {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate),
            M_RANGE(1.0, 1000.0)},
        {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low),
            M_RANGE(0, 20.0)},
        {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high),
            M_RANGE(0, 20.0)},
        {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)},
        {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost),
            M_RANGE(1.0, 10.0)},
        {"tone-mapping-desaturate", OPT_FLOAT(tone_map.desat)},
        {"tone-mapping-desaturate-exponent", OPT_FLOAT(tone_map.desat_exp),
            M_RANGE(0.0, 20.0)},
        {"gamut-warning", OPT_FLAG(tone_map.gamut_warning)},
        {"gamut-clipping", OPT_FLAG(tone_map.gamut_clipping)},
        {"opengl-pbo", OPT_FLAG(pbo)},
        SCALER_OPTS("scale",  SCALER_SCALE),
        SCALER_OPTS("dscale", SCALER_DSCALE),
        SCALER_OPTS("cscale", SCALER_CSCALE),
        SCALER_OPTS("tscale", SCALER_TSCALE),
        {"scaler-lut-size", OPT_INT(scaler_lut_size), M_RANGE(4, 10)},
        {"scaler-resizes-only", OPT_FLAG(scaler_resizes_only)},
        {"correct-downscaling", OPT_FLAG(correct_downscaling)},
        {"linear-downscaling", OPT_FLAG(linear_downscaling)},
        {"linear-upscaling", OPT_FLAG(linear_upscaling)},
        {"sigmoid-upscaling", OPT_FLAG(sigmoid_upscaling)},
        {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)},
        {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)},
        {"fbo-format", OPT_STRING(fbo_format)},
        {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}),
            M_RANGE(-1, 16)},
        {"dither", OPT_CHOICE(dither_algo,
            {"fruit", DITHER_FRUIT},
            {"ordered", DITHER_ORDERED},
            {"error-diffusion", DITHER_ERROR_DIFFUSION},
            {"no", DITHER_NONE})},
        {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)},
        {"temporal-dither", OPT_FLAG(temporal_dither)},
        {"temporal-dither-period", OPT_INT(temporal_dither_period),
            M_RANGE(1, 128)},
        {"error-diffusion",
            OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)},
        {"alpha", OPT_CHOICE(alpha_mode,
            {"no", ALPHA_NO},
            {"yes", ALPHA_YES},
            {"blend", ALPHA_BLEND},
            {"blend-tiles", ALPHA_BLEND_TILES})},
        {"opengl-rectangle-textures", OPT_FLAG(use_rectangle)},
        {"background", OPT_COLOR(background)},
        {"interpolation", OPT_FLAG(interpolation)},
        {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)},
        {"blend-subtitles", OPT_CHOICE(blend_subs,
            {"no", BLEND_SUBS_NO},
            {"yes", BLEND_SUBS_YES},
            {"video", BLEND_SUBS_VIDEO})},
        {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE},
        {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")},
        {"deband", OPT_FLAG(deband)},
        {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)},
        {"sharpen", OPT_FLOAT(unsharp)},
        {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)},
        {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)},
        {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)},
        {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE},
        {"gpu-hwdec-interop",
            OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)},
        // Deprecated/renamed option aliases below; kept for compatibility.
        {"opengl-hwdec-interop", OPT_REPLACED("gpu-hwdec-interop")},
        {"hwdec-preload", OPT_REPLACED("opengl-hwdec-interop")},
        {"hdr-tone-mapping", OPT_REPLACED("tone-mapping")},
        {"opengl-shaders", OPT_REPLACED("glsl-shaders")},
        {"opengl-shader", OPT_REPLACED("glsl-shader")},
        {"opengl-shader-cache-dir", OPT_REPLACED("gpu-shader-cache-dir")},
        {"opengl-tex-pad-x", OPT_REPLACED("gpu-tex-pad-x")},
        {"opengl-tex-pad-y", OPT_REPLACED("gpu-tex-pad-y")},
        {"opengl-fbo-format", OPT_REPLACED("fbo-format")},
        {"opengl-dumb-mode", OPT_REPLACED("gpu-dumb-mode")},
        {"opengl-gamma", OPT_REPLACED("gamma-factor")},
        {"linear-scaling", OPT_REMOVED("Split into --linear-upscaling and "
                                       "--linear-downscaling")},
        {0}
    },
    .size = sizeof(struct gl_video_opts),
    .defaults = &gl_video_opts_def,
};
475
476 static void uninit_rendering(struct gl_video *p);
477 static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
478 static void check_gl_features(struct gl_video *p);
479 static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id);
480 static const char *handle_scaler_opt(const char *name, bool tscale);
481 static void reinit_from_options(struct gl_video *p);
482 static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
483 static void gl_video_setup_hooks(struct gl_video *p);
484 static void gl_video_update_options(struct gl_video *p);
485
// Shorthands for appending GLSL source to the shader cache; `p` must be the
// local struct gl_video pointer. GLSL adds a raw statement, GLSLF a formatted
// one, GLSLHF appends to the header section, PRELUDE to the prelude section.
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
490
load_cached_file(struct gl_video * p,const char * path)491 static struct bstr load_cached_file(struct gl_video *p, const char *path)
492 {
493 if (!path || !path[0])
494 return (struct bstr){0};
495 for (int n = 0; n < p->num_files; n++) {
496 if (strcmp(p->files[n].path, path) == 0)
497 return p->files[n].body;
498 }
499 // not found -> load it
500 char *fname = mp_get_user_path(NULL, p->global, path);
501 struct bstr s = stream_read_file(fname, p, p->global, 1000000000); // 1GB
502 talloc_free(fname);
503 if (s.len) {
504 struct cached_file new = {
505 .path = talloc_strdup(p, path),
506 .body = s,
507 };
508 MP_TARRAY_APPEND(p, p->files, p->num_files, new);
509 return new.body;
510 }
511 return (struct bstr){0};
512 }
513
debug_check_gl(struct gl_video * p,const char * msg)514 static void debug_check_gl(struct gl_video *p, const char *msg)
515 {
516 if (p->ra->fns->debug_marker)
517 p->ra->fns->debug_marker(p->ra, msg);
518 }
519
gl_video_reset_surfaces(struct gl_video * p)520 static void gl_video_reset_surfaces(struct gl_video *p)
521 {
522 for (int i = 0; i < SURFACES_MAX; i++) {
523 p->surfaces[i].id = 0;
524 p->surfaces[i].pts = MP_NOPTS_VALUE;
525 }
526 p->surface_idx = 0;
527 p->surface_now = 0;
528 p->frames_drawn = 0;
529 p->output_tex_valid = false;
530 }
531
gl_video_reset_hooks(struct gl_video * p)532 static void gl_video_reset_hooks(struct gl_video *p)
533 {
534 for (int i = 0; i < p->num_tex_hooks; i++)
535 talloc_free(p->tex_hooks[i].priv);
536
537 for (int i = 0; i < p->num_user_textures; i++)
538 ra_tex_free(p->ra, &p->user_textures[i].tex);
539
540 p->num_tex_hooks = 0;
541 p->num_user_textures = 0;
542 }
543
surface_wrap(int id)544 static inline int surface_wrap(int id)
545 {
546 id = id % SURFACES_MAX;
547 return id < 0 ? id + SURFACES_MAX : id;
548 }
549
reinit_osd(struct gl_video * p)550 static void reinit_osd(struct gl_video *p)
551 {
552 mpgl_osd_destroy(p->osd);
553 p->osd = NULL;
554 if (p->osd_state)
555 p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
556 }
557
uninit_rendering(struct gl_video * p)558 static void uninit_rendering(struct gl_video *p)
559 {
560 for (int n = 0; n < SCALER_COUNT; n++)
561 uninit_scaler(p, &p->scaler[n]);
562
563 ra_tex_free(p->ra, &p->dither_texture);
564
565 for (int n = 0; n < 4; n++) {
566 ra_tex_free(p->ra, &p->merge_tex[n]);
567 ra_tex_free(p->ra, &p->scale_tex[n]);
568 ra_tex_free(p->ra, &p->integer_tex[n]);
569 }
570
571 ra_tex_free(p->ra, &p->indirect_tex);
572 ra_tex_free(p->ra, &p->blend_subs_tex);
573 ra_tex_free(p->ra, &p->screen_tex);
574 ra_tex_free(p->ra, &p->output_tex);
575
576 for (int n = 0; n < 2; n++)
577 ra_tex_free(p->ra, &p->error_diffusion_tex[n]);
578
579 for (int n = 0; n < SURFACES_MAX; n++)
580 ra_tex_free(p->ra, &p->surfaces[n].tex);
581
582 for (int n = 0; n < p->num_hook_textures; n++)
583 ra_tex_free(p->ra, &p->hook_textures[n]);
584
585 gl_video_reset_surfaces(p);
586 gl_video_reset_hooks(p);
587
588 gl_sc_reset_error(p->sc);
589 }
590
gl_video_gamma_auto_enabled(struct gl_video * p)591 bool gl_video_gamma_auto_enabled(struct gl_video *p)
592 {
593 return p->opts.gamma_auto;
594 }
595
gl_video_get_output_colorspace(struct gl_video * p)596 struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
597 {
598 return (struct mp_colorspace) {
599 .primaries = p->opts.target_prim,
600 .gamma = p->opts.target_trc,
601 .sig_peak = p->opts.target_peak / MP_REF_WHITE,
602 };
603 }
604
605 // Warning: profile.start must point to a ta allocation, and the function
606 // takes over ownership.
gl_video_set_icc_profile(struct gl_video * p,bstr icc_data)607 void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
608 {
609 if (gl_lcms_set_memory_profile(p->cms, icc_data))
610 reinit_from_options(p);
611 }
612
gl_video_icc_auto_enabled(struct gl_video * p)613 bool gl_video_icc_auto_enabled(struct gl_video *p)
614 {
615 return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
616 }
617
gl_video_get_lut3d(struct gl_video * p,enum mp_csp_prim prim,enum mp_csp_trc trc)618 static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
619 enum mp_csp_trc trc)
620 {
621 if (!p->use_lut_3d)
622 return false;
623
624 struct AVBufferRef *icc = NULL;
625 if (p->image.mpi)
626 icc = p->image.mpi->icc_profile;
627
628 if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc))
629 return true;
630
631 // GLES3 doesn't provide filtered 16 bit integer textures
632 // GLES2 doesn't even provide 3D textures
633 const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
634 if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) {
635 p->use_lut_3d = false;
636 MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n");
637 return false;
638 }
639
640 struct lut3d *lut3d = NULL;
641 if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) {
642 p->use_lut_3d = false;
643 return false;
644 }
645
646 ra_tex_free(p->ra, &p->lut_3d_texture);
647
648 struct ra_tex_params params = {
649 .dimensions = 3,
650 .w = lut3d->size[0],
651 .h = lut3d->size[1],
652 .d = lut3d->size[2],
653 .format = fmt,
654 .render_src = true,
655 .src_linear = true,
656 .initial_data = lut3d->data,
657 };
658 p->lut_3d_texture = ra_tex_create(p->ra, ¶ms);
659
660 debug_check_gl(p, "after 3d lut creation");
661
662 for (int i = 0; i < 3; i++)
663 p->lut_3d_size[i] = lut3d->size[i];
664
665 talloc_free(lut3d);
666
667 if (!p->lut_3d_texture) {
668 p->use_lut_3d = false;
669 return false;
670 }
671
672 return true;
673 }
674
675 // Fill an image struct from a ra_tex + some metadata
image_wrap(struct ra_tex * tex,enum plane_type type,int components)676 static struct image image_wrap(struct ra_tex *tex, enum plane_type type,
677 int components)
678 {
679 assert(type != PLANE_NONE);
680 return (struct image){
681 .type = type,
682 .tex = tex,
683 .multiplier = 1.0,
684 .w = tex ? tex->params.w : 1,
685 .h = tex ? tex->params.h : 1,
686 .transform = identity_trans,
687 .components = components,
688 };
689 }
690
691 // Bind an image to a free texture unit and return its ID.
pass_bind(struct gl_video * p,struct image img)692 static int pass_bind(struct gl_video *p, struct image img)
693 {
694 int idx = p->num_pass_imgs;
695 MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img);
696 return idx;
697 }
698
699 // Rotation by 90° and flipping.
700 // w/h is used for recentering.
get_transform(float w,float h,int rotate,bool flip,struct gl_transform * out_tr)701 static void get_transform(float w, float h, int rotate, bool flip,
702 struct gl_transform *out_tr)
703 {
704 int a = rotate % 90 ? 0 : rotate / 90;
705 int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc.
706 int cos90[4] = {1, 0, -1, 0};
707 struct gl_transform tr = {{{ cos90[a], sin90[a]},
708 {-sin90[a], cos90[a]}}};
709
710 // basically, recenter to keep the whole image in view
711 float b[2] = {1, 1};
712 gl_transform_vec(tr, &b[0], &b[1]);
713 tr.t[0] += b[0] < 0 ? w : 0;
714 tr.t[1] += b[1] < 0 ? h : 0;
715
716 if (flip) {
717 struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}};
718 gl_transform_trans(fliptr, &tr);
719 }
720
721 *out_tr = tr;
722 }
723
724 // Return the chroma plane upscaled to luma size, but with additional padding
725 // for image sizes not aligned to subsampling.
chroma_upsize(int size,int pixel)726 static int chroma_upsize(int size, int pixel)
727 {
728 return (size + pixel - 1) / pixel * pixel;
729 }
730
731 // If a and b are on the same plane, return what plane type should be used.
732 // If a or b are none, the other type always wins.
733 // Usually: LUMA/RGB/XYZ > CHROMA > ALPHA
merge_plane_types(enum plane_type a,enum plane_type b)734 static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b)
735 {
736 if (a == PLANE_NONE)
737 return b;
738 if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ)
739 return b;
740 if (b != PLANE_NONE && a == PLANE_ALPHA)
741 return b;
742 return a;
743 }
744
745 // Places a video_image's image textures + associated metadata into img[]. The
746 // number of textures is equal to p->plane_count. Any necessary plane offsets
747 // are stored in off. (e.g. chroma position)
// Places a video_image's image textures + associated metadata into img[]. The
// number of textures is equal to p->plane_count. Any necessary plane offsets
// are stored in off. (e.g. chroma position)
static void pass_get_images(struct gl_video *p, struct video_image *vimg,
                            struct image img[4], struct gl_transform off[4])
{
    assert(vimg->mpi);

    int w = p->image_params.w;
    int h = p->image_params.h;

    // Determine the chroma offset
    float ls_w = 1.0 / p->ra_format.chroma_w;
    float ls_h = 1.0 / p->ra_format.chroma_h;

    struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};

    if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
        int cx, cy;
        mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
        // By default texture coordinates are such that chroma is centered with
        // any chroma subsampling. If a specific direction is given, make it
        // so that the luma and chroma sample line up exactly.
        // For 4:4:4, setting chroma location should have no effect at all.
        // luma sample size (in chroma coord. space)
        chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
        chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
    }

    memset(img, 0, 4 * sizeof(img[0]));
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *t = &vimg->planes[n];

        // Classify the plane by the components it carries, and count how
        // many leading components are mere padding.
        enum plane_type type = PLANE_NONE;
        int padding = 0;
        for (int i = 0; i < 4; i++) {
            int c = p->ra_format.components[n][i];
            enum plane_type ctype;
            if (c == 0) {
                ctype = PLANE_NONE;
            } else if (c == 4) {
                ctype = PLANE_ALPHA;
            } else if (p->image_params.color.space == MP_CSP_RGB) {
                ctype = PLANE_RGB;
            } else if (p->image_params.color.space == MP_CSP_XYZ) {
                ctype = PLANE_XYZ;
            } else {
                ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
            }
            type = merge_plane_types(type, ctype);
            if (!c && padding == i)
                padding = i + 1;
        }

        // Scale factor to renormalize sampled values (e.g. 10-bit-in-16-bit).
        int msb_valid_bits =
            p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
        int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space;
        float tex_mul =
            1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits);
        if (p->ra_format.component_type == RA_CTYPE_FLOAT)
            tex_mul = 1.0;

        img[n] = (struct image){
            .type = type,
            .tex = t->tex,
            .multiplier = tex_mul,
            .w = t->w,
            .h = t->h,
            .padding = padding,
        };

        for (int i = 0; i < 4; i++)
            img[n].components += !!p->ra_format.components[n][i];

        get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
                      &img[n].transform);
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, img[n].w, img[n].h);

        off[n] = identity_trans;

        if (type == PLANE_CHROMA) {
            // Rotate the chroma offset into the rotated coordinate system.
            struct gl_transform rot;
            get_transform(0, 0, p->image_params.rotate, true, &rot);

            struct gl_transform tr = chroma;
            gl_transform_vec(rot, &tr.t[0], &tr.t[1]);

            float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
            float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;

            // Adjust the chroma offset if the real chroma size is fractional
            // due image sizes not aligned to chroma subsampling.
            struct gl_transform rot2;
            get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2);
            if (rot2.m[0][0] < 0)
                tr.t[0] += dx;
            if (rot2.m[1][0] < 0)
                tr.t[0] += dy;
            if (rot2.m[0][1] < 0)
                tr.t[1] += dx;
            if (rot2.m[1][1] < 0)
                tr.t[1] += dy;

            off[n] = tr;
        }
    }
}
853
854 // Return the index of the given component (assuming all non-padding components
855 // of all planes are concatenated into a linear list).
find_comp(struct ra_imgfmt_desc * desc,int component)856 static int find_comp(struct ra_imgfmt_desc *desc, int component)
857 {
858 int cur = 0;
859 for (int n = 0; n < desc->num_planes; n++) {
860 for (int i = 0; i < 4; i++) {
861 if (desc->components[n][i]) {
862 if (desc->components[n][i] == component)
863 return cur;
864 cur++;
865 }
866 }
867 }
868 return -1;
869 }
870
init_video(struct gl_video * p)871 static void init_video(struct gl_video *p)
872 {
873 p->use_integer_conversion = false;
874
875 struct ra_hwdec *hwdec = NULL;
876 for (int n = 0; n < p->num_hwdecs; n++) {
877 if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) {
878 hwdec = p->hwdecs[n];
879 break;
880 }
881 }
882
883 if (hwdec) {
884 if (hwdec->driver->overlay_frame) {
885 MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
886 "on the video!\n");
887 p->hwdec_overlay = hwdec;
888 } else {
889 p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params);
890 if (!p->hwdec_mapper)
891 MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
892 }
893 if (p->hwdec_mapper)
894 p->image_params = p->hwdec_mapper->dst_params;
895 const char **exts = hwdec->glsl_extensions;
896 for (int n = 0; exts && exts[n]; n++)
897 gl_sc_enable_extension(p->sc, (char *)exts[n]);
898 p->hwdec_active = true;
899 }
900
901 p->ra_format = (struct ra_imgfmt_desc){0};
902 ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format);
903
904 p->plane_count = p->ra_format.num_planes;
905
906 p->has_alpha = false;
907 p->is_gray = true;
908
909 for (int n = 0; n < p->ra_format.num_planes; n++) {
910 for (int i = 0; i < 4; i++) {
911 if (p->ra_format.components[n][i]) {
912 p->has_alpha |= p->ra_format.components[n][i] == 4;
913 p->is_gray &= p->ra_format.components[n][i] == 1 ||
914 p->ra_format.components[n][i] == 4;
915 }
916 }
917 }
918
919 for (int c = 0; c < 4; c++) {
920 int loc = find_comp(&p->ra_format, c + 1);
921 p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0];
922 }
923 p->color_swizzle[4] = '\0';
924
925 mp_image_params_guess_csp(&p->image_params);
926
927 av_lfg_init(&p->lfg, 1);
928
929 debug_check_gl(p, "before video texture creation");
930
931 if (!p->hwdec_active) {
932 struct video_image *vimg = &p->image;
933
934 struct mp_image layout = {0};
935 mp_image_set_params(&layout, &p->image_params);
936
937 for (int n = 0; n < p->plane_count; n++) {
938 struct texplane *plane = &vimg->planes[n];
939 const struct ra_format *format = p->ra_format.planes[n];
940
941 plane->w = mp_image_plane_w(&layout, n);
942 plane->h = mp_image_plane_h(&layout, n);
943
944 struct ra_tex_params params = {
945 .dimensions = 2,
946 .w = plane->w + p->opts.tex_pad_x,
947 .h = plane->h + p->opts.tex_pad_y,
948 .d = 1,
949 .format = format,
950 .render_src = true,
951 .src_linear = format->linear_filter,
952 .non_normalized = p->opts.use_rectangle,
953 .host_mutable = true,
954 };
955
956 MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
957 params.w, params.h);
958
959 plane->tex = ra_tex_create(p->ra, ¶ms);
960 p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
961 }
962 }
963
964 debug_check_gl(p, "after video texture creation");
965
966 // Format-dependent checks.
967 check_gl_features(p);
968
969 gl_video_setup_hooks(p);
970 }
971
gl_find_dr_buffer(struct gl_video * p,uint8_t * ptr)972 static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
973 {
974 for (int i = 0; i < p->num_dr_buffers; i++) {
975 struct dr_buffer *buffer = &p->dr_buffers[i];
976 uint8_t *bufptr = buffer->buf->data;
977 size_t size = buffer->buf->params.size;
978 if (ptr >= bufptr && ptr < bufptr + size)
979 return buffer;
980 }
981
982 return NULL;
983 }
984
// Release the mp_image references held by DR buffers whose GPU usage has
// completed (or all of them, if force is set). This is what eventually
// returns DR memory to the decoder once the GPU is done reading it.
static void gc_pending_dr_fences(struct gl_video *p, bool force)
{
again:;
    for (int n = 0; n < p->num_dr_buffers; n++) {
        struct dr_buffer *buffer = &p->dr_buffers[n];
        if (!buffer->mpi)
            continue;

        // buf_poll reports whether the GPU has finished using the buffer.
        bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
        if (res || force) {
            // Unreferencing the image could cause gl_video_dr_free_buffer()
            // to be called by the talloc destructor (if it was the last
            // reference). This will implicitly invalidate the buffer pointer
            // and change the p->dr_buffers array. To make it worse, it could
            // free multiple dr_buffers due to weird theoretical corner cases.
            // This is also why we use the goto to iterate again from the
            // start, because everything gets fucked up. Hail satan!
            struct mp_image *ref = buffer->mpi;
            buffer->mpi = NULL;
            talloc_free(ref);
            goto again;
        }
    }
}
1009
unref_current_image(struct gl_video * p)1010 static void unref_current_image(struct gl_video *p)
1011 {
1012 struct video_image *vimg = &p->image;
1013
1014 if (vimg->hwdec_mapped) {
1015 assert(p->hwdec_active && p->hwdec_mapper);
1016 ra_hwdec_mapper_unmap(p->hwdec_mapper);
1017 memset(vimg->planes, 0, sizeof(vimg->planes));
1018 vimg->hwdec_mapped = false;
1019 }
1020
1021 vimg->id = 0;
1022
1023 mp_image_unrefp(&vimg->mpi);
1024
1025 // While we're at it, also garbage collect pending fences in here to
1026 // get it out of the way.
1027 gc_pending_dr_fences(p, false);
1028 }
1029
1030 // If overlay mode is used, make sure to remove the overlay.
1031 // Be careful with this. Removing the overlay and adding another one will
1032 // lead to flickering artifacts.
unmap_overlay(struct gl_video * p)1033 static void unmap_overlay(struct gl_video *p)
1034 {
1035 if (p->hwdec_overlay)
1036 p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, NULL, NULL, NULL, true);
1037 }
1038
uninit_video(struct gl_video * p)1039 static void uninit_video(struct gl_video *p)
1040 {
1041 uninit_rendering(p);
1042
1043 struct video_image *vimg = &p->image;
1044
1045 unmap_overlay(p);
1046 unref_current_image(p);
1047
1048 for (int n = 0; n < p->plane_count; n++) {
1049 struct texplane *plane = &vimg->planes[n];
1050 ra_tex_free(p->ra, &plane->tex);
1051 }
1052 *vimg = (struct video_image){0};
1053
1054 // Invalidate image_params to ensure that gl_video_config() will call
1055 // init_video() on uninitialized gl_video.
1056 p->real_image_params = (struct mp_image_params){0};
1057 p->image_params = p->real_image_params;
1058 p->hwdec_active = false;
1059 p->hwdec_overlay = NULL;
1060 ra_hwdec_mapper_free(&p->hwdec_mapper);
1061 }
1062
pass_record(struct gl_video * p,struct mp_pass_perf perf)1063 static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
1064 {
1065 if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX)
1066 return;
1067
1068 struct pass_info *pass = &p->pass[p->pass_idx];
1069 pass->perf = perf;
1070
1071 if (pass->desc.len == 0)
1072 bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
1073
1074 p->pass_idx++;
1075 }
1076
1077 PRINTF_ATTRIBUTE(2, 3)
pass_describe(struct gl_video * p,const char * textf,...)1078 static void pass_describe(struct gl_video *p, const char *textf, ...)
1079 {
1080 if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX)
1081 return;
1082
1083 struct pass_info *pass = &p->pass[p->pass_idx];
1084
1085 if (pass->desc.len > 0)
1086 bstr_xappend(p, &pass->desc, bstr0(" + "));
1087
1088 va_list ap;
1089 va_start(ap, textf);
1090 bstr_xappend_vasprintf(p, &pass->desc, textf, ap);
1091 va_end(ap);
1092 }
1093
pass_info_reset(struct gl_video * p,bool is_redraw)1094 static void pass_info_reset(struct gl_video *p, bool is_redraw)
1095 {
1096 p->pass = is_redraw ? p->pass_redraw : p->pass_fresh;
1097 p->pass_idx = 0;
1098
1099 for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
1100 p->pass[i].desc.len = 0;
1101 p->pass[i].perf = (struct mp_pass_perf){0};
1102 }
1103 }
1104
pass_report_performance(struct gl_video * p)1105 static void pass_report_performance(struct gl_video *p)
1106 {
1107 if (!p->pass)
1108 return;
1109
1110 for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
1111 struct pass_info *pass = &p->pass[i];
1112 if (pass->desc.len) {
1113 MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n",
1114 BSTR_P(pass->desc),
1115 (int)pass->perf.last/1000,
1116 (int)pass->perf.avg/1000,
1117 (int)pass->perf.peak/1000);
1118 }
1119 }
1120 }
1121
pass_prepare_src_tex(struct gl_video * p)1122 static void pass_prepare_src_tex(struct gl_video *p)
1123 {
1124 struct gl_shader_cache *sc = p->sc;
1125
1126 for (int n = 0; n < p->num_pass_imgs; n++) {
1127 struct image *s = &p->pass_imgs[n];
1128 if (!s->tex)
1129 continue;
1130
1131 char *texture_name = mp_tprintf(32, "texture%d", n);
1132 char *texture_size = mp_tprintf(32, "texture_size%d", n);
1133 char *texture_rot = mp_tprintf(32, "texture_rot%d", n);
1134 char *texture_off = mp_tprintf(32, "texture_off%d", n);
1135 char *pixel_size = mp_tprintf(32, "pixel_size%d", n);
1136
1137 gl_sc_uniform_texture(sc, texture_name, s->tex);
1138 float f[2] = {1, 1};
1139 if (!s->tex->params.non_normalized) {
1140 f[0] = s->tex->params.w;
1141 f[1] = s->tex->params.h;
1142 }
1143 gl_sc_uniform_vec2(sc, texture_size, f);
1144 gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
1145 gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t);
1146 gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0],
1147 1.0f / f[1]});
1148 }
1149 }
1150
// Drop all pass image bindings after a pass has been dispatched.
static void cleanup_binds(struct gl_video *p)
{
    p->num_pass_imgs = 0;
}
1155
1156 // Sets the appropriate compute shader metadata for an implicit compute pass
1157 // bw/bh: block size
pass_is_compute(struct gl_video * p,int bw,int bh,bool flexible)1158 static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
1159 {
1160 if (p->pass_compute.active && flexible) {
1161 // Avoid overwriting existing block sizes when using a flexible pass
1162 bw = p->pass_compute.block_w;
1163 bh = p->pass_compute.block_h;
1164 }
1165
1166 p->pass_compute = (struct compute_info){
1167 .active = true,
1168 .block_w = bw,
1169 .block_h = bh,
1170 };
1171 }
1172
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target that needs to be written, or the source texture that needs to
// be reduced)
static void dispatch_compute(struct gl_video *p, int w, int h,
                             struct compute_info info)
{
    // Local work group size: an explicit thread count (threads_w/h) takes
    // precedence over the block size itself.
    PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
            info.threads_w > 0 ? info.threads_w : info.block_w,
            info.threads_h > 0 ? info.threads_h : info.block_h);

    pass_prepare_src_tex(p);

    // Since we don't actually have vertices, we pretend for convenience
    // reasons that we do and calculate the right texture coordinates based on
    // the output sample ID
    gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h });
    PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");

    // Emit the per-texture coordinate macros that a fragment shader would
    // normally get as interpolated varyings.
    for (int n = 0; n < p->num_pass_imgs; n++) {
        struct image *s = &p->pass_imgs[n];
        if (!s->tex)
            continue;

        PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + "
                "pixel_size%d * texture_off%d)\n", n, n, n, n);
        PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n);
    }

    // always round up when dividing to make sure we don't leave off a part of
    // the image
    int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
        num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;

    // If the backend can't provide the gl_NumWorkGroups builtin, bake the
    // dispatch dimensions in as a compile-time constant instead.
    if (!(p->ra->caps & RA_CAP_NUM_GROUPS))
        PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y);

    pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1));
    cleanup_binds(p);
}
1212
// Draw the current pass as a fullscreen-style quad covering dst, with one
// interleaved texcoord attribute per bound pass image. Returns the pass's
// performance data from the dispatch.
static struct mp_pass_perf render_pass_quad(struct gl_video *p,
                                            struct ra_fbo fbo, bool discard,
                                            const struct mp_rect *dst)
{
    // The first element is reserved for `vec2 position`
    int num_vertex_attribs = 1 + p->num_pass_imgs;
    size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt);

    // Expand the VAO if necessary
    while (p->vao_len < num_vertex_attribs) {
        MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
            .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1),
            .type = RA_VARTYPE_FLOAT,
            .dim_v = 2,
            .dim_m = 1,
            .offset = p->vao_len * sizeof(struct vertex_pt),
        });
    }

    int num_vertices = 6; // quad as triangle list
    int num_attribs_total = num_vertices * num_vertex_attribs;
    MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total);

    struct gl_transform t;
    gl_transform_ortho_fbo(&t, fbo);

    // Transform the two opposite corners of dst into clip space; the four
    // quad corners are all combinations of these x/y values.
    float x[2] = {dst->x0, dst->x1};
    float y[2] = {dst->y0, dst->y1};
    gl_transform_vec(t, &x[0], &y[0]);
    gl_transform_vec(t, &x[1], &y[1]);

    // Fill the first 4 vertex slots with the quad corners (n/2 selects x,
    // n%2 selects y) plus matching texture coordinates per pass image.
    for (int n = 0; n < 4; n++) {
        struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n];
        // vec2 position in idx 0
        vs[0].x = x[n / 2];
        vs[0].y = y[n % 2];
        for (int i = 0; i < p->num_pass_imgs; i++) {
            struct image *s = &p->pass_imgs[i];
            if (!s->tex)
                continue;
            struct gl_transform tr = s->transform;
            float tx = (n / 2) * s->w;
            float ty = (n % 2) * s->h;
            gl_transform_vec(tr, &tx, &ty);
            bool rect = s->tex->params.non_normalized;
            // vec2 texcoordN in idx N+1; rectangle textures keep texel
            // coordinates, normalized textures divide by the texture size.
            vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w);
            vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h);
        }
    }

    // Duplicate corner 2 into slot 4 and corner 1 into slot 5, turning the
    // 4 computed corners into the 6 vertices of a triangle list:
    // triangles (0,1,2) and (3,2,1) sharing the quad's diagonal.
    memmove(&p->tmp_vertex[num_vertex_attribs * 4],
            &p->tmp_vertex[num_vertex_attribs * 2],
            vertex_stride);

    memmove(&p->tmp_vertex[num_vertex_attribs * 5],
            &p->tmp_vertex[num_vertex_attribs * 1],
            vertex_stride);

    return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs,
                               vertex_stride, p->tmp_vertex, num_vertices);
}
1275
finish_pass_fbo(struct gl_video * p,struct ra_fbo fbo,bool discard,const struct mp_rect * dst)1276 static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo,
1277 bool discard, const struct mp_rect *dst)
1278 {
1279 pass_prepare_src_tex(p);
1280 pass_record(p, render_pass_quad(p, fbo, discard, dst));
1281 debug_check_gl(p, "after rendering");
1282 cleanup_binds(p);
1283 }
1284
// dst_fbo: this will be used for rendering; possibly reallocating the whole
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
                            int w, int h)
{
    // On texture (re)allocation failure, abandon the pass entirely.
    if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) {
        cleanup_binds(p);
        gl_sc_reset(p->sc);
        return;
    }

    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
    // over fragment shaders wherever possible.
    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) &&
        (*dst_tex)->params.storage_dst)
    {
        pass_is_compute(p, 16, 16, true);
    }

    if (p->pass_compute.active) {
        gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
        // Unless the shader stores its result itself, write the final
        // vec4 color to the output image at the invocation's coordinates.
        if (!p->pass_compute.directly_writes)
            GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)

        dispatch_compute(p, w, h, p->pass_compute);
        p->pass_compute = (struct compute_info){0};

        debug_check_gl(p, "after dispatching compute shader");
    } else {
        // Fragment shader path: draw a quad covering the whole target.
        struct ra_fbo fbo = { .tex = *dst_tex, };
        finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h});
    }
}
1320
get_tex_swizzle(struct image * img)1321 static const char *get_tex_swizzle(struct image *img)
1322 {
1323 if (!img->tex)
1324 return "rgba";
1325 return img->tex->params.format->luminance_alpha ? "raaa" : "rgba";
1326 }
1327
// Copy a texture to the vec4 color, while increasing offset. Also applies
// the texture multiplier to the sampled color
static void copy_image(struct gl_video *p, int *offset, struct image img)
{
    int count = img.components;
    assert(*offset + count <= 4);
    assert(img.padding + count <= 4);

    int id = pass_bind(p, img);
    // Build matching source/destination swizzles: skip img.padding leading
    // source components, and start writing at *offset in the destination.
    char src[5] = {0};
    char dst[5] = {0};
    const char *tex_fmt = get_tex_swizzle(&img);
    const char *dst_fmt = "rgba";
    for (int i = 0; i < count; i++) {
        src[i] = tex_fmt[img.padding + i];
        dst[i] = dst_fmt[*offset + i];
    }

    // Integer textures are sampled without normalization; scale them into
    // [0,1] based on the format's component bit depth.
    if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) {
        uint64_t tex_max = 1ull << p->ra_format.component_bits;
        img.multiplier *= 1.0 / (tex_max - 1);
    }

    GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
          dst, img.multiplier, id, id, src);

    *offset += count;
}
1356
// Fill color components not covered by the source with neutral values:
// 0.0 for the color channels, 1.0 for alpha.
static void skip_unused(struct gl_video *p, int num_components)
{
    static const char channels[] = "rgba";
    for (int c = num_components; c < 4; c++) {
        double fill = c < 3 ? 0.0 : 1.0;
        GLSLF("color.%c = %f;\n", channels[c], fill);
    }
}
1362
uninit_scaler(struct gl_video * p,struct scaler * scaler)1363 static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
1364 {
1365 ra_tex_free(p->ra, &scaler->sep_fbo);
1366 ra_tex_free(p->ra, &scaler->lut);
1367 scaler->kernel = NULL;
1368 scaler->initialized = false;
1369 }
1370
hook_prelude(struct gl_video * p,const char * name,int id,struct image img)1371 static void hook_prelude(struct gl_video *p, const char *name, int id,
1372 struct image img)
1373 {
1374 GLSLHF("#define %s_raw texture%d\n", name, id);
1375 GLSLHF("#define %s_pos texcoord%d\n", name, id);
1376 GLSLHF("#define %s_size texture_size%d\n", name, id);
1377 GLSLHF("#define %s_rot texture_rot%d\n", name, id);
1378 GLSLHF("#define %s_off texture_off%d\n", name, id);
1379 GLSLHF("#define %s_pt pixel_size%d\n", name, id);
1380 GLSLHF("#define %s_map texmap%d\n", name, id);
1381 GLSLHF("#define %s_mul %f\n", name, img.multiplier);
1382
1383 char crap[5] = "";
1384 snprintf(crap, sizeof(crap), "%s", get_tex_swizzle(&img));
1385
1386 // Remove leading padding by rotating the swizzle mask.
1387 int len = strlen(crap);
1388 for (int n = 0; n < img.padding; n++) {
1389 if (len) {
1390 char f = crap[0];
1391 memmove(crap, crap + 1, len - 1);
1392 crap[len - 1] = f;
1393 }
1394 }
1395
1396 // Set up the sampling functions
1397 GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n",
1398 name, name, name, crap);
1399
1400 // Since the extra matrix multiplication impacts performance,
1401 // skip it unless the texture was actually rotated
1402 if (gl_transform_eq(img.transform, identity_trans)) {
1403 GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n",
1404 name, name, name, name);
1405 } else {
1406 GLSLHF("#define %s_texOff(off) "
1407 "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n",
1408 name, name, name, name, name);
1409 }
1410 }
1411
saved_img_find(struct gl_video * p,const char * name,struct image * out)1412 static bool saved_img_find(struct gl_video *p, const char *name,
1413 struct image *out)
1414 {
1415 if (!name || !out)
1416 return false;
1417
1418 for (int i = 0; i < p->num_saved_imgs; i++) {
1419 if (strcmp(p->saved_imgs[i].name, name) == 0) {
1420 *out = p->saved_imgs[i].img;
1421 return true;
1422 }
1423 }
1424
1425 return false;
1426 }
1427
saved_img_store(struct gl_video * p,const char * name,struct image img)1428 static void saved_img_store(struct gl_video *p, const char *name,
1429 struct image img)
1430 {
1431 assert(name);
1432
1433 for (int i = 0; i < p->num_saved_imgs; i++) {
1434 if (strcmp(p->saved_imgs[i].name, name) == 0) {
1435 p->saved_imgs[i].img = img;
1436 return;
1437 }
1438 }
1439
1440 MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, (struct saved_img) {
1441 .name = name,
1442 .img = img
1443 });
1444 }
1445
// Bind all textures a hook requested via BIND. Returns false (after rolling
// back the bindings made so far) if a referenced texture does not exist.
static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
                                  struct image img, struct tex_hook *hook)
{
    for (int t = 0; t < SHADER_MAX_BINDS; t++) {
        char *bind_name = (char *)hook->bind_tex[t];

        if (!bind_name)
            continue;

        // This is a special name that means "currently hooked texture";
        // expose it both as HOOKED and under its actual name.
        if (strcmp(bind_name, "HOOKED") == 0) {
            int id = pass_bind(p, img);
            hook_prelude(p, "HOOKED", id, img);
            hook_prelude(p, name, id, img);
            continue;
        }

        // BIND can also be used to load user-defined textures, in which
        // case we will directly load them as a uniform instead of
        // generating the hook_prelude boilerplate
        for (int u = 0; u < p->num_user_textures; u++) {
            struct gl_user_shader_tex *utex = &p->user_textures[u];
            if (bstr_equals0(utex->name, bind_name)) {
                gl_sc_uniform_texture(p->sc, bind_name, utex->tex);
                goto next_bind;
            }
        }

        struct image bind_img;
        if (!saved_img_find(p, bind_name, &bind_img)) {
            // Clean up texture bindings and move on to the next hook
            // (drop the pass images this loop has bound so far).
            MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n",
                     name, bind_name);
            p->num_pass_imgs -= t;
            return false;
        }

        hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img);

    next_bind: ;
    }

    return true;
}
1490
next_hook_tex(struct gl_video * p)1491 static struct ra_tex **next_hook_tex(struct gl_video *p)
1492 {
1493 if (p->idx_hook_textures == p->num_hook_textures)
1494 MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL);
1495
1496 return &p->hook_textures[p->idx_hook_textures++];
1497 }
1498
// Process hooks for a plane, saving the result and returning a new image
// If 'trans' is NULL, the shader is forbidden from transforming img
static struct image pass_hook(struct gl_video *p, const char *name,
                              struct image img, struct gl_transform *trans)
{
    if (!name)
        return img;

    // Make the pre-hook image available to other hooks under this name.
    saved_img_store(p, name, img);

    MP_TRACE(p, "Running hooks for %s\n", name);
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        // Figure out if this pass hooks this texture
        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        continue;

    found:
        // Check the hook's condition
        if (hook->cond && !hook->cond(p, img, hook->priv)) {
            MP_TRACE(p, "Skipping hook on %s due to condition.\n", name);
            continue;
        }

        // SAVE redirects the result; without it, the hook overwrites the
        // hooked texture itself.
        const char *store_name = hook->save_tex ? hook->save_tex : name;
        bool is_overwrite = strcmp(store_name, name) == 0;

        // If user shader is set to align HOOKED with reference and fix its
        // offset, it requires HOOKED to be resizable and overwrited.
        if (is_overwrite && hook->align_offset) {
            if (!trans) {
                MP_ERR(p, "Hook tried to align unresizable texture %s!\n",
                       name);
                return img;
            }

            // Apply only the translation part of the accumulated transform.
            struct gl_transform align_off = identity_trans;
            align_off.t[0] = trans->t[0];
            align_off.t[1] = trans->t[1];

            gl_transform_trans(align_off, &img.transform);
        }

        if (!pass_hook_setup_binds(p, name, img, hook))
            continue;

        // Run the actual hook. This generates a series of GLSL shader
        // instructions sufficient for drawing the hook's output
        struct gl_transform hook_off = identity_trans;
        hook->hook(p, img, &hook_off, hook->priv);

        int comps = hook->components ? hook->components : img.components;
        skip_unused(p, comps);

        // Compute the updated FBO dimensions and store the result
        struct mp_rect_f sz = {0, 0, img.w, img.h};
        gl_transform_rect(hook_off, &sz);
        int w = lroundf(fabs(sz.x1 - sz.x0));
        int h = lroundf(fabs(sz.y1 - sz.y0));

        struct ra_tex **tex = next_hook_tex(p);
        finish_pass_tex(p, tex, w, h);
        struct image saved_img = image_wrap(*tex, img.type, comps);

        // If the texture we're saving overwrites the "current" texture, also
        // update the tex parameter so that the future loop cycles will use the
        // updated values, and export the offset
        if (is_overwrite) {
            if (!trans && !gl_transform_eq(hook_off, identity_trans)) {
                MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n",
                       name);
                return img;
            }

            img = saved_img;
            if (trans) {
                gl_transform_trans(hook_off, trans);

                // If user shader is set to align HOOKED, the offset it produces
                // is dynamic (with static resizing factor though).
                // Align it with reference manually to get offset fixed.
                if (hook->align_offset) {
                    trans->t[0] = 0.0;
                    trans->t[1] = 0.0;
                }
            }
        }

        saved_img_store(p, store_name, saved_img);
    }

    return img;
}
1597
// This can be used at any time in the middle of rendering to specify an
// optional hook point, which if triggered will render out to a new FBO and
// load the result back into vec4 color. Offsets applied by the hooks are
// accumulated in tex_trans, and the FBO is dimensioned according
// to p->texture_w/h
static void pass_opt_hook_point(struct gl_video *p, const char *name,
                                struct gl_transform *tex_trans)
{
    if (!name)
        return;

    // Check whether any hook either hooks or binds this name; if nothing
    // references it, avoid the extra FBO round trip entirely.
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        for (int b = 0; b < SHADER_MAX_BINDS; b++) {
            if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0)
                goto found;
        }
    }

    // Nothing uses this texture, don't bother storing it
    return;

found: ;
    // Render the current pass to an FBO, run the hooks on it, then reload
    // the (possibly resized/offset) result back into vec4 color.
    struct ra_tex **tex = next_hook_tex(p);
    finish_pass_tex(p, tex, p->texture_w, p->texture_h);
    struct image img = image_wrap(*tex, PLANE_RGB, p->components);
    img = pass_hook(p, name, img, tex_trans);
    copy_image(p, &(int){0}, img);
    p->texture_w = img.w;
    p->texture_h = img.h;
    p->components = img.components;
    pass_describe(p, "(remainder pass)");
}
1637
// Add a custom shader body to the shader cache and set up the standard
// uniforms (random, frame, input_size, target_size, tex_offset) that user
// shaders may reference.
static void load_shader(struct gl_video *p, struct bstr body)
{
    gl_sc_hadd_bstr(p->sc, body);
    // NOTE(review): gl_sc_uniform_dynamic() appears to affect the next
    // uniform declaration (hence it precedes each per-frame value) — confirm
    // against the gl_shader_cache API.
    gl_sc_uniform_dynamic(p->sc);
    gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX);
    gl_sc_uniform_dynamic(p->sc);
    gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
    // Source rect size, scaled by the accumulated texture_offset transform.
    gl_sc_uniform_vec2(p->sc, "input_size",
                       (float[]){(p->src_rect.x1 - p->src_rect.x0) *
                                  p->texture_offset.m[0][0],
                                  (p->src_rect.y1 - p->src_rect.y0) *
                                  p->texture_offset.m[1][1]});
    gl_sc_uniform_vec2(p->sc, "target_size",
                       (float[]){p->dst_rect.x1 - p->dst_rect.x0,
                                 p->dst_rect.y1 - p->dst_rect.y0});
    // Source rect origin mapped through the texture_offset transform.
    gl_sc_uniform_vec2(p->sc, "tex_offset",
                       (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] +
                                 p->texture_offset.t[0],
                                 p->src_rect.y0 * p->texture_offset.m[1][1] +
                                 p->texture_offset.t[1]});
}
1659
// Semantic equality: like ==, except that NaN compares equal to NaN.
static bool double_seq(double a, double b)
{
    if (isnan(a) || isnan(b))
        return isnan(a) && isnan(b);
    return a == b;
}
1665
// Compare two scaler function configurations for semantic equality
// (NaN parameters compare equal via double_seq).
static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b)
{
    if (!a.name != !b.name)
        return false;
    if (a.name && strcmp(a.name, b.name) != 0)
        return false;

    return double_seq(a.params[0], b.params[0]) &&
           double_seq(a.params[1], b.params[1]) &&
           a.blur == b.blur &&
           a.taper == b.taper;
}
1677
// Compare two scaler configurations with respect to LUT generation.
// Note: antiring isn't compared because it doesn't affect LUT generation.
static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b)
{
    if (a.radius != b.radius || a.clamp != b.clamp)
        return false;
    return scaler_fun_eq(a.kernel, b.kernel) &&
           scaler_fun_eq(a.window, b.window);
}
1687
// (Re)initialize a scaler for the given configuration and scale factor,
// rebuilding its filter kernel and weight LUT texture. No-op if the scaler
// is already initialized with identical parameters.
static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
                          const struct scaler_config *conf,
                          double scale_factor,
                          int sizes[])
{
    if (scaler_conf_eq(scaler->conf, *conf) &&
        scaler->scale_factor == scale_factor &&
        scaler->initialized)
        return;

    uninit_scaler(p, scaler);

    const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name);
    const struct filter_window *t_window = mp_find_filter_window(conf->window.name);
    bool is_tscale = scaler->index == SCALER_TSCALE;

    scaler->conf = *conf;
    scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale);
    scaler->conf.window.name = t_window ? (char *)t_window->name : NULL;
    scaler->scale_factor = scale_factor;
    scaler->insufficient = false;
    scaler->initialized = true;
    // Non-convolution filters (bilinear etc.) have no kernel; nothing
    // further to set up for them.
    if (!t_kernel)
        return;

    // Copy the kernel so per-scaler parameter overrides below don't
    // modify the shared global table.
    scaler->kernel_storage = *t_kernel;
    scaler->kernel = &scaler->kernel_storage;

    if (!t_window) {
        // fall back to the scaler's default window if available
        t_window = mp_find_filter_window(t_kernel->window);
    }
    if (t_window)
        scaler->kernel->w = *t_window;

    // Apply user-provided kernel/window parameter overrides (NaN means
    // "keep the filter's default").
    for (int n = 0; n < 2; n++) {
        if (!isnan(conf->kernel.params[n]))
            scaler->kernel->f.params[n] = conf->kernel.params[n];
        if (!isnan(conf->window.params[n]))
            scaler->kernel->w.params[n] = conf->window.params[n];
    }

    if (conf->kernel.blur > 0.0)
        scaler->kernel->f.blur = conf->kernel.blur;
    if (conf->window.blur > 0.0)
        scaler->kernel->w.blur = conf->window.blur;

    if (conf->kernel.taper > 0.0)
        scaler->kernel->f.taper = conf->kernel.taper;
    if (conf->window.taper > 0.0)
        scaler->kernel->w.taper = conf->window.taper;

    if (scaler->kernel->f.resizable && conf->radius > 0.0)
        scaler->kernel->f.radius = conf->radius;

    scaler->kernel->clamp = conf->clamp;
    scaler->kernel->value_cutoff = conf->cutoff;

    // mp_init_filter clamps the kernel to the supported sizes; record
    // whether the requested filter couldn't be fully realized.
    scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);

    // Pack the filter weights into a float16 texture: up to 4 weights per
    // texel, 'width' texels per LUT row.
    int size = scaler->kernel->size;
    int num_components = size > 2 ? 4 : size;
    const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
    assert(fmt);

    int width = (size + num_components - 1) / num_components; // round up
    int stride = width * num_components;
    assert(size <= stride);

    scaler->lut_size = 1 << p->opts.scaler_lut_size;

    float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
    mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);

    // Polar filters only need a 1D LUT; use a true 1D texture if supported.
    bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);

    struct ra_tex_params lut_params = {
        .dimensions = use_1d ? 1 : 2,
        .w = use_1d ? scaler->lut_size : width,
        .h = use_1d ? 1 : scaler->lut_size,
        .d = 1,
        .format = fmt,
        .render_src = true,
        .src_linear = true,
        .initial_data = weights,
    };
    scaler->lut = ra_tex_create(p->ra, &lut_params);

    talloc_free(weights);

    debug_check_gl(p, "after initializing scaler");
}
1780
// Special helper for sampling from two separated stages
static void pass_sample_separated(struct gl_video *p, struct image src,
                                  struct scaler *scaler, int w, int h)
{
    // Separate the transformation into x and y components, per pass:
    // t_x keeps the horizontal scaling/offset, t_y the vertical part.
    struct gl_transform t_x = {
        .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
        .t = {src.transform.t[0], 0.0},
    };
    struct gl_transform t_y = {
        .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
        .t = {0.0, src.transform.t[1]},
    };

    // First pass (scale only in the y dir); result goes to the scaler's
    // intermediate FBO at the source width but target height.
    src.transform = t_y;
    sampler_prelude(p->sc, pass_bind(p, src));
    GLSLF("// first pass\n");
    pass_sample_separated_gen(p->sc, scaler, 0, 1);
    // The multiplier is applied in the first stage only.
    GLSLF("color *= %f;\n", src.multiplier);
    finish_pass_tex(p, &scaler->sep_fbo, src.w, h);

    // Second pass (scale only in the x dir)
    src = image_wrap(scaler->sep_fbo, src.type, src.components);
    src.transform = t_x;
    pass_describe(p, "%s second pass", scaler->conf.kernel.name);
    sampler_prelude(p->sc, pass_bind(p, src));
    pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
1810
// Picks either the compute shader version or the regular sampler version
// depending on hardware support
static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler,
                                       struct image img, int w, int h)
{
    uint64_t reqs = RA_CAP_COMPUTE;
    if ((p->ra->caps & reqs) != reqs)
        goto fallback;

    // Kernel footprint around each output sample, in source pixels.
    int bound = ceil(scaler->kernel->radius_cutoff);
    int offset = bound - 1; // padding top/left
    int padding = offset + bound; // total padding

    float ratiox = (float)w / img.w,
          ratioy = (float)h / img.h;

    // For performance we want to load at least as many pixels
    // horizontally as there are threads in a warp (32 for nvidia), as
    // well as enough to take advantage of shmem parallelism
    const int warp_size = 32, threads = 256;
    int bw = warp_size;
    int bh = threads / bw;

    // We need to sample everything from base_min to base_max, so make sure
    // we have enough room in shmem
    int iw = (int)ceil(bw / ratiox) + padding + 1,
        ih = (int)ceil(bh / ratioy) + padding + 1;

    int shmem_req = iw * ih * img.components * sizeof(float);
    if (shmem_req > p->ra->max_shmem)
        goto fallback;

    pass_is_compute(p, bw, bh, false);
    pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
    return;

fallback:
    // Fall back to regular polar shader when compute shaders are unsupported
    // or the kernel is too big for shmem
    pass_sample_polar(p->sc, scaler, img.components,
                      p->ra->caps & RA_CAP_GATHER);
}
1853
1854 // Sample from image, with the src rectangle given by it.
1855 // The dst rectangle is implicit by what the caller will do next, but w and h
1856 // must still be what is going to be used (to dimension FBOs correctly).
1857 // This will write the scaled contents to the vec4 "color".
1858 // The scaler unit is initialized by this function; in order to avoid cache
1859 // thrashing, the scaler unit should usually use the same parameters.
pass_sample(struct gl_video * p,struct image img,struct scaler * scaler,const struct scaler_config * conf,double scale_factor,int w,int h)1860 static void pass_sample(struct gl_video *p, struct image img,
1861 struct scaler *scaler, const struct scaler_config *conf,
1862 double scale_factor, int w, int h)
1863 {
1864 reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
1865
1866 // Describe scaler
1867 const char *scaler_opt[] = {
1868 [SCALER_SCALE] = "scale",
1869 [SCALER_DSCALE] = "dscale",
1870 [SCALER_CSCALE] = "cscale",
1871 [SCALER_TSCALE] = "tscale",
1872 };
1873
1874 pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
1875 scaler->conf.kernel.name, plane_names[img.type]);
1876
1877 bool is_separated = scaler->kernel && !scaler->kernel->polar;
1878
1879 // Set up the transformation+prelude and bind the texture, for everything
1880 // other than separated scaling (which does this in the subfunction)
1881 if (!is_separated)
1882 sampler_prelude(p->sc, pass_bind(p, img));
1883
1884 // Dispatch the scaler. They're all wildly different.
1885 const char *name = scaler->conf.kernel.name;
1886 if (strcmp(name, "bilinear") == 0) {
1887 GLSL(color = texture(tex, pos);)
1888 } else if (strcmp(name, "bicubic_fast") == 0) {
1889 pass_sample_bicubic_fast(p->sc);
1890 } else if (strcmp(name, "oversample") == 0) {
1891 pass_sample_oversample(p->sc, scaler, w, h);
1892 } else if (scaler->kernel && scaler->kernel->polar) {
1893 pass_dispatch_sample_polar(p, scaler, img, w, h);
1894 } else if (scaler->kernel) {
1895 pass_sample_separated(p, img, scaler, w, h);
1896 } else {
1897 // Should never happen
1898 abort();
1899 }
1900
1901 // Apply any required multipliers. Separated scaling already does this in
1902 // its first stage
1903 if (!is_separated)
1904 GLSLF("color *= %f;\n", img.multiplier);
1905
1906 // Micro-optimization: Avoid scaling unneeded channels
1907 skip_unused(p, img.components);
1908 }
1909
1910 // Returns true if two images are semantically equivalent (same metadata)
image_equiv(struct image a,struct image b)1911 static bool image_equiv(struct image a, struct image b)
1912 {
1913 return a.type == b.type &&
1914 a.components == b.components &&
1915 a.multiplier == b.multiplier &&
1916 a.tex->params.format == b.tex->params.format &&
1917 a.tex->params.w == b.tex->params.w &&
1918 a.tex->params.h == b.tex->params.h &&
1919 a.w == b.w &&
1920 a.h == b.h &&
1921 gl_transform_eq(a.transform, b.transform);
1922 }
1923
deband_hook(struct gl_video * p,struct image img,struct gl_transform * trans,void * priv)1924 static void deband_hook(struct gl_video *p, struct image img,
1925 struct gl_transform *trans, void *priv)
1926 {
1927 pass_describe(p, "debanding (%s)", plane_names[img.type]);
1928 pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg,
1929 p->image_params.color.gamma);
1930 }
1931
unsharp_hook(struct gl_video * p,struct image img,struct gl_transform * trans,void * priv)1932 static void unsharp_hook(struct gl_video *p, struct image img,
1933 struct gl_transform *trans, void *priv)
1934 {
1935 pass_describe(p, "unsharp masking");
1936 pass_sample_unsharp(p->sc, p->opts.unsharp);
1937 }
1938
// Context handed to szexp_lookup() when evaluating user-shader size
// expressions: the renderer state plus the image currently being hooked.
struct szexp_ctx {
    struct gl_video *p;
    struct image img;
};
1943
szexp_lookup(void * priv,struct bstr var,float size[2])1944 static bool szexp_lookup(void *priv, struct bstr var, float size[2])
1945 {
1946 struct szexp_ctx *ctx = priv;
1947 struct gl_video *p = ctx->p;
1948
1949 if (bstr_equals0(var, "NATIVE_CROPPED")) {
1950 size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0];
1951 size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1];
1952 return true;
1953 }
1954
1955 // The size of OUTPUT is determined. It could be useful for certain
1956 // user shaders to skip passes.
1957 if (bstr_equals0(var, "OUTPUT")) {
1958 size[0] = p->dst_rect.x1 - p->dst_rect.x0;
1959 size[1] = p->dst_rect.y1 - p->dst_rect.y0;
1960 return true;
1961 }
1962
1963 // HOOKED is a special case
1964 if (bstr_equals0(var, "HOOKED")) {
1965 size[0] = ctx->img.w;
1966 size[1] = ctx->img.h;
1967 return true;
1968 }
1969
1970 for (int o = 0; o < p->num_saved_imgs; o++) {
1971 if (bstr_equals0(var, p->saved_imgs[o].name)) {
1972 size[0] = p->saved_imgs[o].img.w;
1973 size[1] = p->saved_imgs[o].img.h;
1974 return true;
1975 }
1976 }
1977
1978 return false;
1979 }
1980
user_hook_cond(struct gl_video * p,struct image img,void * priv)1981 static bool user_hook_cond(struct gl_video *p, struct image img, void *priv)
1982 {
1983 struct gl_user_shader_hook *shader = priv;
1984 assert(shader);
1985
1986 float res = false;
1987 struct szexp_ctx ctx = {p, img};
1988 eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &res);
1989 return res;
1990 }
1991
// Texture hook: runs one user-shader pass over the hooked image, and
// reports the output size/offset back to the hook machinery via *trans.
static void user_hook(struct gl_video *p, struct image img,
                      struct gl_transform *trans, void *priv)
{
    struct gl_user_shader_hook *shader = priv;
    assert(shader);
    load_shader(p, shader->pass_body);

    pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
                  plane_names[img.type]);

    // Compute shaders write their output themselves; fragment-style shaders
    // return the output color from hook().
    if (shader->compute.active) {
        p->pass_compute = shader->compute;
        GLSLF("hook();\n");
    } else {
        GLSLF("color = hook();\n");
    }

    // Make sure we at least create a legal FBO on failure, since it's better
    // to do this and display an error message than just crash OpenGL
    float w = 1.0, h = 1.0;

    // Evaluate the shader's declared WIDTH/HEIGHT size expressions.
    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w);
    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h);

    // Scale the transform to the new output size, then apply the shader's
    // own offset on top of it.
    *trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}};
    gl_transform_trans(shader->offset, trans);
}
2019
add_user_hook(void * priv,struct gl_user_shader_hook hook)2020 static bool add_user_hook(void *priv, struct gl_user_shader_hook hook)
2021 {
2022 struct gl_video *p = priv;
2023 struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy);
2024 *copy = hook;
2025
2026 struct tex_hook texhook = {
2027 .save_tex = bstrdup0(copy, hook.save_tex),
2028 .components = hook.components,
2029 .align_offset = hook.align_offset,
2030 .hook = user_hook,
2031 .cond = user_hook_cond,
2032 .priv = copy,
2033 };
2034
2035 for (int h = 0; h < SHADER_MAX_HOOKS; h++)
2036 texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]);
2037 for (int h = 0; h < SHADER_MAX_BINDS; h++)
2038 texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]);
2039
2040 MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook);
2041 return true;
2042 }
2043
add_user_tex(void * priv,struct gl_user_shader_tex tex)2044 static bool add_user_tex(void *priv, struct gl_user_shader_tex tex)
2045 {
2046 struct gl_video *p = priv;
2047
2048 tex.tex = ra_tex_create(p->ra, &tex.params);
2049 TA_FREEP(&tex.params.initial_data);
2050
2051 if (!tex.tex)
2052 return false;
2053
2054 MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex);
2055 return true;
2056 }
2057
load_user_shaders(struct gl_video * p,char ** shaders)2058 static void load_user_shaders(struct gl_video *p, char **shaders)
2059 {
2060 if (!shaders)
2061 return;
2062
2063 for (int n = 0; shaders[n] != NULL; n++) {
2064 struct bstr file = load_cached_file(p, shaders[n]);
2065 parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex);
2066 }
2067 }
2068
gl_video_setup_hooks(struct gl_video * p)2069 static void gl_video_setup_hooks(struct gl_video *p)
2070 {
2071 gl_video_reset_hooks(p);
2072
2073 if (p->opts.deband) {
2074 MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
2075 .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"},
2076 .bind_tex = {"HOOKED"},
2077 .hook = deband_hook,
2078 });
2079 }
2080
2081 if (p->opts.unsharp != 0.0) {
2082 MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
2083 .hook_tex = {"MAIN"},
2084 .bind_tex = {"HOOKED"},
2085 .hook = unsharp_hook,
2086 });
2087 }
2088
2089 load_user_shaders(p, p->opts.user_shaders);
2090 }
2091
// sample from video textures, set "color" variable to yuv value
// Reads all source planes, runs per-plane hooks, aligns/scales every plane
// to the reference plane, and combines them into the "color" vec4.
static void pass_read_video(struct gl_video *p)
{
    struct image img[4];
    struct gl_transform offsets[4];
    pass_get_images(p, &p->image, img, offsets);

    // To keep the code as simple as possibly, we currently run all shader
    // stages even if they would be unnecessary (e.g. no hooks for a texture).
    // In the future, deferred image should optimize this away.

    // Merge semantically identical textures. This loop is done from back
    // to front so that merged textures end up in the right order while
    // simultaneously allowing us to skip unnecessary merges
    for (int n = 3; n >= 0; n--) {
        if (img[n].type == PLANE_NONE)
            continue;

        // "first" tracks the lowest plane index merged into this group;
        // "num" counts the components copied so far (advanced by copy_image).
        int first = n;
        int num = 0;

        for (int i = 0; i < n; i++) {
            if (image_equiv(img[n], img[i]) &&
                gl_transform_eq(offsets[n], offsets[i]))
            {
                GLSLF("// merging plane %d ...\n", i);
                copy_image(p, &num, img[i]);
                first = MPMIN(first, i);
                img[i] = (struct image){0};
            }
        }

        if (num > 0) {
            GLSLF("// merging plane %d ... into %d\n", n, first);
            copy_image(p, &num, img[n]);
            pass_describe(p, "merging planes");
            finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h);
            img[first] = image_wrap(p->merge_tex[n], img[n].type, num);
            img[n] = (struct image){0};
        }
    }

    // If any textures are still in integer format by this point, we need
    // to introduce an explicit conversion pass to avoid breaking hooks/scaling
    for (int n = 0; n < 4; n++) {
        if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) {
            GLSLF("// use_integer fix for plane %d\n", n);
            copy_image(p, &(int){0}, img[n]);
            pass_describe(p, "use_integer fix");
            finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h);
            img[n] = image_wrap(p->integer_tex[n], img[n].type,
                                img[n].components);
        }
    }

    // The basic idea is we assume the rgb/luma texture is the "reference" and
    // scale everything else to match, after all planes are finalized.
    // We find the reference texture first, in order to maintain texture offset
    // between hooks on different type of planes.
    int reference_tex_num = 0;
    for (int n = 0; n < 4; n++) {
        switch (img[n].type) {
        case PLANE_RGB:
        case PLANE_XYZ:
        case PLANE_LUMA: break;
        default: continue;
        }

        reference_tex_num = n;
        break;
    }

    // Dispatch the hooks for all of these textures, saving and perhaps
    // modifying them in the process
    for (int n = 0; n < 4; n++) {
        const char *name;
        switch (img[n].type) {
        case PLANE_RGB:    name = "RGB";    break;
        case PLANE_LUMA:   name = "LUMA";   break;
        case PLANE_CHROMA: name = "CHROMA"; break;
        case PLANE_ALPHA:  name = "ALPHA";  break;
        case PLANE_XYZ:    name = "XYZ";    break;
        default: continue;
        }

        img[n] = pass_hook(p, name, img[n], &offsets[n]);

        if (reference_tex_num == n) {
            // The reference texture is finalized now.
            p->texture_w = img[n].w;
            p->texture_h = img[n].h;
            p->texture_offset = offsets[n];
        }
    }

    // At this point all planes are finalized but they may not be at the
    // required size yet. Furthermore, they may have texture offsets that
    // require realignment.

    // Compute the reference rect
    struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h};
    struct mp_rect_f ref = src;
    gl_transform_rect(p->texture_offset, &ref);

    // Explicitly scale all of the textures that don't match
    for (int n = 0; n < 4; n++) {
        if (img[n].type == PLANE_NONE)
            continue;

        // If the planes are aligned identically, we will end up with the
        // exact same source rectangle.
        struct mp_rect_f rect = src;
        gl_transform_rect(offsets[n], &rect);
        if (mp_rect_f_seq(ref, rect))
            continue;

        // If the rectangles differ, then our planes have a different
        // alignment and/or size. First of all, we have to compute the
        // corrections required to meet the target rectangle
        struct gl_transform fix = {
            .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0},
                  {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}},
            .t = {ref.x0, ref.y0},
        };

        // Since the scale in texture space is different from the scale in
        // absolute terms, we have to scale the coefficients down to be
        // relative to the texture's physical dimensions and local offset
        struct gl_transform scale = {
            .m = {{(float)img[n].w / p->texture_w, 0.0},
                  {0.0, (float)img[n].h / p->texture_h}},
            .t = {-rect.x0, -rect.y0},
        };
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(double, scale.m[0][0], scale.m[1][1]);

        gl_transform_trans(scale, &fix);

        // Since the texture transform is a function of the texture coordinates
        // to texture space, rather than the other way around, we have to
        // actually apply the *inverse* of this. Fortunately, calculating
        // the inverse is relatively easy here.
        fix.m[0][0] = 1.0 / fix.m[0][0];
        fix.m[1][1] = 1.0 / fix.m[1][1];
        fix.t[0] = fix.m[0][0] * -fix.t[0];
        fix.t[1] = fix.m[1][1] * -fix.t[1];
        gl_transform_trans(fix, &img[n].transform);

        int scaler_id = -1;
        const char *name = NULL;
        switch (img[n].type) {
        case PLANE_RGB:
        case PLANE_LUMA:
        case PLANE_XYZ:
            scaler_id = SCALER_SCALE;
            // these aren't worth hooking, fringe hypothetical cases only
            break;
        case PLANE_CHROMA:
            scaler_id = SCALER_CSCALE;
            name = "CHROMA_SCALED";
            break;
        case PLANE_ALPHA:
            // alpha always uses bilinear
            name = "ALPHA_SCALED";
        }

        // NOTE(review): for PLANE_ALPHA, scaler_id stays -1, so this also
        // skips the "ALPHA_SCALED" post-scaling hook below — confirm intended.
        if (scaler_id < 0)
            continue;

        const struct scaler_config *conf = &p->opts.scaler[scaler_id];
        struct scaler *scaler = &p->scaler[scaler_id];

        // bilinear scaling is a free no-op thanks to GPU sampling
        if (strcmp(conf->kernel.name, "bilinear") != 0) {
            GLSLF("// upscaling plane %d\n", n);
            pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h);
            finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h);
            img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components);
        }

        // Run any post-scaling hooks
        img[n] = pass_hook(p, name, img[n], NULL);
    }

    // All planes are of the same size and properly aligned at this point
    pass_describe(p, "combining planes");
    int coord = 0;
    for (int i = 0; i < 4; i++) {
        if (img[i].type != PLANE_NONE)
            copy_image(p, &coord, img[i]);
    }
    p->components = coord;
}
2285
2286 // Utility function that simply binds a texture and reads from it, without any
2287 // transformations.
pass_read_tex(struct gl_video * p,struct ra_tex * tex)2288 static void pass_read_tex(struct gl_video *p, struct ra_tex *tex)
2289 {
2290 struct image img = image_wrap(tex, PLANE_RGB, p->components);
2291 copy_image(p, &(int){0}, img);
2292 }
2293
// yuv conversion, and any other conversions before main up/down-scaling
static void pass_convert_yuv(struct gl_video *p)
{
    struct gl_shader_cache *sc = p->sc;

    struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
    cparams.gray = p->is_gray;
    cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT;
    mp_csp_set_image_params(&cparams, &p->image_params);
    mp_csp_equalizer_state_get(p->video_eq, &cparams);
    // Inverse of the combined equalizer/user gamma (consumed elsewhere).
    p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);

    pass_describe(p, "color conversion");

    // Reorder components as required by the texture format, if any.
    if (p->color_swizzle[0])
        GLSLF("color = color.%s;\n", p->color_swizzle);

    // Pre-colormatrix input gamma correction
    if (cparams.color.space == MP_CSP_XYZ)
        GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light

    // We always explicitly normalize the range in pass_read_video
    cparams.input_bits = cparams.texture_bits = 0;

    // Conversion to RGB. For RGB itself, this still applies e.g. brightness
    // and contrast controls, or expansion of e.g. LSB-packed 10 bit data.
    struct mp_cmat m = {{{0}}};
    mp_get_csp_matrix(&cparams, &m);
    gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]);
    gl_sc_uniform_vec3(sc, "colormatrix_c", m.c);

    GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;)

    if (p->image_params.color.space == MP_CSP_BT_2020_C) {
        // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
        //   C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
        //        = (B'-Y'c) / 1.5816  | C'bc >  0
        //
        //   C'rc = (R'-Y'c) / 1.7184  | C'rc <= 0
        //        = (R'-Y'c) / 0.9936  | C'rc >  0
        //
        // as per the BT.2020 specification, table 4. This is a non-linear
        // transformation because (constant) luminance receives non-equal
        // contributions from the three different channels.
        GLSLF("// constant luminance conversion                            \n"
              "color.br = color.br * mix(vec2(1.5816, 0.9936),             \n"
              "                          vec2(1.9404, 1.7184),             \n"
              "                          %s(lessThanEqual(color.br, vec2(0))))\n"
              "           + color.gg;                                      \n",
              gl_sc_bvec(p->sc, 2));
        // Expand channels to camera-linear light. This shader currently just
        // assumes everything uses the BT.2020 12-bit gamma function, since the
        // difference between 10 and 12-bit is negligible for anything other
        // than 12-bit content.
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),                       \n"
              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
              "                    vec3(1.0/0.45)),                             \n"
              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
              gl_sc_bvec(p->sc, 3));
        // Calculate the green channel from the expanded RYcB
        // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
        GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
        // Recompress to receive the R'G'B' result, same as other systems
        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                       \n"
              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
              "                %s(lessThanEqual(vec3(0.0181), color.rgb))); \n",
              gl_sc_bvec(p->sc, 3));
    }

    // Decide how many components remain meaningful downstream, and normalize
    // alpha to premultiplied form (the renderer's internal convention).
    p->components = 3;
    if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) {
        GLSL(color.a = 1.0;)
    } else if (p->image_params.alpha == MP_ALPHA_PREMUL) {
        p->components = 4;
    } else {
        p->components = 4;
        GLSL(color = vec4(color.rgb * color.a, color.a);) // straight -> premul
    }
}
2373
get_scale_factors(struct gl_video * p,bool transpose_rot,double xy[2])2374 static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2])
2375 {
2376 double target_w = p->src_rect.x1 - p->src_rect.x0;
2377 double target_h = p->src_rect.y1 - p->src_rect.y0;
2378 if (transpose_rot && p->image_params.rotate % 180 == 90)
2379 MPSWAP(double, target_w, target_h);
2380 xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w;
2381 xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h;
2382 }
2383
2384 // Cropping.
compute_src_transform(struct gl_video * p,struct gl_transform * tr)2385 static void compute_src_transform(struct gl_video *p, struct gl_transform *tr)
2386 {
2387 float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w,
2388 sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h,
2389 ox = p->src_rect.x0,
2390 oy = p->src_rect.y0;
2391 struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}};
2392
2393 gl_transform_trans(p->texture_offset, &transform);
2394
2395 *tr = transform;
2396 }
2397
// Takes care of the main scaling and pre/post-conversions
static void pass_scale_main(struct gl_video *p)
{
    // Figure out the main scaler.
    double xy[2];
    get_scale_factors(p, true, xy);

    // actual scale factor should be divided by the scale factor of prescaling.
    xy[0] /= p->texture_offset.m[0][0];
    xy[1] /= p->texture_offset.m[1][1];

    // The calculation of scale factor involves 32-bit float(from gl_transform),
    // use non-strict equality test to tolerate precision loss.
    bool downscaling = xy[0] < 1.0 - FLT_EPSILON || xy[1] < 1.0 - FLT_EPSILON;
    bool upscaling = !downscaling && (xy[0] > 1.0 + FLT_EPSILON ||
                                      xy[1] > 1.0 + FLT_EPSILON);
    double scale_factor = 1.0;

    struct scaler *scaler = &p->scaler[SCALER_SCALE];
    struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE];
    if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
        scaler_conf.kernel.name = "bilinear";
        // For scaler-resizes-only, we round the texture offset to
        // the nearest round value in order to prevent ugly blurriness
        // (in exchange for slightly shifting the image by up to half a
        // subpixel)
        p->texture_offset.t[0] = roundf(p->texture_offset.t[0]);
        p->texture_offset.t[1] = roundf(p->texture_offset.t[1]);
    }
    // A separate downscaler takes precedence when actually downscaling.
    if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) {
        scaler_conf = p->opts.scaler[SCALER_DSCALE];
        scaler = &p->scaler[SCALER_DSCALE];
    }

    // When requesting correct-downscaling and the clip is anamorphic, and
    // because only a single scale factor is used for both axes, enable it only
    // when both axes are downscaled, and use the milder of the factors to not
    // end up with too much blur on one axis (even if we end up with sub-optimal
    // scale factor on the other axis). This is better than not respecting
    // correct scaling at all for anamorphic clips.
    double f = MPMAX(xy[0], xy[1]);
    if (p->opts.correct_downscaling && f < 1.0)
        scale_factor = 1.0 / f;

    // Pre-conversion, like linear light/sigmoidization
    GLSLF("// scaler pre-conversion\n");
    bool use_linear = false;
    if (downscaling) {
        use_linear = p->opts.linear_downscaling;

        // Linear light downscaling results in nasty artifacts for HDR curves
        // due to the potentially extreme brightness differences severely
        // compounding any ringing. So just scale in gamma light instead.
        if (mp_trc_is_hdr(p->image_params.color.gamma))
            use_linear = false;
    } else if (upscaling) {
        use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling;
    }

    if (use_linear) {
        p->use_linear = true;
        pass_linearize(p->sc, p->image_params.color.gamma);
        pass_opt_hook_point(p, "LINEAR", NULL);
    }

    // Sigmoidization only applies on top of linear light when upscaling.
    bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling;
    float sig_center, sig_slope, sig_offset, sig_scale;
    if (use_sigmoid) {
        // Coefficients for the sigmoidal transform are taken from the
        // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal
        sig_center = p->opts.sigmoid_center;
        sig_slope  = p->opts.sigmoid_slope;
        // This function needs to go through (0,0) and (1,1) so we compute the
        // values at 1 and 0, and then scale/shift them, respectively.
        sig_offset = 1.0/(1+expf(sig_slope * sig_center));
        sig_scale  = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
                sig_center, sig_scale, sig_offset, sig_slope);
        pass_opt_hook_point(p, "SIGMOID", NULL);
    }

    pass_opt_hook_point(p, "PREKERNEL", NULL);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0;
    int vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    struct gl_transform transform;
    compute_src_transform(p, &transform);

    GLSLF("// main scaling\n");
    finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h);
    struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components);
    gl_transform_trans(transform, &src.transform);
    pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h);

    // Changes the texture size to display size after main scaler.
    p->texture_w = vp_w;
    p->texture_h = vp_h;

    pass_opt_hook_point(p, "POSTKERNEL", NULL);

    GLSLF("// scaler post-conversion\n");
    if (use_sigmoid) {
        // Inverse of the transformation above
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
                sig_slope, sig_center, sig_offset, sig_scale);
    }
}
2507
2508 // Adapts the colors to the right output color space. (Final pass during
2509 // rendering)
2510 // If OSD is true, ignore any changes that may have been made to the video
2511 // by previous passes (i.e. linear scaling)
pass_colormanage(struct gl_video * p,struct mp_colorspace src,struct mp_colorspace fbo_csp,bool osd)2512 static void pass_colormanage(struct gl_video *p, struct mp_colorspace src,
2513 struct mp_colorspace fbo_csp, bool osd)
2514 {
2515 struct ra *ra = p->ra;
2516
2517 // Configure the destination according to the FBO color space,
2518 // unless specific transfer function, primaries or target peak
2519 // is set. If values are set to _AUTO, the most likely intended
2520 // values are guesstimated later in this function.
2521 struct mp_colorspace dst = {
2522 .gamma = p->opts.target_trc == MP_CSP_TRC_AUTO ?
2523 fbo_csp.gamma : p->opts.target_trc,
2524 .primaries = p->opts.target_prim == MP_CSP_PRIM_AUTO ?
2525 fbo_csp.primaries : p->opts.target_prim,
2526 .light = MP_CSP_LIGHT_DISPLAY,
2527 .sig_peak = !p->opts.target_peak ?
2528 fbo_csp.sig_peak : p->opts.target_peak / MP_REF_WHITE,
2529 };
2530
2531 if (!p->colorspace_override_warned &&
2532 ((fbo_csp.gamma && dst.gamma != fbo_csp.gamma) ||
2533 (fbo_csp.primaries && dst.primaries != fbo_csp.primaries)))
2534 {
2535 MP_WARN(p, "One or more colorspace value is being overridden "
2536 "by user while the FBO provides colorspace information: "
2537 "transfer function: (dst: %s, fbo: %s), "
2538 "primaries: (dst: %s, fbo: %s). "
2539 "Rendering can lead to incorrect results!\n",
2540 m_opt_choice_str(mp_csp_trc_names, dst.gamma),
2541 m_opt_choice_str(mp_csp_trc_names, fbo_csp.gamma),
2542 m_opt_choice_str(mp_csp_prim_names, dst.primaries),
2543 m_opt_choice_str(mp_csp_prim_names, fbo_csp.primaries));
2544 p->colorspace_override_warned = true;
2545 }
2546
2547 if (dst.gamma == MP_CSP_TRC_HLG)
2548 dst.light = MP_CSP_LIGHT_SCENE_HLG;
2549
2550 if (p->use_lut_3d) {
2551 // The 3DLUT is always generated against the video's original source
2552 // space, *not* the reference space. (To avoid having to regenerate
2553 // the 3DLUT for the OSD on every frame)
2554 enum mp_csp_prim prim_orig = p->image_params.color.primaries;
2555 enum mp_csp_trc trc_orig = p->image_params.color.gamma;
2556
2557 // One exception: HDR is not implemented by LittleCMS for technical
2558 // limitation reasons, so we use a gamma 2.2 input curve here instead.
2559 // We could pick any value we want here, the difference is just coding
2560 // efficiency.
2561 if (mp_trc_is_hdr(trc_orig))
2562 trc_orig = MP_CSP_TRC_GAMMA22;
2563
2564 if (gl_video_get_lut3d(p, prim_orig, trc_orig)) {
2565 dst.primaries = prim_orig;
2566 dst.gamma = trc_orig;
2567 assert(dst.primaries && dst.gamma);
2568 }
2569 }
2570
2571 if (dst.primaries == MP_CSP_PRIM_AUTO) {
2572 // The vast majority of people are on sRGB or BT.709 displays, so pick
2573 // this as the default output color space.
2574 dst.primaries = MP_CSP_PRIM_BT_709;
2575
2576 if (src.primaries == MP_CSP_PRIM_BT_601_525 ||
2577 src.primaries == MP_CSP_PRIM_BT_601_625)
2578 {
2579 // Since we auto-pick BT.601 and BT.709 based on the dimensions,
2580 // combined with the fact that they're very similar to begin with,
2581 // and to avoid confusing the average user, just don't adapt BT.601
2582 // content automatically at all.
2583 dst.primaries = src.primaries;
2584 }
2585 }
2586
2587 if (dst.gamma == MP_CSP_TRC_AUTO) {
2588 // Most people seem to complain when the image is darker or brighter
2589 // than what they're "used to", so just avoid changing the gamma
2590 // altogether by default. The only exceptions to this rule apply to
2591 // very unusual TRCs, which even hardcode technoluddites would probably
2592 // not enjoy viewing unaltered.
2593 dst.gamma = src.gamma;
2594
2595 // Avoid outputting linear light or HDR content "by default". For these
2596 // just pick gamma 2.2 as a default, since it's a good estimate for
2597 // the response of typical displays
2598 if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma))
2599 dst.gamma = MP_CSP_TRC_GAMMA22;
2600 }
2601
2602 // If there's no specific signal peak known for the output display, infer
2603 // it from the chosen transfer function. Also normalize the src peak, in
2604 // case it was unknown
2605 if (!dst.sig_peak)
2606 dst.sig_peak = mp_trc_nom_peak(dst.gamma);
2607 if (!src.sig_peak)
2608 src.sig_peak = mp_trc_nom_peak(src.gamma);
2609
2610 struct gl_tone_map_opts tone_map = p->opts.tone_map;
2611 bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
2612 && src.sig_peak > dst.sig_peak;
2613
2614 if (detect_peak && !p->hdr_peak_ssbo) {
2615 struct {
2616 float average[2];
2617 int32_t frame_sum;
2618 uint32_t frame_max;
2619 uint32_t counter;
2620 } peak_ssbo = {0};
2621
2622 struct ra_buf_params params = {
2623 .type = RA_BUF_TYPE_SHADER_STORAGE,
2624 .size = sizeof(peak_ssbo),
2625 .initial_data = &peak_ssbo,
2626 };
2627
2628 p->hdr_peak_ssbo = ra_buf_create(ra, ¶ms);
2629 if (!p->hdr_peak_ssbo) {
2630 MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
2631 tone_map.compute_peak = p->opts.tone_map.compute_peak = -1;
2632 detect_peak = false;
2633 }
2634 }
2635
2636 if (detect_peak) {
2637 pass_describe(p, "detect HDR peak");
2638 pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
2639 gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
2640 "vec2 average;"
2641 "int frame_sum;"
2642 "uint frame_max;"
2643 "uint counter;"
2644 );
2645 } else {
2646 tone_map.compute_peak = -1;
2647 }
2648
2649 // Adapt from src to dst as necessary
2650 pass_color_map(p->sc, p->use_linear && !osd, src, dst, &tone_map);
2651
2652 if (p->use_lut_3d) {
2653 gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture);
2654 GLSL(vec3 cpos;)
2655 for (int i = 0; i < 3; i++)
2656 GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]);
2657 GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;)
2658 }
2659 }
2660
// Sets the effective framebuffer depth (bits per component), used by
// pass_dither() to pick the dithering target depth.
void gl_video_set_fb_depth(struct gl_video *p, int fb_depth)
{
    p->fb_depth = fb_depth;
}
2665
pass_dither(struct gl_video * p)2666 static void pass_dither(struct gl_video *p)
2667 {
2668 // Assume 8 bits per component if unknown.
2669 int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8;
2670 if (p->opts.dither_depth > 0)
2671 dst_depth = p->opts.dither_depth;
2672
2673 if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE)
2674 return;
2675
2676 if (p->opts.dither_algo == DITHER_ERROR_DIFFUSION) {
2677 const struct error_diffusion_kernel *kernel =
2678 mp_find_error_diffusion_kernel(p->opts.error_diffusion);
2679 int o_w = p->dst_rect.x1 - p->dst_rect.x0,
2680 o_h = p->dst_rect.y1 - p->dst_rect.y0;
2681
2682 int shmem_req = mp_ef_compute_shared_memory_size(kernel, o_h);
2683 if (shmem_req > p->ra->max_shmem) {
2684 MP_WARN(p, "Fallback to dither=fruit because there is no enough "
2685 "shared memory (%d/%d).\n",
2686 shmem_req, (int)p->ra->max_shmem);
2687 p->opts.dither_algo = DITHER_FRUIT;
2688 } else {
2689 finish_pass_tex(p, &p->error_diffusion_tex[0], o_w, o_h);
2690
2691 struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components);
2692
2693 // 1024 is minimal required number of invocation allowed in single
2694 // work group in OpenGL. Use it for maximal performance.
2695 int block_size = MPMIN(1024, o_h);
2696
2697 pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)",
2698 kernel->name, dst_depth);
2699
2700 p->pass_compute = (struct compute_info) {
2701 .active = true,
2702 .threads_w = block_size,
2703 .threads_h = 1,
2704 .directly_writes = true
2705 };
2706
2707 int tex_id = pass_bind(p, img);
2708
2709 pass_error_diffusion(p->sc, kernel, tex_id, o_w, o_h,
2710 dst_depth, block_size);
2711
2712 finish_pass_tex(p, &p->error_diffusion_tex[1], o_w, o_h);
2713
2714 img = image_wrap(p->error_diffusion_tex[1], PLANE_RGB, p->components);
2715 copy_image(p, &(int){0}, img);
2716
2717 return;
2718 }
2719 }
2720
2721 if (!p->dither_texture) {
2722 MP_VERBOSE(p, "Dither to %d.\n", dst_depth);
2723
2724 int tex_size = 0;
2725 void *tex_data = NULL;
2726 const struct ra_format *fmt = NULL;
2727 void *temp = NULL;
2728
2729 if (p->opts.dither_algo == DITHER_FRUIT) {
2730 int sizeb = p->opts.dither_size;
2731 int size = 1 << sizeb;
2732
2733 if (p->last_dither_matrix_size != size) {
2734 p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
2735 float, size * size);
2736 mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
2737 p->last_dither_matrix_size = size;
2738 }
2739
2740 // Prefer R16 texture since they provide higher precision.
2741 fmt = ra_find_unorm_format(p->ra, 2, 1);
2742 if (!fmt)
2743 fmt = ra_find_float16_format(p->ra, 1);
2744 if (fmt) {
2745 tex_size = size;
2746 tex_data = p->last_dither_matrix;
2747 if (fmt->ctype == RA_CTYPE_UNORM) {
2748 uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size);
2749 for (int n = 0; n < size * size; n++)
2750 t[n] = p->last_dither_matrix[n] * UINT16_MAX;
2751 tex_data = t;
2752 }
2753 } else {
2754 MP_VERBOSE(p, "GL too old. Falling back to ordered dither.\n");
2755 p->opts.dither_algo = DITHER_ORDERED;
2756 }
2757 }
2758
2759 if (p->opts.dither_algo == DITHER_ORDERED) {
2760 temp = talloc_array(NULL, char, 8 * 8);
2761 mp_make_ordered_dither_matrix(temp, 8);
2762
2763 fmt = ra_find_unorm_format(p->ra, 1, 1);
2764 tex_size = 8;
2765 tex_data = temp;
2766 }
2767
2768 struct ra_tex_params params = {
2769 .dimensions = 2,
2770 .w = tex_size,
2771 .h = tex_size,
2772 .d = 1,
2773 .format = fmt,
2774 .render_src = true,
2775 .src_repeat = true,
2776 .initial_data = tex_data,
2777 };
2778 p->dither_texture = ra_tex_create(p->ra, ¶ms);
2779
2780 debug_check_gl(p, "dither setup");
2781
2782 talloc_free(temp);
2783
2784 if (!p->dither_texture)
2785 return;
2786 }
2787
2788 GLSLF("// dithering\n");
2789
2790 // This defines how many bits are considered significant for output on
2791 // screen. The superfluous bits will be used for rounding according to the
2792 // dither matrix. The precision of the source implicitly decides how many
2793 // dither patterns can be visible.
2794 int dither_quantization = (1 << dst_depth) - 1;
2795 int dither_size = p->dither_texture->params.w;
2796
2797 gl_sc_uniform_texture(p->sc, "dither", p->dither_texture);
2798
2799 GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size);
2800
2801 if (p->opts.temporal_dither) {
2802 int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u;
2803 float r = phase * (M_PI / 2); // rotate
2804 float m = phase < 4 ? 1 : -1; // mirror
2805
2806 float matrix[2][2] = {{cos(r), -sin(r) },
2807 {sin(r) * m, cos(r) * m}};
2808 gl_sc_uniform_dynamic(p->sc);
2809 gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]);
2810
2811 GLSL(dither_pos = dither_trafo * dither_pos;)
2812 }
2813
2814 GLSL(float dither_value = texture(dither, dither_pos).r;)
2815 GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n",
2816 dither_quantization, dither_size * dither_size, dither_quantization);
2817 }
2818
// Draws the OSD, in scene-referred colors. If cms is true, subtitles are
// instead adapted to the display's gamut.
// osd_flags: OSD_DRAW_* flags selecting which parts to render
// frame_flags: RENDER_FRAME_* flags of the current render call
static void pass_draw_osd(struct gl_video *p, int osd_flags, int frame_flags,
                          double pts, struct mp_osd_res rect, struct ra_fbo fbo,
                          bool cms)
{
    if (frame_flags & RENDER_FRAME_VF_SUBS)
        osd_flags |= OSD_DRAW_SUB_FILTER;

    // SUB_ONLY and OSD_ONLY together exclude everything: nothing to draw.
    if ((osd_flags & OSD_DRAW_SUB_ONLY) && (osd_flags & OSD_DRAW_OSD_ONLY))
        return;

    mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo3d, osd_flags);

    timer_pool_start(p->osd_timer);
    for (int n = 0; n < MAX_OSD_PARTS; n++) {
        // (This returns false if this part is empty with nothing to draw.)
        if (!mpgl_osd_draw_prepare(p->osd, n, p->sc))
            continue;
        // When subtitles need to be color managed, assume they're in sRGB
        // (for lack of anything saner to do)
        if (cms) {
            static const struct mp_colorspace csp_srgb = {
                .primaries = MP_CSP_PRIM_BT_709,
                .gamma = MP_CSP_TRC_SRGB,
                .light = MP_CSP_LIGHT_DISPLAY,
            };

            pass_colormanage(p, csp_srgb, fbo.color_space, true);
        }
        mpgl_osd_draw_finish(p->osd, n, p->sc, fbo);
    }

    timer_pool_stop(p->osd_timer);
    pass_describe(p, "drawing osd");
    pass_record(p, timer_pool_measure(p->osd_timer));
}
2856
// Compensation factor for chroma plane padding: maps the padded texture
// extent (as computed by chroma_upsize()) back onto the nominal size.
static float chroma_realign(int size, int pixel)
{
    float padded = (float)chroma_upsize(size, pixel);
    return size / padded;
}
2861
2862 // Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs.
pass_render_frame_dumb(struct gl_video * p)2863 static void pass_render_frame_dumb(struct gl_video *p)
2864 {
2865 struct image img[4];
2866 struct gl_transform off[4];
2867 pass_get_images(p, &p->image, img, off);
2868
2869 struct gl_transform transform;
2870 compute_src_transform(p, &transform);
2871
2872 int index = 0;
2873 for (int i = 0; i < p->plane_count; i++) {
2874 int cw = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1;
2875 int ch = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1;
2876 if (p->image_params.rotate % 180 == 90)
2877 MPSWAP(int, cw, ch);
2878
2879 struct gl_transform t = transform;
2880 t.m[0][0] *= chroma_realign(p->texture_w, cw);
2881 t.m[1][1] *= chroma_realign(p->texture_h, ch);
2882
2883 t.t[0] /= cw;
2884 t.t[1] /= ch;
2885
2886 t.t[0] += off[i].t[0];
2887 t.t[1] += off[i].t[1];
2888
2889 gl_transform_trans(img[i].transform, &t);
2890 img[i].transform = t;
2891
2892 copy_image(p, &index, img[i]);
2893 }
2894
2895 pass_convert_yuv(p);
2896 }
2897
// The main rendering function, takes care of everything up to and including
// upscaling. p->image is rendered.
// flags: bit set of RENDER_FRAME_* flags
// Returns false if uploading the frame failed (frame is marked broken).
// In dumb mode this only uploads; actual sampling happens later in
// pass_render_frame_dumb() (called from pass_draw_to_screen()).
static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi,
                              uint64_t id, int flags)
{
    // initialize the texture parameters and temporary variables
    p->texture_w = p->image_params.w;
    p->texture_h = p->image_params.h;
    p->texture_offset = identity_trans;
    p->components = 0;
    p->num_saved_imgs = 0;
    p->idx_hook_textures = 0;
    p->use_linear = false;

    // try uploading the frame
    if (!pass_upload_image(p, mpi, id))
        return false;

    // 90/270 degree rotation swaps the effective texture dimensions.
    if (p->image_params.rotate % 180 == 90)
        MPSWAP(int, p->texture_w, p->texture_h);

    if (p->dumb_mode)
        return true;

    pass_read_video(p);
    pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
    pass_convert_yuv(p);
    pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset);

    // For subtitles
    double vpts = p->image.mpi->pts;
    if (vpts == MP_NOPTS_VALUE)
        vpts = p->osd_pts;

    // blend_subs=video: blend subtitles at video resolution, before scaling.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO &&
        (flags & RENDER_FRAME_SUBS))
    {
        double scale[2];
        get_scale_factors(p, false, scale);
        struct mp_osd_res rect = {
            .w = p->texture_w, .h = p->texture_h,
            .display_par = scale[1] / scale[0], // counter compensate scaling
        };
        finish_pass_tex(p, &p->blend_subs_tex, rect.w, rect.h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs video");
    }
    pass_opt_hook_point(p, "MAIN", &p->texture_offset);

    pass_scale_main(p);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    // blend_subs=yes: blend subtitles after scaling, at display resolution.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES &&
        (flags & RENDER_FRAME_SUBS))
    {
        // Recreate the real video size from the src/dst rects
        struct mp_osd_res rect = {
            .w = vp_w, .h = vp_h,
            .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w,
            .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h,
            .display_par = 1.0,
        };
        // Adjust margins for scale
        double scale[2];
        get_scale_factors(p, true, scale);
        rect.ml *= scale[0]; rect.mr *= scale[0];
        rect.mt *= scale[1]; rect.mb *= scale[1];
        // We should always blend subtitles in non-linear light
        if (p->use_linear) {
            pass_delinearize(p->sc, p->image_params.color.gamma);
            p->use_linear = false;
        }
        finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs");
    }

    pass_opt_hook_point(p, "SCALED", NULL);

    return true;
}
2985
// Final stage of rendering: applies user gamma, color management, alpha
// blending and dithering, then draws the result to the given FBO.
static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo)
{
    // In dumb mode, the whole frame is sampled here in one go.
    if (p->dumb_mode)
        pass_render_frame_dumb(p);

    // Adjust the overall gamma before drawing to screen
    if (p->user_gamma != 1) {
        gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma);
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));)
    }

    pass_colormanage(p, p->image_params.color, fbo.color_space, false);

    // Since finish_pass_fbo doesn't work with compute shaders, and neither
    // does the checkerboard/dither code, we may need an indirection via
    // p->screen_tex here.
    if (p->pass_compute.active) {
        int o_w = p->dst_rect.x1 - p->dst_rect.x0,
            o_h = p->dst_rect.y1 - p->dst_rect.y0;
        finish_pass_tex(p, &p->screen_tex, o_w, o_h);
        struct image tmp = image_wrap(p->screen_tex, PLANE_RGB, p->components);
        copy_image(p, &(int){0}, tmp);
    }

    if (p->has_alpha){
        if (p->opts.alpha_mode == ALPHA_BLEND_TILES) {
            // Draw checkerboard pattern to indicate transparency
            GLSLF("// transparency checkerboard\n");
            GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));)
            GLSL(vec3 background = vec3(tile.x == tile.y ? 0.93 : 0.87);)
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = 1.0;)
        } else if (p->opts.alpha_mode == ALPHA_BLEND) {
            // Blend into background color (usually black)
            struct m_color c = p->opts.background;
            GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n",
                  c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0);
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = background.a;)
        }
    }

    pass_opt_hook_point(p, "OUTPUT", NULL);

    pass_dither(p);
    pass_describe(p, "output to screen");
    finish_pass_fbo(p, fbo, false, &p->dst_rect);
}
3035
// Render one frame into an interpolation queue surface (in linear light, at
// destination size), and store its frame id and pts in the surface.
// flags: bit set of RENDER_FRAME_* flags
// Returns false if rendering the frame failed.
static bool update_surface(struct gl_video *p, struct mp_image *mpi,
                           uint64_t id, struct surface *surf, int flags)
{
    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;

    pass_info_reset(p, false);
    if (!pass_render_frame(p, mpi, id, flags))
        return false;

    // Frame blending should always be done in linear light to preserve the
    // overall brightness, otherwise this will result in flashing dark frames
    // because mixing in compressed light artificially darkens the results
    if (!p->use_linear) {
        p->use_linear = true;
        pass_linearize(p->sc, p->image_params.color.gamma);
    }

    finish_pass_tex(p, &surf->tex, vp_w, vp_h);
    surf->id = id;
    surf->pts = mpi->pts;
    return true;
}
3060
// Draws an interpolate frame to fbo, based on the frame timing in t
// flags: bit set of RENDER_FRAME_* flags
// Maintains a ring buffer of rendered surfaces (p->surfaces) and blends the
// ones surrounding the target vsync using the tscale filter.
static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
                                       struct ra_fbo fbo, int flags)
{
    bool is_new = false;

    // Reset the queue completely if this is a still image, to avoid any
    // interpolation artifacts from surrounding frames when unpausing or
    // framestepping
    if (t->still)
        gl_video_reset_surfaces(p);

    // First of all, figure out if we have a frame available at all, and draw
    // it manually + reset the queue if not
    if (p->surfaces[p->surface_now].id == 0) {
        struct surface *now = &p->surfaces[p->surface_now];
        if (!update_surface(p, t->current, t->frame_id, now, flags))
            return;
        p->surface_idx = p->surface_now;
        is_new = true;
    }

    // Find the right frame for this instant
    if (t->current) {
        // Advance surface_now while newer (monotonically increasing id)
        // surfaces exist that are still older than the current frame.
        int next = surface_wrap(p->surface_now + 1);
        while (p->surfaces[next].id &&
               p->surfaces[next].id > p->surfaces[p->surface_now].id &&
               p->surfaces[p->surface_now].id < t->frame_id)
        {
            p->surface_now = next;
            next = surface_wrap(next + 1);
        }
    }

    // Figure out the queue size. For illustration, a filter radius of 2 would
    // look like this: _ A [B] C D _
    // A is surface_bse, B is surface_now, C is surface_now+1 and D is
    // surface_end.
    struct scaler *tscale = &p->scaler[SCALER_TSCALE];
    reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes);
    bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0;
    bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0;
    int size;

    if (oversample || linear) {
        // Both only ever blend between two adjacent frames.
        size = 2;
    } else {
        assert(tscale->kernel && !tscale->kernel->polar);
        size = ceil(tscale->kernel->size);
    }

    int radius = size/2;
    int surface_now = p->surface_now;
    int surface_bse = surface_wrap(surface_now - (radius-1));
    int surface_end = surface_wrap(surface_now + radius);
    assert(surface_wrap(surface_bse + size-1) == surface_end);

    // Render new frames while there's room in the queue. Note that technically,
    // this should be done before the step where we find the right frame, but
    // it only barely matters at the very beginning of playback, and this way
    // makes the code much more linear.
    int surface_dst = surface_wrap(p->surface_idx + 1);
    for (int i = 0; i < t->num_frames; i++) {
        // Avoid overwriting data we might still need
        // NOTE(review): surface_bse - 1 is not passed through surface_wrap()
        // here, so the guard never fires when surface_bse == 0 — confirm
        // whether this is intended.
        if (surface_dst == surface_bse - 1)
            break;

        struct mp_image *f = t->frames[i];
        uint64_t f_id = t->frame_id + i;
        // Skip frames that don't match the configured format (e.g. during
        // a format change).
        if (!mp_image_params_equal(&f->params, &p->real_image_params))
            continue;

        if (f_id > p->surfaces[p->surface_idx].id) {
            struct surface *dst = &p->surfaces[surface_dst];
            if (!update_surface(p, f, f_id, dst, flags))
                return;
            p->surface_idx = surface_dst;
            surface_dst = surface_wrap(surface_dst + 1);
            is_new = true;
        }
    }

    // Figure out whether the queue is "valid". A queue is invalid if the
    // frames' PTS is not monotonically increasing. Anything else is invalid,
    // so avoid blending incorrect data and just draw the latest frame as-is.
    // Possible causes for failure of this condition include seeks, pausing,
    // end of playback or start of playback.
    bool valid = true;
    for (int i = surface_bse, ii; valid && i != surface_end; i = ii) {
        ii = surface_wrap(i + 1);
        if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) {
            valid = false;
        } else if (p->surfaces[ii].id < p->surfaces[i].id) {
            valid = false;
            MP_DBG(p, "interpolation queue underrun\n");
        }
    }

    // Update OSD PTS to synchronize subtitles with the displayed frame
    p->osd_pts = p->surfaces[surface_now].pts;

    // Finally, draw the right mix of frames to the screen.
    if (!is_new)
        pass_info_reset(p, true);
    pass_describe(p, "interpolation");
    if (!valid || t->still) {
        // surface_now is guaranteed to be valid, so we can safely use it.
        pass_read_tex(p, p->surfaces[surface_now].tex);
        p->is_interpolated = false;
    } else {
        double mix = t->vsync_offset / t->ideal_frame_duration;
        // The scaler code always wants the fcoord to be between 0 and 1,
        // so we try to adjust by using the previous set of N frames instead
        // (which requires some extra checking to make sure it's valid)
        if (mix < 0.0) {
            int prev = surface_wrap(surface_bse - 1);
            if (p->surfaces[prev].id != 0 &&
                p->surfaces[prev].id < p->surfaces[surface_bse].id)
            {
                mix += 1.0;
                surface_bse = prev;
            } else {
                mix = 0.0; // at least don't blow up, this should only
                           // ever happen at the start of playback
            }
        }

        if (oversample) {
            // Oversample uses the frame area as mix ratio, not the vsync
            // position itself
            double vsync_dist = t->vsync_interval / t->ideal_frame_duration,
                   threshold = tscale->conf.kernel.params[0];
            threshold = isnan(threshold) ? 0.0 : threshold;
            mix = (1 - mix) / vsync_dist;
            mix = mix <= 0 + threshold ? 0 : mix;
            mix = mix >= 1 - threshold ? 1 : mix;
            mix = 1 - mix;
        }

        // Blend the frames together
        if (oversample || linear) {
            gl_sc_uniform_dynamic(p->sc);
            gl_sc_uniform_f(p->sc, "inter_coeff", mix);
            GLSL(color = mix(texture(texture0, texcoord0),
                             texture(texture1, texcoord1),
                             inter_coeff);)
        } else {
            gl_sc_uniform_dynamic(p->sc);
            gl_sc_uniform_f(p->sc, "fcoord", mix);
            pass_sample_separated_gen(p->sc, tscale, 0, 0);
        }

        // Load all the required frames
        for (int i = 0; i < size; i++) {
            struct image img =
                image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex,
                           PLANE_RGB, p->components);
            // Since the code in pass_sample_separated currently assumes
            // the textures are bound in-order and starting at 0, we just
            // assert to make sure this is the case (which it should always be)
            int id = pass_bind(p, img);
            assert(id == i);
        }

        MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n",
                 t->ideal_frame_duration, t->vsync_interval, mix);
        p->is_interpolated = true;
    }
    pass_draw_to_screen(p, fbo);

    p->frames_drawn += 1;
}
3234
// Top-level entry point: render one vo_frame (video + OSD) to the given FBO.
// Dispatches between the hwdec overlay path, the interpolation path, and the
// normal (possibly cached/redrawn) path.
// flags: bit set of RENDER_FRAME_* flags
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame,
                           struct ra_fbo fbo, int flags)
{
    gl_video_update_options(p);

    struct mp_rect target_rc = {0, 0, fbo.tex->params.w, fbo.tex->params.h};

    p->broken_frame = false;

    bool has_frame = !!frame->current;

    // Clear the whole target to the background color first.
    struct m_color c = p->clear_color;
    float clear_color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0};
    p->ra->fns->clear(p->ra, fbo.tex, clear_color, &target_rc);

    if (p->hwdec_overlay) {
        // Hardware overlay: the video is displayed by the hwdec driver
        // itself; we only clear the colorkey area and hand off the frame.
        if (has_frame) {
            float *color = p->hwdec_overlay->overlay_colorkey;
            p->ra->fns->clear(p->ra, fbo.tex, color, &p->dst_rect);
        }

        p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current,
                                                &p->src_rect, &p->dst_rect,
                                                frame->frame_id != p->image.id);

        if (frame->current)
            p->osd_pts = frame->current->pts;

        // Disable GL rendering
        has_frame = false;
    }

    if (has_frame) {
        bool interpolate = p->opts.interpolation && frame->display_synced &&
                           (p->frames_drawn || !frame->still);
        if (interpolate) {
            // Skip interpolation if display and video rate are close enough.
            double ratio = frame->ideal_frame_duration / frame->vsync_interval;
            if (fabs(ratio - 1.0) < p->opts.interpolation_threshold)
                interpolate = false;
        }

        if (interpolate) {
            gl_video_interpolate_frame(p, frame, fbo, flags);
        } else {
            bool is_new = frame->frame_id != p->image.id;

            // Redrawing a frame might update subtitles.
            if (frame->still && p->opts.blend_subs)
                is_new = true;

            if (is_new || !p->output_tex_valid) {
                p->output_tex_valid = false;

                pass_info_reset(p, !is_new);
                if (!pass_render_frame(p, frame->current, frame->frame_id, flags))
                    goto done;

                // For the non-interpolation case, we draw to a single "cache"
                // texture to speed up subsequent re-draws (if any exist)
                struct ra_fbo dest_fbo = fbo;
                bool repeats = frame->num_vsyncs > 1 && frame->display_synced;
                if ((repeats || frame->still) && !p->dumb_mode &&
                    (p->ra->caps & RA_CAP_BLIT) && fbo.tex->params.blit_dst)
                {
                    // Attempt to use the same format as the destination FBO
                    // if possible. Some RAs use a wrapped dummy format here,
                    // so fall back to the fbo_format in that case.
                    const struct ra_format *fmt = fbo.tex->params.format;
                    if (fmt->dummy_format)
                        fmt = p->fbo_format;

                    bool r = ra_tex_resize(p->ra, p->log, &p->output_tex,
                                           fbo.tex->params.w, fbo.tex->params.h,
                                           fmt);
                    if (r) {
                        dest_fbo = (struct ra_fbo) { p->output_tex };
                        p->output_tex_valid = true;
                    }
                }
                pass_draw_to_screen(p, dest_fbo);
            }

            // "output tex valid" and "output tex needed" are equivalent
            if (p->output_tex_valid && fbo.tex->params.blit_dst) {
                pass_info_reset(p, true);
                pass_describe(p, "redraw cached frame");
                struct mp_rect src = p->dst_rect;
                struct mp_rect dst = src;
                if (fbo.flip) {
                    // Produce a vertically mirrored dst rect (y0 > y1);
                    // presumably the RA blit implementation interprets this
                    // as a flip — confirm against the ra blit contract.
                    dst.y0 = fbo.tex->params.h - src.y0;
                    dst.y1 = fbo.tex->params.h - src.y1;
                }
                timer_pool_start(p->blit_timer);
                p->ra->fns->blit(p->ra, fbo.tex, p->output_tex, &dst, &src);
                timer_pool_stop(p->blit_timer);
                pass_record(p, timer_pool_measure(p->blit_timer));
            }
        }
    }

done:

    debug_check_gl(p, "after video rendering");

    if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) {
        // If we haven't actually drawn anything so far, then we technically
        // need to consider this the start of a new pass. Let's call it a
        // redraw just because, since it's basically a blank frame anyway
        if (!has_frame)
            pass_info_reset(p, true);

        int osd_flags = p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0;
        if (!(flags & RENDER_FRAME_SUBS))
            osd_flags |= OSD_DRAW_OSD_ONLY;
        if (!(flags & RENDER_FRAME_OSD))
            osd_flags |= OSD_DRAW_SUB_ONLY;

        pass_draw_osd(p, osd_flags, flags, p->osd_pts, p->osd_rect, fbo, true);
        debug_check_gl(p, "after OSD rendering");
    }

    p->broken_frame |= gl_sc_error_state(p->sc);
    if (p->broken_frame) {
        // Make the screen solid blue to make it visually clear that an
        // error has occurred
        float color[4] = {0.0, 0.05, 0.5, 1.0};
        p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc);
    }

    p->frames_rendered++;
    pass_report_performance(p);
}
3367
gl_video_screenshot(struct gl_video * p,struct vo_frame * frame,struct voctrl_screenshot * args)3368 void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame,
3369 struct voctrl_screenshot *args)
3370 {
3371 if (!p->ra->fns->tex_download)
3372 return;
3373
3374 bool ok = false;
3375 struct mp_image *res = NULL;
3376 struct ra_tex *target = NULL;
3377 struct mp_rect old_src = p->src_rect;
3378 struct mp_rect old_dst = p->dst_rect;
3379 struct mp_osd_res old_osd = p->osd_rect;
3380 struct vo_frame *nframe = vo_frame_ref(frame);
3381
3382 // Disable interpolation and such.
3383 nframe->redraw = true;
3384 nframe->repeat = false;
3385 nframe->still = true;
3386 nframe->pts = 0;
3387 nframe->duration = -1;
3388
3389 if (!args->scaled) {
3390 int w, h;
3391 mp_image_params_get_dsize(&p->image_params, &w, &h);
3392 if (w < 1 || h < 1)
3393 return;
3394
3395 int src_w = p->image_params.w;
3396 int src_h = p->image_params.h;
3397 if (p->image_params.rotate % 180 == 90) {
3398 MPSWAP(int, w, h);
3399 MPSWAP(int, src_w, src_h);
3400 }
3401
3402 struct mp_rect src = {0, 0, src_w, src_h};
3403 struct mp_rect dst = {0, 0, w, h};
3404 struct mp_osd_res osd = {.w = w, .h = h, .display_par = 1.0};
3405 gl_video_resize(p, &src, &dst, &osd);
3406 }
3407
3408 gl_video_reset_surfaces(p);
3409
3410 struct ra_tex_params params = {
3411 .dimensions = 2,
3412 .downloadable = true,
3413 .w = p->osd_rect.w,
3414 .h = p->osd_rect.h,
3415 .render_dst = true,
3416 };
3417
3418 params.format = ra_find_unorm_format(p->ra, 1, 4);
3419 int mpfmt = IMGFMT_RGB0;
3420 if (args->high_bit_depth && p->ra_format.component_bits > 8) {
3421 const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
3422 if (fmt && fmt->renderable) {
3423 params.format = fmt;
3424 mpfmt = IMGFMT_RGBA64;
3425 }
3426 }
3427
3428 if (!params.format || !params.format->renderable)
3429 goto done;
3430 target = ra_tex_create(p->ra, ¶ms);
3431 if (!target)
3432 goto done;
3433
3434 int flags = 0;
3435 if (args->subs)
3436 flags |= RENDER_FRAME_SUBS;
3437 if (args->osd)
3438 flags |= RENDER_FRAME_OSD;
3439 gl_video_render_frame(p, nframe, (struct ra_fbo){target}, flags);
3440
3441 res = mp_image_alloc(mpfmt, params.w, params.h);
3442 if (!res)
3443 goto done;
3444
3445 struct ra_tex_download_params download_params = {
3446 .tex = target,
3447 .dst = res->planes[0],
3448 .stride = res->stride[0],
3449 };
3450 if (!p->ra->fns->tex_download(p->ra, &download_params))
3451 goto done;
3452
3453 if (p->broken_frame)
3454 goto done;
3455
3456 ok = true;
3457 done:
3458 talloc_free(nframe);
3459 ra_tex_free(p->ra, &target);
3460 gl_video_resize(p, &old_src, &old_dst, &old_osd);
3461 gl_video_reset_surfaces(p);
3462 if (!ok)
3463 TA_FREEP(&res);
3464 args->res = res;
3465 }
3466
// Use this color instead of the global option.
void gl_video_set_clear_color(struct gl_video *p, struct m_color c)
{
    // force_clear_color marks that the global option should be ignored.
    p->force_clear_color = true;
    p->clear_color = c;
}
3473
// Set the PTS used for OSD/subtitle rendering (e.g. when redrawing without
// a new video frame).
void gl_video_set_osd_pts(struct gl_video *p, double pts)
{
    p->osd_pts = pts;
}
3478
// Return whether the OSD contents would change if redrawn with the given
// resolution and PTS. Always false if no OSD is present.
bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res,
                               double pts)
{
    return p->osd ? mpgl_osd_check_change(p->osd, res, pts) : false;
}
3484
// Set the video source rect, destination rect and OSD dimensions. No-op if
// nothing changed. Resets the interpolation queue, since cached surfaces
// were rendered for the old geometry.
void gl_video_resize(struct gl_video *p,
                     struct mp_rect *src, struct mp_rect *dst,
                     struct mp_osd_res *osd)
{
    if (mp_rect_equals(&p->src_rect, src) &&
        mp_rect_equals(&p->dst_rect, dst) &&
        osd_res_equals(p->osd_rect, *osd))
        return;

    p->src_rect = *src;
    p->dst_rect = *dst;
    p->osd_rect = *osd;

    gl_video_reset_surfaces(p);

    if (p->osd)
        mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo3d);
}
3503
frame_perf_data(struct pass_info pass[],struct mp_frame_perf * out)3504 static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out)
3505 {
3506 for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
3507 if (!pass[i].desc.len)
3508 break;
3509 out->perf[out->count] = pass[i].perf;
3510 out->desc[out->count] = pass[i].desc.start;
3511 out->count++;
3512 }
3513 }
3514
// Fill *out with the performance data of the most recent fresh render pass
// and redraw pass.
void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out)
{
    *out = (struct voctrl_performance_data){0};
    frame_perf_data(p->pass_fresh,  &out->fresh);
    frame_perf_data(p->pass_redraw, &out->redraw);
}
3521
// Returns false on failure.
// Make the given frame the current image: either map it via the hwdec
// mapper (hardware decoding), or upload its planes to the plane textures
// (software decoding, optionally zero-copy via DR buffers).
static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id)
{
    struct video_image *vimg = &p->image;

    // Already uploaded/mapped; nothing to do.
    if (vimg->id == id)
        return true;

    unref_current_image(p);

    mpi = mp_image_new_ref(mpi);
    if (!mpi)
        goto error;

    vimg->mpi = mpi;
    vimg->id = id;
    p->osd_pts = mpi->pts;
    p->frames_uploaded++;

    if (p->hwdec_active) {
        // Hardware decoding

        if (!p->hwdec_mapper)
            goto error;

        pass_describe(p, "map frame (hwdec)");
        timer_pool_start(p->upload_timer);
        bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0;
        timer_pool_stop(p->upload_timer);
        pass_record(p, timer_pool_measure(p->upload_timer));

        // Set unconditionally (even on failure) — presumably so the cleanup
        // in unref_current_image() unmaps consistently; confirm.
        vimg->hwdec_mapped = true;
        if (ok) {
            // Derive per-plane sizes from the image params, and take the
            // textures provided by the mapper.
            struct mp_image layout = {0};
            mp_image_set_params(&layout, &p->image_params);
            struct ra_tex **tex = p->hwdec_mapper->tex;
            for (int n = 0; n < p->plane_count; n++) {
                vimg->planes[n] = (struct texplane){
                    .w = mp_image_plane_w(&layout, n),
                    .h = mp_image_plane_h(&layout, n),
                    .tex = tex[n],
                };
            }
        } else {
            MP_FATAL(p, "Mapping hardware decoded surface failed.\n");
            goto error;
        }
        return true;
    }

    // Software decoding
    assert(mpi->num_planes == p->plane_count);

    timer_pool_start(p->upload_timer);
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *plane = &vimg->planes[n];
        if (!plane->tex) {
            timer_pool_stop(p->upload_timer);
            goto error;
        }

        struct ra_tex_upload_params params = {
            .tex = plane->tex,
            .src = mpi->planes[n],
            .invalidate = true,
            .stride = mpi->stride[n],
        };

        // Negative stride means the image is stored bottom-up; upload from
        // the last row with a positive stride and remember the flip.
        plane->flipped = params.stride < 0;
        if (plane->flipped) {
            int h = mp_image_plane_h(mpi, n);
            params.src = (char *)params.src + (h - 1) * params.stride;
            params.stride = -params.stride;
        }

        // If the plane memory is a DR (direct rendering) buffer, upload by
        // buffer+offset instead of by pointer (zero-copy path).
        struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
        if (mapped) {
            params.buf = mapped->buf;
            params.buf_offset = (uintptr_t)params.src -
                                (uintptr_t)mapped->buf->data;
            params.src = NULL;
        }

        if (p->using_dr_path != !!mapped) {
            p->using_dr_path = !!mapped;
            MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
        }

        if (!p->ra->fns->tex_upload(p->ra, &params)) {
            timer_pool_stop(p->upload_timer);
            goto error;
        }

        // Keep the image referenced as long as the DR buffer is in use.
        if (mapped && !mapped->mpi)
            mapped->mpi = mp_image_new_ref(mpi);
    }
    timer_pool_stop(p->upload_timer);

    bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD);
    const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive";
    pass_describe(p, "upload frame (%s)", mode);
    pass_record(p, timer_pool_measure(p->upload_timer));

    return true;

error:
    unref_current_image(p);
    p->broken_frame = true;
    return false;
}
3632
test_fbo(struct gl_video * p,const struct ra_format * fmt)3633 static bool test_fbo(struct gl_video *p, const struct ra_format *fmt)
3634 {
3635 MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name);
3636 struct ra_tex *tex = NULL;
3637 bool success = ra_tex_resize(p->ra, p->log, &tex, 16, 16, fmt);
3638 ra_tex_free(p->ra, &tex);
3639 return success;
3640 }
3641
3642 // Return whether dumb-mode can be used without disabling any features.
3643 // Essentially, vo_gpu with mostly default settings will return true.
check_dumb_mode(struct gl_video * p)3644 static bool check_dumb_mode(struct gl_video *p)
3645 {
3646 struct gl_video_opts *o = &p->opts;
3647 if (p->use_integer_conversion)
3648 return false;
3649 if (o->dumb_mode > 0) // requested by user
3650 return true;
3651 if (o->dumb_mode < 0) // disabled by user
3652 return false;
3653
3654 // otherwise, use auto-detection
3655 if (o->correct_downscaling || o->linear_downscaling ||
3656 o->linear_upscaling || o->sigmoid_upscaling || o->interpolation ||
3657 o->blend_subs || o->deband || o->unsharp)
3658 return false;
3659 // check remaining scalers (tscale is already implicitly excluded above)
3660 for (int i = 0; i < SCALER_COUNT; i++) {
3661 if (i != SCALER_TSCALE) {
3662 const char *name = o->scaler[i].kernel.name;
3663 if (name && strcmp(name, "bilinear") != 0)
3664 return false;
3665 }
3666 }
3667 if (o->user_shaders && o->user_shaders[0])
3668 return false;
3669 return true;
3670 }
3671
// Disable features that are not supported with the current OpenGL version.
// Mutates p->opts (a private copy of the user options) in place, and decides
// the FBO format and dumb-mode status for this session.
static void check_gl_features(struct gl_video *p)
{
    struct ra *ra = p->ra;
    // Capability probes that gate the optional features handled below.
    bool have_float_tex = !!ra_find_float16_format(ra, 1);
    bool have_mglsl = ra->glsl_version >= 130; // modern GLSL
    const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2);
    // Luminance-alpha 2-channel formats don't count as usable RG textures.
    bool have_texrg = rg_tex && !rg_tex->luminance_alpha;
    bool have_compute = ra->caps & RA_CAP_COMPUTE;
    bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
    bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;

    // FBO format selection: the user's explicit choice (unless "auto"),
    // otherwise the first working entry from this preference-ordered list.
    const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16",
                                   "rgb10_a2", "rgba8", 0};
    const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
    const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
                          ? user_fbo_fmts : auto_fbo_fmts;
    bool user_specified_fbo_fmt = fbo_fmts == user_fbo_fmts;
    bool fbo_test_result = false;
    bool have_fbo = false;
    p->fbo_format = NULL;
    for (int n = 0; fbo_fmts[n]; n++) {
        const char *fmt = fbo_fmts[n];
        const struct ra_format *f = ra_find_named_format(p->ra, fmt);
        if (!f && user_specified_fbo_fmt)
            MP_WARN(p, "FBO format '%s' not found!\n", fmt);
        // The format must be renderable and linearly filterable, and must
        // pass an actual render-target creation test.
        if (f && f->renderable && f->linear_filter &&
            (fbo_test_result = test_fbo(p, f))) {
            MP_VERBOSE(p, "Using FBO format %s.\n", f->name);
            have_fbo = true;
            p->fbo_format = f;
            break;
        }

        if (user_specified_fbo_fmt) {
            MP_WARN(p, "User-specified FBO format '%s' failed to initialize! "
                       "(exists=%d, renderable=%d, linear_filter=%d, "
                       "fbo_test_result=%d)\n",
                    fmt, !!f, f ? f->renderable : 0, f ? f->linear_filter : 0,
                    fbo_test_result);
        }
    }

    // Dithering (except "none") relies on gl_FragCoord in the shader.
    if (!have_fragcoord && p->opts.dither_depth >= 0 &&
        p->opts.dither_algo != DITHER_NONE)
    {
        p->opts.dither_algo = DITHER_NONE;
        MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n");
    }
    // The alpha checkerboard pattern also needs gl_FragCoord.
    if (!have_fragcoord && p->opts.alpha_mode == ALPHA_BLEND_TILES) {
        p->opts.alpha_mode = ALPHA_BLEND;
        // Verbose, since this is the default setting
        MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n");
    }
    // Compute shaders need an FBO format, and that format must be storable
    // (usable as a shader image).
    if (!have_fbo && have_compute) {
        have_compute = false;
        MP_WARN(p, "Force-disabling compute shaders as an FBO format was not "
                   "available! See your FBO format configuration!\n");
    }

    if (have_compute && have_fbo && !p->fbo_format->storable) {
        have_compute = false;
        MP_WARN(p, "Force-disabling compute shaders as the chosen FBO format "
                   "is not storable! See your FBO format configuration!\n");
    }

    // Error diffusion dithering is implemented as a compute shader.
    if (!have_compute && p->opts.dither_algo == DITHER_ERROR_DIFFUSION) {
        MP_WARN(p, "Disabling error diffusion dithering because compute shader "
                   "was not supported. Fallback to dither=fruit instead.\n");
        p->opts.dither_algo = DITHER_FRUIT;
    }

    // HDR peak detection needs both compute shaders and SSBOs.
    bool have_compute_peak = have_compute && have_ssbo;
    if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) {
        // Only warn loudly if the user explicitly enabled it (== 1).
        int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V;
        MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the "
                        "following is not supported: compute shaders=%d, "
                        "SSBO=%d).\n", have_compute, have_ssbo);
        p->opts.tone_map.compute_peak = -1;
    }

    // Dumb mode is forced if requested, or if essential features are missing.
    p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
    bool voluntarily_dumb = check_dumb_mode(p);
    if (p->forced_dumb_mode || voluntarily_dumb) {
        if (voluntarily_dumb) {
            MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
        } else if (p->opts.dumb_mode <= 0) {
            MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n"
                       "Most extended features will be disabled.\n");
        }
        p->dumb_mode = true;
        // Most things don't work, so whitelist all options that still work.
        // Everything not listed here is reset to its zero value.
        p->opts = (struct gl_video_opts){
            .gamma = p->opts.gamma,
            .gamma_auto = p->opts.gamma_auto,
            .pbo = p->opts.pbo,
            .fbo_format = p->opts.fbo_format,
            .alpha_mode = p->opts.alpha_mode,
            .use_rectangle = p->opts.use_rectangle,
            .background = p->opts.background,
            .dither_algo = p->opts.dither_algo,
            .dither_depth = p->opts.dither_depth,
            .dither_size = p->opts.dither_size,
            .error_diffusion = p->opts.error_diffusion,
            .temporal_dither = p->opts.temporal_dither,
            .temporal_dither_period = p->opts.temporal_dither_period,
            .tex_pad_x = p->opts.tex_pad_x,
            .tex_pad_y = p->opts.tex_pad_y,
            .tone_map = p->opts.tone_map,
            .early_flush = p->opts.early_flush,
            .icc_opts = p->opts.icc_opts,
            .hwdec_interop = p->opts.hwdec_interop,
            .target_trc = p->opts.target_trc,
            .target_prim = p->opts.target_prim,
            .target_peak = p->opts.target_peak,
        };
        // Scalers fall back to their defaults in dumb mode.
        for (int n = 0; n < SCALER_COUNT; n++)
            p->opts.scaler[n] = gl_video_opts_def.scaler[n];
        if (!have_fbo)
            p->use_lut_3d = false;
        return;
    }
    p->dumb_mode = false;

    // Normally, we want to disable them by default if FBOs are unavailable,
    // because they will be slow (not critically slow, but still slower).
    // Without FP textures, we must always disable them.
    // I don't know if luminance alpha float textures exist, so disregard them.
    for (int n = 0; n < SCALER_COUNT; n++) {
        const struct filter_kernel *kernel =
            mp_find_filter_kernel(p->opts.scaler[n].kernel.name);
        if (kernel) {
            char *reason = NULL;
            if (!have_float_tex)
                reason = "(float tex. missing)";
            if (!have_mglsl)
                reason = "(GLSL version too old)";
            if (reason) {
                MP_WARN(p, "Disabling scaler #%d %s %s.\n", n,
                        p->opts.scaler[n].kernel.name, reason);
                // p->opts is a copy => we can just mess with it.
                p->opts.scaler[n].kernel.name = "bilinear";
                if (n == SCALER_TSCALE)
                    p->opts.interpolation = 0;
            }
        }
    }

    // Color management is active if a target colorspace is forced or a 3D
    // LUT (ICC profile) is in use.
    int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO ||
                  p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d;

    // mix() is needed for some gamma functions
    if (!have_mglsl && (p->opts.linear_downscaling ||
                        p->opts.linear_upscaling || p->opts.sigmoid_upscaling))
    {
        p->opts.linear_downscaling = false;
        p->opts.linear_upscaling = false;
        p->opts.sigmoid_upscaling = false;
        MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
    }
    if (!have_mglsl && use_cms) {
        p->opts.target_prim = MP_CSP_PRIM_AUTO;
        p->opts.target_trc = MP_CSP_TRC_AUTO;
        p->use_lut_3d = false;
        MP_WARN(p, "Disabling color management (GLSL version too old).\n");
    }
    if (!have_mglsl && p->opts.deband) {
        p->opts.deband = 0;
        MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
    }
}
3843
// One-time GPU-side initialization: create the pass timer pools and dump
// the available texture/image formats at debug log level.
static void init_gl(struct gl_video *p)
{
    debug_check_gl(p, "before init_gl");

    p->upload_timer = timer_pool_create(p->ra);
    p->blit_timer = timer_pool_create(p->ra);
    p->osd_timer = timer_pool_create(p->ra);

    debug_check_gl(p, "after init_gl");

    ra_dump_tex_formats(p->ra, MSGL_DEBUG);
    ra_dump_img_formats(p->ra, MSGL_DEBUG);
}
3857
// Destroy the renderer and free all associated GPU and CPU resources.
// Safe to call with p == NULL. Teardown order matters: video state and
// hwdecs first, then shader cache, textures/buffers, timers, and OSD.
void gl_video_uninit(struct gl_video *p)
{
    if (!p)
        return;

    uninit_video(p);

    for (int n = 0; n < p->num_hwdecs; n++)
        ra_hwdec_uninit(p->hwdecs[n]);
    p->num_hwdecs = 0;

    gl_sc_destroy(p->sc);

    ra_tex_free(p->ra, &p->lut_3d_texture);
    ra_buf_free(p->ra, &p->hdr_peak_ssbo);

    timer_pool_destroy(p->upload_timer);
    timer_pool_destroy(p->blit_timer);
    timer_pool_destroy(p->osd_timer);

    // Free the pass description strings accumulated for performance stats.
    for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
        talloc_free(p->pass_fresh[i].desc.start);
        talloc_free(p->pass_redraw[i].desc.start);
    }

    mpgl_osd_destroy(p->osd);

    // Forcibly destroy possibly remaining image references. This should also
    // cause gl_video_dr_free_buffer() to be called for the remaining buffers.
    gc_pending_dr_fences(p, true);

    // Should all have been unreffed already.
    assert(!p->num_dr_buffers);

    talloc_free(p);
}
3894
// Reset frame/interpolation state (e.g. on seeks), discarding queued
// surfaces so stale frames are not blended into new output.
void gl_video_reset(struct gl_video *p)
{
    gl_video_reset_surfaces(p);
}
3899
// Return whether the most recently rendered frame was produced by temporal
// interpolation (as opposed to being shown directly).
bool gl_video_showing_interpolated_frame(struct gl_video *p)
{
    return p->is_interpolated;
}
3904
is_imgfmt_desc_supported(struct gl_video * p,const struct ra_imgfmt_desc * desc)3905 static bool is_imgfmt_desc_supported(struct gl_video *p,
3906 const struct ra_imgfmt_desc *desc)
3907 {
3908 if (!desc->num_planes)
3909 return false;
3910
3911 if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode)
3912 return false;
3913
3914 return true;
3915 }
3916
gl_video_check_format(struct gl_video * p,int mp_format)3917 bool gl_video_check_format(struct gl_video *p, int mp_format)
3918 {
3919 struct ra_imgfmt_desc desc;
3920 if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) &&
3921 is_imgfmt_desc_supported(p, &desc))
3922 return true;
3923 for (int n = 0; n < p->num_hwdecs; n++) {
3924 if (ra_hwdec_test_format(p->hwdecs[n], mp_format))
3925 return true;
3926 }
3927 return false;
3928 }
3929
// (Re)configure the renderer for a new video format. Tears down and
// reinitializes video state only if the parameters actually changed;
// an imgfmt of 0 leaves the renderer unconfigured.
void gl_video_config(struct gl_video *p, struct mp_image_params *params)
{
    // Drop references to the current frame before any reinit.
    unmap_overlay(p);
    unref_current_image(p);

    if (!mp_image_params_equal(&p->real_image_params, params)) {
        uninit_video(p);
        p->real_image_params = *params;
        p->image_params = *params;
        if (params->imgfmt)
            init_video(p);
    }

    gl_video_reset_surfaces(p);
}
3945
// Attach a new OSD state source, replacing (and destroying) any existing
// OSD renderer; reinit_osd() rebuilds it from the new source.
void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd)
{
    mpgl_osd_destroy(p->osd);
    p->osd = NULL;
    p->osd_state = osd;
    reinit_osd(p);
}
3953
gl_video_init(struct ra * ra,struct mp_log * log,struct mpv_global * g)3954 struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log,
3955 struct mpv_global *g)
3956 {
3957 struct gl_video *p = talloc_ptrtype(NULL, p);
3958 *p = (struct gl_video) {
3959 .ra = ra,
3960 .global = g,
3961 .log = log,
3962 .sc = gl_sc_create(ra, g, log),
3963 .video_eq = mp_csp_equalizer_create(p, g),
3964 .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf),
3965 };
3966 // make sure this variable is initialized to *something*
3967 p->pass = p->pass_fresh;
3968 struct gl_video_opts *opts = p->opts_cache->opts;
3969 p->cms = gl_lcms_init(p, log, g, opts->icc_opts),
3970 p->opts = *opts;
3971 for (int n = 0; n < SCALER_COUNT; n++)
3972 p->scaler[n] = (struct scaler){.index = n};
3973 // our VAO always has the vec2 position as the first element
3974 MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
3975 .name = "position",
3976 .type = RA_VARTYPE_FLOAT,
3977 .dim_v = 2,
3978 .dim_m = 1,
3979 .offset = 0,
3980 });
3981 init_gl(p);
3982 reinit_from_options(p);
3983 return p;
3984 }
3985
3986 // Get static string for scaler shader. If "tscale" is set to true, the
3987 // scaler must be a separable convolution filter.
handle_scaler_opt(const char * name,bool tscale)3988 static const char *handle_scaler_opt(const char *name, bool tscale)
3989 {
3990 if (name && name[0]) {
3991 const struct filter_kernel *kernel = mp_find_filter_kernel(name);
3992 if (kernel && (!tscale || !kernel->polar))
3993 return kernel->f.name;
3994
3995 for (const char *const *filter = tscale ? fixed_tscale_filters
3996 : fixed_scale_filters;
3997 *filter; filter++) {
3998 if (strcmp(*filter, name) == 0)
3999 return *filter;
4000 }
4001 }
4002 return NULL;
4003 }
4004
// Pick up any pending option changes from the config cache and reinitialize
// the affected renderer state.
static void gl_video_update_options(struct gl_video *p)
{
    if (m_config_cache_update(p->opts_cache)) {
        gl_lcms_update_options(p->cms);
        reinit_from_options(p);
    }

    // Equalizer changes invalidate the cached output texture.
    if (mp_csp_equalizer_state_changed(p->video_eq))
        p->output_tex_valid = false;
}
4015
// Re-apply the current option set: copy options, clamp them against the
// available GL features, and rebuild dependent rendering state.
static void reinit_from_options(struct gl_video *p)
{
    p->use_lut_3d = gl_lcms_has_profile(p->cms);

    // Copy the option fields, so that check_gl_features() can mutate them.
    // This works only for the fields themselves of course, not for any memory
    // referenced by them.
    p->opts = *(struct gl_video_opts *)p->opts_cache->opts;

    if (!p->force_clear_color)
        p->clear_color = p->opts.background;

    check_gl_features(p);
    uninit_rendering(p);
    gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir);
    p->ra->use_pbo = p->opts.pbo;
    gl_video_setup_hooks(p);
    reinit_osd(p);

    // Warn once if interpolation is enabled without display-sync mode,
    // since it will have no effect otherwise.
    int vs;
    mp_read_option_raw(p->global, "video-sync", &m_option_type_choice, &vs);
    if (p->opts.interpolation && !vs && !p->dsi_warned) {
        MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n"
                   "E.g.: --video-sync=display-resample\n");
        p->dsi_warned = true;
    }

    // Warn once if correct-downscaling is combined with a scaler that
    // cannot benefit from it (dscale falls back to scale when unset).
    if (p->opts.correct_downscaling && !p->correct_downscaling_warned) {
        const char *name = p->opts.scaler[SCALER_DSCALE].kernel.name;
        if (!name)
            name = p->opts.scaler[SCALER_SCALE].kernel.name;
        if (!name || !strcmp(name, "bilinear")) {
            MP_WARN(p, "correct-downscaling requires non-bilinear scaler.\n");
            p->correct_downscaling_warned = true;
        }
    }
}
4053
gl_video_configure_queue(struct gl_video * p,struct vo * vo)4054 void gl_video_configure_queue(struct gl_video *p, struct vo *vo)
4055 {
4056 gl_video_update_options(p);
4057
4058 int queue_size = 1;
4059
4060 // Figure out an adequate size for the interpolation queue. The larger
4061 // the radius, the earlier we need to queue frames.
4062 if (p->opts.interpolation) {
4063 const struct filter_kernel *kernel =
4064 mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.name);
4065 if (kernel) {
4066 // filter_scale wouldn't be correctly initialized were we to use it here.
4067 // This is fine since we're always upsampling, but beware if downsampling
4068 // is added!
4069 double radius = kernel->f.radius;
4070 radius = radius > 0 ? radius : p->opts.scaler[SCALER_TSCALE].radius;
4071 queue_size += 1 + ceil(radius);
4072 } else {
4073 // Oversample/linear case
4074 queue_size += 2;
4075 }
4076 }
4077
4078 vo_set_queue_params(vo, 0, queue_size);
4079 }
4080
validate_scaler_opt(struct mp_log * log,const m_option_t * opt,struct bstr name,const char ** value)4081 static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
4082 struct bstr name, const char **value)
4083 {
4084 struct bstr param = bstr0(*value);
4085 char s[20] = {0};
4086 int r = 1;
4087 bool tscale = bstr_equals0(name, "tscale");
4088 if (bstr_equals0(param, "help")) {
4089 r = M_OPT_EXIT;
4090 } else if (bstr_equals0(name, "dscale") && !param.len) {
4091 return r; // empty dscale means "use same as upscaler"
4092 } else {
4093 snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
4094 if (!handle_scaler_opt(s, tscale))
4095 r = M_OPT_INVALID;
4096 }
4097 if (r < 1) {
4098 mp_info(log, "Available scalers:\n");
4099 for (const char *const *filter = tscale ? fixed_tscale_filters
4100 : fixed_scale_filters;
4101 *filter; filter++) {
4102 mp_info(log, " %s\n", *filter);
4103 }
4104 for (int n = 0; mp_filter_kernels[n].f.name; n++) {
4105 if (!tscale || !mp_filter_kernels[n].polar)
4106 mp_info(log, " %s\n", mp_filter_kernels[n].f.name);
4107 }
4108 if (s[0])
4109 mp_fatal(log, "No scaler named '%s' found!\n", s);
4110 }
4111 return r;
4112 }
4113
validate_window_opt(struct mp_log * log,const m_option_t * opt,struct bstr name,const char ** value)4114 static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
4115 struct bstr name, const char **value)
4116 {
4117 struct bstr param = bstr0(*value);
4118 char s[20] = {0};
4119 int r = 1;
4120 if (bstr_equals0(param, "help")) {
4121 r = M_OPT_EXIT;
4122 } else if (!param.len) {
4123 return r; // empty string means "use preferred window"
4124 } else {
4125 snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
4126 const struct filter_window *window = mp_find_filter_window(s);
4127 if (!window)
4128 r = M_OPT_INVALID;
4129 }
4130 if (r < 1) {
4131 mp_info(log, "Available windows:\n");
4132 for (int n = 0; mp_filter_windows[n].name; n++)
4133 mp_info(log, " %s\n", mp_filter_windows[n].name);
4134 if (s[0])
4135 mp_fatal(log, "No window named '%s' found!\n", s);
4136 }
4137 return r;
4138 }
4139
validate_error_diffusion_opt(struct mp_log * log,const m_option_t * opt,struct bstr name,const char ** value)4140 static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt,
4141 struct bstr name, const char **value)
4142 {
4143 struct bstr param = bstr0(*value);
4144 char s[20] = {0};
4145 int r = 1;
4146 if (bstr_equals0(param, "help")) {
4147 r = M_OPT_EXIT;
4148 } else {
4149 snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
4150 const struct error_diffusion_kernel *k = mp_find_error_diffusion_kernel(s);
4151 if (!k)
4152 r = M_OPT_INVALID;
4153 }
4154 if (r < 1) {
4155 mp_info(log, "Available error diffusion kernels:\n");
4156 for (int n = 0; mp_error_diffusion_kernels[n].name; n++)
4157 mp_info(log, " %s\n", mp_error_diffusion_kernels[n].name);
4158 if (s[0])
4159 mp_fatal(log, "No error diffusion kernel named '%s' found!\n", s);
4160 }
4161 return r;
4162 }
4163
// Map an ambient light level (lux) onto the output range [rmin, rmax] by
// interpolating linearly in log10 space between lmin and lmax lux, then
// clamping the result to the output range (which may be given in either
// order). Requires lmax > lmin.
float gl_video_scale_ambient_lux(float lmin, float lmax,
                                 float rmin, float rmax, float lux)
{
    assert(lmax > lmin);

    float num = (rmax - rmin) * (log10(lux) - log10(lmin));
    float den = log10(lmax) - log10(lmin);
    float result = num / den + rmin;

    // Clamp to the output range, whichever way around it was given.
    float hi = MPMAX(rmin, rmax);
    float lo = MPMIN(rmin, rmax);
    return MPMAX(MPMIN(result, hi), lo);
}
4178
// Adjust the gamma option from an ambient light sensor reading, but only
// when automatic gamma is enabled. Maps 16..256 lux onto gamma 1.0..1.2.
void gl_video_set_ambient_lux(struct gl_video *p, int lux)
{
    if (p->opts.gamma_auto) {
        p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux);
        MP_TRACE(p, "ambient light changed: %d lux (gamma: %f)\n", lux,
                 p->opts.gamma);
    }
}
4187
gl_video_dr_alloc_buffer(struct gl_video * p,size_t size)4188 static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size)
4189 {
4190 struct ra_buf_params params = {
4191 .type = RA_BUF_TYPE_TEX_UPLOAD,
4192 .host_mapped = true,
4193 .size = size,
4194 };
4195
4196 struct ra_buf *buf = ra_buf_create(p->ra, ¶ms);
4197 if (!buf)
4198 return NULL;
4199
4200 MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers);
4201 p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf };
4202
4203 return buf->data;
4204 }
4205
gl_video_dr_free_buffer(void * opaque,uint8_t * data)4206 static void gl_video_dr_free_buffer(void *opaque, uint8_t *data)
4207 {
4208 struct gl_video *p = opaque;
4209
4210 for (int n = 0; n < p->num_dr_buffers; n++) {
4211 struct dr_buffer *buffer = &p->dr_buffers[n];
4212 if (buffer->buf->data == data) {
4213 assert(!buffer->mpi); // can't be freed while it has a ref
4214 ra_buf_free(p->ra, &buffer->buf);
4215 MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n);
4216 return;
4217 }
4218 }
4219 // not found - must not happen
4220 assert(0);
4221 }
4222
// Allocate a DR (direct rendering) image backed by a host-mapped GPU buffer,
// so the decoder writes directly into upload-friendly memory. Returns NULL
// if the format is unsupported or allocation fails.
struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h,
                                    int stride_align)
{
    if (!gl_video_check_format(p, imgfmt))
        return NULL;

    int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
    if (size < 0)
        return NULL;

    // Over-allocate by stride_align bytes — presumably slack so the start
    // address can be aligned inside mp_image_from_buffer; TODO confirm.
    int alloc_size = size + stride_align;
    void *ptr = gl_video_dr_alloc_buffer(p, alloc_size);
    if (!ptr)
        return NULL;

    // (we expect vo.c to proxy the free callback, so it happens in the same
    // thread it was allocated in, removing the need for synchronization)
    struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align,
                                                ptr, alloc_size, p,
                                                gl_video_dr_free_buffer);
    if (!res)
        gl_video_dr_free_buffer(p, ptr);
    return res;
}
4247
// Try to load a single hwdec interop driver and, on success, append it to
// the renderer's hwdec list. Failure is silent here (the loader logs).
static void load_add_hwdec(struct gl_video *p, struct mp_hwdec_devices *devs,
                           const struct ra_hwdec_driver *drv, bool is_auto)
{
    struct ra_hwdec *hwdec =
        ra_hwdec_load_driver(p->ra, p->log, p->global, devs, drv, is_auto);
    if (hwdec)
        MP_TARRAY_APPEND(p, p->hwdecs, p->num_hwdecs, hwdec);
}
4256
// Load hwdec interops according to the hwdec-interop option: "no" blocks
// loading, "all"/"auto" loads everything (auto only if load_all_by_default),
// and any other value loads the single named driver.
void gl_video_load_hwdecs(struct gl_video *p, struct mp_hwdec_devices *devs,
                          bool load_all_by_default)
{
    char *type = p->opts.hwdec_interop;
    bool is_auto = !type || !type[0] || strcmp(type, "auto") == 0;
    if (is_auto) {
        if (!load_all_by_default)
            return;
        type = "all";
    }

    if (strcmp(type, "no") == 0) {
        // do nothing, just block further loading
    } else if (strcmp(type, "all") == 0) {
        gl_video_load_hwdecs_all(p, devs);
    } else {
        // Load exactly the driver matching the requested name, if any.
        const struct ra_hwdec_driver *match = NULL;
        for (int i = 0; ra_hwdec_drivers[i]; i++) {
            if (strcmp(type, ra_hwdec_drivers[i]->name) == 0) {
                match = ra_hwdec_drivers[i];
                break;
            }
        }
        if (match)
            load_add_hwdec(p, devs, match, false);
    }
    p->hwdec_interop_loading_done = true;
}
4281
gl_video_load_hwdecs_all(struct gl_video * p,struct mp_hwdec_devices * devs)4282 void gl_video_load_hwdecs_all(struct gl_video *p, struct mp_hwdec_devices *devs)
4283 {
4284 if (!p->hwdec_interop_loading_done) {
4285 for (int n = 0; ra_hwdec_drivers[n]; n++)
4286 load_add_hwdec(p, devs, ra_hwdec_drivers[n], true);
4287 p->hwdec_interop_loading_done = true;
4288 }
4289 }
4290