1 /*
2  * This file is part of mpv.
3  *
4  * mpv is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * mpv is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <math.h>
19 
20 #include <libavutil/cpu.h>
21 
22 #include "common/common.h"
23 #include "common/msg.h"
24 #include "csputils.h"
25 #include "misc/thread_pool.h"
26 #include "misc/thread_tools.h"
27 #include "options/m_config.h"
28 #include "options/m_option.h"
29 #include "repack.h"
30 #include "video/fmt-conversion.h"
31 #include "video/img_format.h"
32 #include "zimg.h"
33 
// Compile-time check that mpv's image byte alignment constant is at least as
// strict as zimg's alignment requirement.
static_assert(MP_IMAGE_BYTE_ALIGN >= ZIMG_ALIGN, "");

// Nonzero if the zimg API is version 2.4 or newer. setup_format() only accepts
// formats with an alpha plane when this is set.
#define HAVE_ZIMG_ALPHA (ZIMG_API_VERSION >= ZIMG_MAKE_API_VERSION(2, 4))
37 
// Choice names accepted by the "scaler" and "scaler-chroma" options, mapped
// to the corresponding zimg resize filter constants. The list is terminated
// by a zeroed entry.
static const struct m_opt_choice_alternatives mp_zimg_scalers[] = {
    {"point",           ZIMG_RESIZE_POINT},
    {"bilinear",        ZIMG_RESIZE_BILINEAR},
    {"bicubic",         ZIMG_RESIZE_BICUBIC},
    {"spline16",        ZIMG_RESIZE_SPLINE16},
    {"spline36",        ZIMG_RESIZE_SPLINE36},
    {"lanczos",         ZIMG_RESIZE_LANCZOS},
    {0}
};
47 
// Default option values. Referenced by zimg_conf.defaults and copied into
// every context created by mp_zimg_alloc().
// .threads is not listed and therefore 0, which the "threads" option maps to
// "auto".
const struct zimg_opts zimg_opts_defaults = {
    .scaler = ZIMG_RESIZE_LANCZOS,
    // NAN pairs with the M_OPT_DEFAULT_NAN flag on the matching options
    // (presumably meaning "use the filter's own default" - TODO confirm).
    .scaler_params = {NAN, NAN},
    .scaler_chroma_params = {NAN, NAN},
    .scaler_chroma = ZIMG_RESIZE_BILINEAR,
    .dither = ZIMG_DITHER_RANDOM,
    .fast = 1,
};
56 
// Helper for the filter parameter options: a double option carrying the
// M_OPT_DEFAULT_NAN flag.
#define OPT_PARAM(var) OPT_DOUBLE(var), .flags = M_OPT_DEFAULT_NAN

// Sub-option table for struct zimg_opts. mp_zimg_enable_cmdline_opts() uses it
// to create the option cache, and mp_zimg_alloc() takes its defaults.
#define OPT_BASE_STRUCT struct zimg_opts
const struct m_sub_options zimg_conf = {
    .opts = (struct m_option[]) {
        // Luma and chroma scalers share the same list of choices.
        {"scaler", OPT_CHOICE_C(scaler, mp_zimg_scalers)},
        {"scaler-param-a", OPT_PARAM(scaler_params[0])},
        {"scaler-param-b", OPT_PARAM(scaler_params[1])},
        {"scaler-chroma", OPT_CHOICE_C(scaler_chroma, mp_zimg_scalers)},
        {"scaler-chroma-param-a", OPT_PARAM(scaler_chroma_params[0])},
        {"scaler-chroma-param-b", OPT_PARAM(scaler_chroma_params[1])},
        {"dither", OPT_CHOICE(dither,
            {"no",              ZIMG_DITHER_NONE},
            {"ordered",         ZIMG_DITHER_ORDERED},
            {"random",          ZIMG_DITHER_RANDOM},
            {"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})},
        {"fast", OPT_FLAG(fast)},
        // "auto" is stored as 0; mp_zimg_config() replaces values < 1 with
        // the CPU count. Explicit values are limited to 1..64.
        {"threads", OPT_CHOICE(threads, {"auto", 0}), M_RANGE(1, 64)},
        {0}
    },
    .size = sizeof(struct zimg_opts),
    .defaults = &zimg_opts_defaults,
};
80 
// Per-slice conversion state. mp_zimg_config() creates one of these for each
// horizontal slice of the destination image; destroy_zimg() frees them.
struct mp_zimg_state {
    zimg_filter_graph *graph;   // built in mp_zimg_state_init()
    void *tmp;                  // ZIMG_ALIGN-aligned pointer into tmp_alloc,
                                // passed to zimg_filter_graph_process()
    void *tmp_alloc;            // raw allocation backing tmp
    struct mp_zimg_repack *src; // input side (unpacking)
    struct mp_zimg_repack *dst; // output side (packing)
    int slice_y, slice_h; // y start position, height of target slice
    double scale_y;             // source height divided by destination height
    // Signalled by do_convert_thread(), waited on by mp_zimg_convert().
    struct mp_waiter thread_waiter;
};
91 
// State for one side (input or output) of a slice conversion: describes the
// user-visible format, the planar format handed to zimg, and the buffers used
// to shuttle data between the two.
struct mp_zimg_repack {
    bool pack;                  // if false, this is for unpacking
    struct mp_image_params fmt; // original mp format (possibly packed format,
                                // swapped endian)
    int zimgfmt;                // zimg equivalent unpacked format
    int num_planes;             // number of planes involved
    unsigned zmask[4];          // zmask[mp_index] = zimg mask (using mp index!)
    int z_planes[4];            // z_planes[zimg_index] = mp_index (or -1)

    struct mp_repack *repack;   // converting to/from planar

    // Temporary memory for slice-wise repacking. This may be set even if repack
    // is not set (then it may be used to avoid alignment issues). This has
    // about one slice worth of data.
    struct mp_image *tmp;

    // Temporary memory for zimg buffer.
    // (zbuf is refilled by wrap_buffer() on every conversion; cropped_tmp is
    // the per-slice cropped copy of the user image used on the packing side.)
    zimg_image_buffer zbuf;
    struct mp_image cropped_tmp;

    int real_w, real_h;         // aligned size
};
114 
mp_zimg_update_from_cmdline(struct mp_zimg_context * ctx)115 static void mp_zimg_update_from_cmdline(struct mp_zimg_context *ctx)
116 {
117     m_config_cache_update(ctx->opts_cache);
118 
119     struct zimg_opts *opts = ctx->opts_cache->opts;
120     ctx->opts = *opts;
121 }
122 
mp_to_z_chroma(enum mp_chroma_location cl)123 static zimg_chroma_location_e mp_to_z_chroma(enum mp_chroma_location cl)
124 {
125     switch (cl) {
126     case MP_CHROMA_TOPLEFT:     return ZIMG_CHROMA_TOP_LEFT;
127     case MP_CHROMA_LEFT:        return ZIMG_CHROMA_LEFT;
128     case MP_CHROMA_CENTER:      return ZIMG_CHROMA_CENTER;
129     default:                    return ZIMG_CHROMA_LEFT;
130     }
131 }
132 
mp_to_z_matrix(enum mp_csp csp)133 static zimg_matrix_coefficients_e mp_to_z_matrix(enum mp_csp csp)
134 {
135     switch (csp) {
136     case MP_CSP_BT_601:         return ZIMG_MATRIX_BT470_BG;
137     case MP_CSP_BT_709:         return ZIMG_MATRIX_BT709;
138     case MP_CSP_SMPTE_240M:     return ZIMG_MATRIX_ST240_M;
139     case MP_CSP_BT_2020_NC:     return ZIMG_MATRIX_BT2020_NCL;
140     case MP_CSP_BT_2020_C:      return ZIMG_MATRIX_BT2020_CL;
141     case MP_CSP_RGB:            return ZIMG_MATRIX_RGB;
142     case MP_CSP_XYZ:            return ZIMG_MATRIX_RGB;
143     case MP_CSP_YCGCO:          return ZIMG_MATRIX_YCGCO;
144     default:                    return ZIMG_MATRIX_BT709;
145     }
146 }
147 
mp_to_z_trc(enum mp_csp_trc trc)148 static zimg_transfer_characteristics_e mp_to_z_trc(enum mp_csp_trc trc)
149 {
150     switch (trc) {
151     case MP_CSP_TRC_BT_1886:    return ZIMG_TRANSFER_BT709;
152     case MP_CSP_TRC_SRGB:       return ZIMG_TRANSFER_IEC_61966_2_1;
153     case MP_CSP_TRC_LINEAR:     return ZIMG_TRANSFER_LINEAR;
154     case MP_CSP_TRC_GAMMA22:    return ZIMG_TRANSFER_BT470_M;
155     case MP_CSP_TRC_GAMMA28:    return ZIMG_TRANSFER_BT470_BG;
156     case MP_CSP_TRC_PQ:         return ZIMG_TRANSFER_ST2084;
157     case MP_CSP_TRC_HLG:        return ZIMG_TRANSFER_ARIB_B67;
158     case MP_CSP_TRC_GAMMA18:    // ?
159     case MP_CSP_TRC_GAMMA20:
160     case MP_CSP_TRC_GAMMA24:
161     case MP_CSP_TRC_GAMMA26:
162     case MP_CSP_TRC_PRO_PHOTO:
163     case MP_CSP_TRC_V_LOG:
164     case MP_CSP_TRC_S_LOG1:
165     case MP_CSP_TRC_S_LOG2:     // ?
166     default:                    return ZIMG_TRANSFER_BT709;
167     }
168 }
169 
mp_to_z_prim(enum mp_csp_prim prim)170 static zimg_color_primaries_e mp_to_z_prim(enum mp_csp_prim prim)
171 {
172     switch (prim) {
173     case MP_CSP_PRIM_BT_601_525:return ZIMG_PRIMARIES_ST170_M;
174     case MP_CSP_PRIM_BT_601_625:return ZIMG_PRIMARIES_BT470_BG;
175     case MP_CSP_PRIM_BT_709:    return ZIMG_PRIMARIES_BT709;
176     case MP_CSP_PRIM_BT_2020:   return ZIMG_PRIMARIES_BT2020;
177     case MP_CSP_PRIM_BT_470M:   return ZIMG_PRIMARIES_BT470_M;
178     case MP_CSP_PRIM_CIE_1931:  return ZIMG_PRIMARIES_ST428;
179     case MP_CSP_PRIM_DCI_P3:    return ZIMG_PRIMARIES_ST431_2;
180     case MP_CSP_PRIM_DISPLAY_P3:return ZIMG_PRIMARIES_ST432_1;
181     case MP_CSP_PRIM_APPLE:     // ?
182     case MP_CSP_PRIM_ADOBE:
183     case MP_CSP_PRIM_PRO_PHOTO:
184     case MP_CSP_PRIM_V_GAMUT:
185     case MP_CSP_PRIM_S_GAMUT:   // ?
186     default:                    return ZIMG_PRIMARIES_BT709;
187     }
188 }
189 
destroy_zimg(struct mp_zimg_context * ctx)190 static void destroy_zimg(struct mp_zimg_context *ctx)
191 {
192     for (int n = 0; n < ctx->num_states; n++) {
193         struct mp_zimg_state *st = ctx->states[n];
194         talloc_free(st->tmp_alloc);
195         zimg_filter_graph_free(st->graph);
196         TA_FREEP(&st->src);
197         TA_FREEP(&st->dst);
198         talloc_free(st);
199     }
200     ctx->num_states = 0;
201 }
202 
free_mp_zimg(void * p)203 static void free_mp_zimg(void *p)
204 {
205     struct mp_zimg_context *ctx = p;
206 
207     destroy_zimg(ctx);
208     TA_FREEP(&ctx->tp);
209 }
210 
mp_zimg_alloc(void)211 struct mp_zimg_context *mp_zimg_alloc(void)
212 {
213     struct mp_zimg_context *ctx = talloc_ptrtype(NULL, ctx);
214     *ctx = (struct mp_zimg_context) {
215         .log = mp_null_log,
216     };
217     ctx->opts = *(struct zimg_opts *)zimg_conf.defaults;
218     talloc_set_destructor(ctx, free_mp_zimg);
219     return ctx;
220 }
221 
mp_zimg_enable_cmdline_opts(struct mp_zimg_context * ctx,struct mpv_global * g)222 void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx,
223                                  struct mpv_global *g)
224 {
225     if (ctx->opts_cache)
226         return;
227 
228     ctx->opts_cache = m_config_cache_alloc(ctx, g, &zimg_conf);
229     destroy_zimg(ctx); // force update
230     mp_zimg_update_from_cmdline(ctx); // first update
231 }
232 
repack_entrypoint(void * user,unsigned i,unsigned x0,unsigned x1)233 static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1)
234 {
235     struct mp_zimg_repack *r = user;
236 
237     // If reading is not aligned, just read slightly more data.
238     if (!r->pack)
239         x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1);
240 
241     // mp_repack requirements and zimg guarantees.
242     assert(!(i & (mp_repack_get_align_y(r->repack) - 1)));
243     assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1)));
244 
245     unsigned i_src = i & (r->pack ? r->zmask[0] : ZIMG_BUFFER_MAX);
246     unsigned i_dst = i & (r->pack ? ZIMG_BUFFER_MAX : r->zmask[0]);
247 
248     repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0);
249 
250     return 0;
251 }
252 
wrap_buffer(struct mp_zimg_state * st,struct mp_zimg_repack * r,struct mp_image * a_mpi)253 static bool wrap_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r,
254                         struct mp_image *a_mpi)
255 {
256     zimg_image_buffer *buf = &r->zbuf;
257     *buf = (zimg_image_buffer){ZIMG_API_VERSION};
258 
259     struct mp_image *mpi = a_mpi;
260     if (r->pack) {
261         mpi = &r->cropped_tmp;
262         *mpi = *a_mpi;
263         mp_image_crop(mpi, 0, st->slice_y, mpi->w, st->slice_y + st->slice_h);
264     }
265 
266     bool direct[MP_MAX_PLANES] = {0};
267 
268     for (int p = 0; p < mpi->num_planes; p++) {
269         // If alignment is good, try to avoid copy.
270         direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) &&
271                     !(mpi->stride[p] % ZIMG_ALIGN);
272     }
273 
274     if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp,
275                                           0, r->pack ? r->tmp : mpi, direct))
276         return false;
277 
278     for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) {
279         // Note: this is really the only place we have to care about plane
280         // permutation (zimg_image_buffer may have a different plane order
281         // than the shadow mpi like r->tmp). We never use the zimg indexes
282         // in other places.
283         int mplane = r->z_planes[n];
284         if (mplane < 0)
285             continue;
286 
287         struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp;
288         buf->plane[n].data = tmpi->planes[mplane];
289         buf->plane[n].stride = tmpi->stride[mplane];
290         buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane];
291     }
292 
293     return true;
294 }
295 
// Derive the zimg image format (*zfmt) and the repack description (*r) for
// the mpv image parameters in *user_fmt.
//  pack: true for the output side (planar zimg data is packed into the
//        user format), false for the input side (user format is unpacked).
//  ctx:  if NULL, the repacker is only created temporarily to query the
//        format and is freed again; the "fast" option tweak is skipped.
//  st:   if set, the format is restricted to the slice described by st.
//        On the output side this also clamps st->slice_h to the image. On the
//        input side it reads st->dst, which must have been set up before.
// Returns false if the format can't be handled.
// (ctx and st can be NULL for probing.)
static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
                         bool pack, struct mp_image_params *user_fmt,
                         struct mp_zimg_context *ctx,
                         struct mp_zimg_state *st)
{
    r->fmt = *user_fmt;
    r->pack = pack;

    zimg_image_format_default(zfmt, ZIMG_API_VERSION);

    int rp_flags = 0;

    // For e.g. RGB565, go to lowest depth on pack for less weird dithering.
    if (r->pack) {
        rp_flags |= REPACK_CREATE_ROUND_DOWN;
    } else {
        rp_flags |= REPACK_CREATE_EXPAND_8BIT;
    }

    r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags);
    if (!r->repack)
        return false;

    int align_x = mp_repack_get_align_x(r->repack);

    // The planar format zimg works on is the repacker's source when packing,
    // and its destination when unpacking.
    r->zimgfmt = r->pack ? mp_repack_get_format_src(r->repack)
                         : mp_repack_get_format_dst(r->repack);

    // Real use: tie the repacker's lifetime to r. Probing: it was needed only
    // for the queries above.
    if (ctx) {
        talloc_steal(r, r->repack);
    } else {
        TA_FREEP(&r->repack);
    }

    // Work on a copy, so that guessed colorspace values don't end up in
    // r->fmt.
    struct mp_image_params fmt = r->fmt;
    mp_image_params_guess_csp(&fmt);

    struct mp_regular_imgfmt desc;
    if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
        return false;

    // Relies on zimg callbacks reading on 64 byte alignment.
    if (!MP_IS_POWER_OF_2(align_x) || align_x > 64 / desc.component_size)
        return false;

    // no weird stuff
    if (desc.num_planes > 4)
        return false;

    for (int n = 0; n < 4; n++)
        r->z_planes[n] = -1;

    // Build the zimg plane index -> mp plane index mapping. Every plane must
    // consist of exactly one component, with component numbers 1-3 for color
    // and 4 for alpha.
    for (int n = 0; n < desc.num_planes; n++) {
        if (desc.planes[n].num_components != 1)
            return false;
        int c = desc.planes[n].components[0];
        if (c < 1 || c > 4)
            return false;
        if (c < 4) {
            // Unfortunately, ffmpeg prefers GBR order for planar RGB, while zimg
            // is sane. This makes it necessary to determine and fix the order.
            r->z_planes[c - 1] = n;
        } else {
            r->z_planes[3] = n; // alpha, always plane 4 in zimg

#if HAVE_ZIMG_ALPHA
            zfmt->alpha = fmt.alpha == MP_ALPHA_PREMUL
                ? ZIMG_ALPHA_PREMULTIPLIED : ZIMG_ALPHA_STRAIGHT;
#else
            return false;
#endif
        }
    }

    r->num_planes = desc.num_planes;

    // Take care of input/output size, including slicing.
    // Note: formats with subsampled chroma may have odd width or height in
    // mpv and FFmpeg. This is because the width/height is actually a cropping
    // rectangle. Reconstruct the image allocation size and set the cropping.
    zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, 1 << desc.chroma_xs);
    zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, 1 << desc.chroma_ys);
    if (st) {
        if (r->pack) {
            // Output: the image zimg produces is just this slice. The slice
            // height is clamped so the last slice ends at the (aligned) image
            // height; this is written back to st->slice_h.
            zfmt->height = r->real_h = st->slice_h =
                MPMIN(st->slice_y + st->slice_h, r->real_h) - st->slice_y;

            assert(MP_IS_ALIGNED(r->real_h, 1 << desc.chroma_ys));
        } else {
            // Relies on st->dst being initialized first.
            struct mp_zimg_repack *dst = st->dst;

            // Input: zimg gets the full image, with the active region set to
            // the source area that corresponds to the destination slice.
            zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w;
            zfmt->active_region.height = dst->real_h * st->scale_y;

            zfmt->active_region.top = st->slice_y * st->scale_y;
        }
    }

    zfmt->subsample_w = desc.chroma_xs;
    zfmt->subsample_h = desc.chroma_ys;

    // Up to 2 planes (gray, gray + alpha) is treated as grayscale; otherwise
    // the guessed colorspace decides between RGB and YUV.
    zfmt->color_family = ZIMG_COLOR_YUV;
    if (desc.num_planes <= 2) {
        zfmt->color_family = ZIMG_COLOR_GREY;
    } else if (fmt.color.space == MP_CSP_RGB || fmt.color.space == MP_CSP_XYZ) {
        zfmt->color_family = ZIMG_COLOR_RGB;
    }

    if (desc.component_type == MP_COMPONENT_TYPE_UINT &&
        desc.component_size == 1)
    {
        zfmt->pixel_type = ZIMG_PIXEL_BYTE;
    } else if (desc.component_type == MP_COMPONENT_TYPE_UINT &&
               desc.component_size == 2)
    {
        zfmt->pixel_type = ZIMG_PIXEL_WORD;
    } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT &&
               desc.component_size == 2)
    {
        zfmt->pixel_type = ZIMG_PIXEL_HALF;
    } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT &&
               desc.component_size == 4)
    {
        zfmt->pixel_type = ZIMG_PIXEL_FLOAT;
    } else {
        return false;
    }

    // Only a negative component_pad reduces the depth below the full
    // component size; a positive one is ignored here.
    // (Formats like P010 are basically reported as P016.)
    zfmt->depth = desc.component_size * 8 + MPMIN(0, desc.component_pad);

    zfmt->pixel_range = fmt.color.levels == MP_CSP_LEVELS_PC ?
                        ZIMG_RANGE_FULL : ZIMG_RANGE_LIMITED;

    zfmt->matrix_coefficients = mp_to_z_matrix(fmt.color.space);
    zfmt->transfer_characteristics = mp_to_z_trc(fmt.color.gamma);
    zfmt->color_primaries = mp_to_z_prim(fmt.color.primaries);
    zfmt->chroma_location = mp_to_z_chroma(fmt.chroma_location);

    if (ctx && ctx->opts.fast) {
        // mpv's default for RGB output slows down zimg significantly.
        if (zfmt->transfer_characteristics == ZIMG_TRANSFER_IEC_61966_2_1 &&
            zfmt->color_family == ZIMG_COLOR_RGB)
            zfmt->transfer_characteristics = ZIMG_TRANSFER_BT709;
    }

    // mpv treats _some_ gray formats as RGB; zimg doesn't like this.
    if (zfmt->color_family == ZIMG_COLOR_GREY &&
        zfmt->matrix_coefficients == ZIMG_MATRIX_RGB)
        zfmt->matrix_coefficients = ZIMG_MATRIX_BT470_BG;

    return true;
}
451 
allocate_buffer(struct mp_zimg_state * st,struct mp_zimg_repack * r)452 static bool allocate_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r)
453 {
454     unsigned lines = 0;
455     int err;
456     if (r->pack) {
457         err = zimg_filter_graph_get_output_buffering(st->graph, &lines);
458     } else {
459         err = zimg_filter_graph_get_input_buffering(st->graph, &lines);
460     }
461 
462     if (err)
463         return false;
464 
465     r->zmask[0] = zimg_select_buffer_mask(lines);
466 
467     // Either ZIMG_BUFFER_MAX, or a power-of-2 slice buffer.
468     assert(r->zmask[0] == ZIMG_BUFFER_MAX || MP_IS_POWER_OF_2(r->zmask[0] + 1));
469 
470     int h = r->zmask[0] == ZIMG_BUFFER_MAX ? r->real_h : r->zmask[0] + 1;
471     if (h >= r->real_h) {
472         h = r->real_h;
473         r->zmask[0] = ZIMG_BUFFER_MAX;
474     }
475 
476     r->tmp = mp_image_alloc(r->zimgfmt, r->real_w, h);
477     talloc_steal(r, r->tmp);
478 
479     if (!r->tmp)
480         return false;
481 
482     // Note: although zimg doesn't require that the chroma plane's zmask is
483     //       divided by the full size zmask, the repack callback requires it,
484     //       since mp_repack can handle only proper slices.
485     for (int n = 1; n < r->tmp->fmt.num_planes; n++) {
486         r->zmask[n] = r->zmask[0];
487         if (r->zmask[0] != ZIMG_BUFFER_MAX)
488             r->zmask[n] = r->zmask[n] >> r->tmp->fmt.ys[n];
489     }
490 
491     return true;
492 }
493 
mp_zimg_state_init(struct mp_zimg_context * ctx,struct mp_zimg_state * st,int slice_y,int slice_h)494 static bool mp_zimg_state_init(struct mp_zimg_context *ctx,
495                                struct mp_zimg_state *st,
496                                int slice_y, int slice_h)
497 {
498     struct zimg_opts *opts = &ctx->opts;
499 
500     st->src = talloc_zero(NULL, struct mp_zimg_repack);
501     st->dst = talloc_zero(NULL, struct mp_zimg_repack);
502 
503     st->scale_y = ctx->src.h / (double)ctx->dst.h;
504     st->slice_y = slice_y;
505     st->slice_h = slice_h;
506 
507     zimg_image_format src_fmt, dst_fmt;
508 
509     // Note: do dst first, because src uses fields from dst.
510     if (!setup_format(&dst_fmt, st->dst, true, &ctx->dst, ctx, st) ||
511         !setup_format(&src_fmt, st->src, false, &ctx->src, ctx, st))
512         return false;
513 
514     zimg_graph_builder_params params;
515     zimg_graph_builder_params_default(&params, ZIMG_API_VERSION);
516 
517     params.resample_filter = opts->scaler;
518     params.filter_param_a = opts->scaler_params[0];
519     params.filter_param_b = opts->scaler_params[1];
520 
521     params.resample_filter_uv = opts->scaler_chroma;
522     params.filter_param_a_uv = opts->scaler_chroma_params[0];
523     params.filter_param_b_uv = opts->scaler_chroma_params[1];
524 
525     params.dither_type = opts->dither;
526 
527     params.cpu_type = ZIMG_CPU_AUTO_64B;
528 
529     if (opts->fast)
530         params.allow_approximate_gamma = 1;
531 
532     if (ctx->src.color.sig_peak > 0)
533         params.nominal_peak_luminance = ctx->src.color.sig_peak;
534 
535     st->graph = zimg_filter_graph_build(&src_fmt, &dst_fmt, &params);
536     if (!st->graph) {
537         char err[128] = {0};
538         zimg_get_last_error(err, sizeof(err) - 1);
539         MP_ERR(ctx, "zimg_filter_graph_build: %s \n", err);
540         return false;
541     }
542 
543     size_t tmp_size;
544     if (!zimg_filter_graph_get_tmp_size(st->graph, &tmp_size)) {
545         tmp_size = MP_ALIGN_UP(tmp_size, ZIMG_ALIGN) + ZIMG_ALIGN;
546         st->tmp_alloc = ta_alloc_size(NULL, tmp_size);
547         if (st->tmp_alloc)
548             st->tmp = (void *)MP_ALIGN_UP((uintptr_t)st->tmp_alloc, ZIMG_ALIGN);
549     }
550 
551     if (!st->tmp_alloc)
552         return false;
553 
554     if (!allocate_buffer(st, st->src) || !allocate_buffer(st, st->dst))
555         return false;
556 
557     return true;
558 }
559 
mp_zimg_config(struct mp_zimg_context * ctx)560 bool mp_zimg_config(struct mp_zimg_context *ctx)
561 {
562     destroy_zimg(ctx);
563 
564     if (ctx->opts_cache)
565         mp_zimg_update_from_cmdline(ctx);
566 
567     int slices = ctx->opts.threads;
568     if (slices < 1)
569         slices = av_cpu_count();
570     slices = MPCLAMP(slices, 1, 64);
571 
572     struct mp_imgfmt_desc dstfmt = mp_imgfmt_get_desc(ctx->dst.imgfmt);
573     if (!dstfmt.align_y)
574         goto fail;
575     int full_h = MP_ALIGN_UP(ctx->dst.h, dstfmt.align_y);
576     int slice_h = (full_h + slices - 1) / slices;
577     slice_h = MP_ALIGN_UP(slice_h, dstfmt.align_y);
578     slice_h = MP_ALIGN_UP(slice_h, 64); // for dithering and minimum slice size
579     slices = (full_h + slice_h - 1) / slice_h;
580 
581     int threads = slices - 1;
582     if (threads != ctx->current_thread_count) {
583         // Just destroy and recreate all - dumb and costly, but rarely happens.
584         TA_FREEP(&ctx->tp);
585         ctx->current_thread_count = 0;
586         if (threads) {
587             MP_VERBOSE(ctx, "using %d threads for scaling\n", threads);
588             ctx->tp = mp_thread_pool_create(NULL, threads, threads, threads);
589             if (!ctx->tp)
590                 goto fail;
591             ctx->current_thread_count = threads;
592         }
593     }
594 
595     for (int n = 0; n < slices; n++) {
596         struct mp_zimg_state *st = talloc_zero(NULL, struct mp_zimg_state);
597         MP_TARRAY_APPEND(ctx, ctx->states, ctx->num_states, st);
598 
599         if (!mp_zimg_state_init(ctx, st, n * slice_h, slice_h))
600             goto fail;
601     }
602 
603     assert(ctx->num_states == slices);
604 
605     return true;
606 
607 fail:
608     destroy_zimg(ctx);
609     return false;
610 }
611 
mp_zimg_config_image_params(struct mp_zimg_context * ctx)612 bool mp_zimg_config_image_params(struct mp_zimg_context *ctx)
613 {
614     if (ctx->num_states) {
615         // All states are the same, so checking only one of them is sufficient.
616         struct mp_zimg_state *st = ctx->states[0];
617         if (st->src && mp_image_params_equal(&ctx->src, &st->src->fmt) &&
618             st->dst && mp_image_params_equal(&ctx->dst, &st->dst->fmt) &&
619             (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache)) &&
620             st->graph)
621             return true;
622     }
623     return mp_zimg_config(ctx);
624 }
625 
do_convert(struct mp_zimg_state * st)626 static void do_convert(struct mp_zimg_state *st)
627 {
628     assert(st->graph);
629 
630     // An annoyance.
631     zimg_image_buffer *zsrc = &st->src->zbuf;
632     zimg_image_buffer_const zsrc_c = {ZIMG_API_VERSION};
633     for (int n = 0; n < MP_ARRAY_SIZE(zsrc_c.plane); n++) {
634         zsrc_c.plane[n].data = zsrc->plane[n].data;
635         zsrc_c.plane[n].stride = zsrc->plane[n].stride;
636         zsrc_c.plane[n].mask = zsrc->plane[n].mask;
637     }
638 
639     // (The API promises to succeed if no user callbacks fail, so no need
640     // to check the return value.)
641     zimg_filter_graph_process(st->graph, &zsrc_c, &st->dst->zbuf, st->tmp,
642                               repack_entrypoint, st->src,
643                               repack_entrypoint, st->dst);
644 }
645 
do_convert_thread(void * ptr)646 static void do_convert_thread(void *ptr)
647 {
648     struct mp_zimg_state *st = ptr;
649 
650     do_convert(st);
651     mp_waiter_wakeup(&st->thread_waiter, 0);
652 }
653 
mp_zimg_convert(struct mp_zimg_context * ctx,struct mp_image * dst,struct mp_image * src)654 bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst,
655                      struct mp_image *src)
656 {
657     ctx->src = src->params;
658     ctx->dst = dst->params;
659 
660     if (!mp_zimg_config_image_params(ctx)) {
661         MP_ERR(ctx, "zimg initialization failed.\n");
662         return false;
663     }
664 
665     for (int n = 0; n < ctx->num_states; n++) {
666         struct mp_zimg_state *st = ctx->states[n];
667 
668         if (!wrap_buffer(st, st->src, src) || !wrap_buffer(st, st->dst, dst)) {
669             MP_ERR(ctx, "zimg repacker initialization failed.\n");
670             return false;
671         }
672     }
673 
674     for (int n = 1; n < ctx->num_states; n++) {
675         struct mp_zimg_state *st = ctx->states[n];
676 
677         st->thread_waiter = (struct mp_waiter)MP_WAITER_INITIALIZER;
678 
679         bool r = mp_thread_pool_run(ctx->tp, do_convert_thread, st);
680         // This is guaranteed by the API; and unrolling would be inconvenient.
681         assert(r);
682     }
683 
684     do_convert(ctx->states[0]);
685 
686     for (int n = 1; n < ctx->num_states; n++) {
687         struct mp_zimg_state *st = ctx->states[n];
688 
689         mp_waiter_wait(&st->thread_waiter);
690     }
691 
692     return true;
693 }
694 
supports_format(int imgfmt,bool out)695 static bool supports_format(int imgfmt, bool out)
696 {
697     struct mp_image_params fmt = {.imgfmt = imgfmt};
698     struct mp_zimg_repack t;
699     zimg_image_format zfmt;
700     return setup_format(&zfmt, &t, out, &fmt, NULL, NULL);
701 }
702 
// Return whether imgfmt is supported as source format for conversion.
bool mp_zimg_supports_in_format(int imgfmt)
{
    const bool as_output = false;
    return supports_format(imgfmt, as_output);
}
707 
// Return whether imgfmt is supported as destination format for conversion.
bool mp_zimg_supports_out_format(int imgfmt)
{
    const bool as_output = true;
    return supports_format(imgfmt, as_output);
}
712