1 /*
2 * This file is part of mpv.
3 *
4 * mpv is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * mpv is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <math.h>
19
20 #include <libavutil/bswap.h>
21 #include <libavutil/pixfmt.h>
22
23 #include "common/common.h"
24 #include "repack.h"
25 #include "video/csputils.h"
26 #include "video/fmt-conversion.h"
27 #include "video/img_format.h"
28 #include "video/mp_image.h"
29
// Kinds of stages a repack pipeline is built from (dispatched in
// repack_line()).
enum repack_step_type {
    REPACK_STEP_FLOAT = 0,  // fixed point <-> float32, see repack_float()
    REPACK_STEP_REPACK,     // rp->repack callback plus plain plane copies
    REPACK_STEP_ENDIAN,     // byte swapping, see swap_endian()
};
35
36 struct repack_step {
37 enum repack_step_type type;
38 // 0=input, 1=output
39 struct mp_image *buf[2];
40 bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer
41 struct mp_imgfmt_desc fmt[2];
42 struct mp_image *tmp; // output buffer, if needed
43 };
44
45 struct mp_repack {
46 bool pack; // if false, this is for unpacking
47 int flags;
48 int imgfmt_user; // original mp format (unchanged endian)
49 int imgfmt_a; // original mp format (possibly packed format,
50 // swapped endian)
51 int imgfmt_b; // equivalent unpacked/planar format
52 struct mp_imgfmt_desc fmt_a;// ==imgfmt_a
53 struct mp_imgfmt_desc fmt_b;// ==imgfmt_b
54
55 void (*repack)(struct mp_repack *rp,
56 struct mp_image *a, int a_x, int a_y,
57 struct mp_image *b, int b_x, int b_y, int w);
58
59 bool passthrough_y; // possible luma plane optimization for e.g. nv12
60 int endian_size; // endian swap; 0=none, 2/4=swap word size
61
62 // For packed_repack.
63 int components[4]; // b[n] = mp_image.planes[components[n]]
64 // pack: a is dst, b is src
65 // unpack: a is src, b is dst
66 void (*packed_repack_scanline)(void *a, void *b[], int w);
67
68 // Fringe RGB/YUV.
69 uint8_t comp_size;
70 uint8_t comp_map[6];
71 uint8_t comp_shifts[3];
72 uint8_t *comp_lut;
73 void (*repack_fringe_yuv)(void *dst, void *src[], int w, uint8_t *c);
74
75 // F32 repacking.
76 int f32_comp_size;
77 float f32_m[4], f32_o[4];
78 uint32_t f32_pmax[4];
79 enum mp_csp f32_csp_space;
80 enum mp_csp_levels f32_csp_levels;
81
82 // REPACK_STEP_REPACK: if true, need to copy this plane
83 bool copy_buf[4];
84
85 struct repack_step steps[4];
86 int num_steps;
87
88 bool configured;
89 };
90
91 // depth = number of LSB in use
find_gbrp_format(int depth,int num_planes)92 static int find_gbrp_format(int depth, int num_planes)
93 {
94 if (num_planes != 3 && num_planes != 4)
95 return 0;
96 struct mp_regular_imgfmt desc = {
97 .component_type = MP_COMPONENT_TYPE_UINT,
98 .forced_csp = MP_CSP_RGB,
99 .component_size = depth > 8 ? 2 : 1,
100 .component_pad = depth - (depth > 8 ? 16 : 8),
101 .num_planes = num_planes,
102 .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
103 };
104 return mp_find_regular_imgfmt(&desc);
105 }
106
107 // depth = number of LSB in use
find_yuv_format(int depth,int num_planes)108 static int find_yuv_format(int depth, int num_planes)
109 {
110 if (num_planes < 1 || num_planes > 4)
111 return 0;
112 struct mp_regular_imgfmt desc = {
113 .component_type = MP_COMPONENT_TYPE_UINT,
114 .component_size = depth > 8 ? 2 : 1,
115 .component_pad = depth - (depth > 8 ? 16 : 8),
116 .num_planes = num_planes,
117 .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} },
118 };
119 if (num_planes == 2)
120 desc.planes[1].components[0] = 4;
121 return mp_find_regular_imgfmt(&desc);
122 }
123
124 // Copy one line on the plane p.
copy_plane(struct mp_image * dst,int dst_x,int dst_y,struct mp_image * src,int src_x,int src_y,int w,int p)125 static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
126 struct mp_image *src, int src_x, int src_y,
127 int w, int p)
128 {
129 // Number of lines on this plane.
130 int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
131 size_t size = mp_image_plane_bytes(dst, p, dst_x, w);
132
133 assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);
134
135 for (int y = 0; y < h; y++) {
136 void *pd = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
137 void *ps = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
138 memcpy(pd, ps, size);
139 }
140 }
141
142 // Swap endian for one line.
swap_endian(struct mp_image * dst,int dst_x,int dst_y,struct mp_image * src,int src_x,int src_y,int w,int endian_size)143 static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
144 struct mp_image *src, int src_x, int src_y,
145 int w, int endian_size)
146 {
147 assert(src->fmt.num_planes == dst->fmt.num_planes);
148
149 for (int p = 0; p < dst->fmt.num_planes; p++) {
150 int xs = dst->fmt.xs[p];
151 int bpp = dst->fmt.bpp[p] / 8;
152 int words_per_pixel = bpp / endian_size;
153 int num_words = ((w + (1 << xs) - 1) >> xs) * words_per_pixel;
154 // Number of lines on this plane.
155 int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
156
157 assert(src->fmt.bpp[p] == bpp * 8);
158
159 for (int y = 0; y < h; y++) {
160 void *s = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
161 void *d = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
162 switch (endian_size) {
163 case 2:
164 for (int x = 0; x < num_words; x++)
165 ((uint16_t *)d)[x] = av_bswap16(((uint16_t *)s)[x]);
166 break;
167 case 4:
168 for (int x = 0; x < num_words; x++)
169 ((uint32_t *)d)[x] = av_bswap32(((uint32_t *)s)[x]);
170 break;
171 default:
172 assert(0);
173 }
174 }
175 }
176 }
177
178 // PA = PAck, copy planar input to single packed array
179 // UN = UNpack, copy packed input to planar output
180 // Naming convention:
181 // pa_/un_ prefix to identify conversion direction.
182 // Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
183 // (This is unusual; MSB to LSB is more commonly used to describe formats,
184 // but our convention makes more sense for byte access in little endian.)
185 // "c" identifies a color component.
186 // "z" identifies known zero padding.
187 // "x" identifies uninitialized padding.
188 // A component is followed by its size in bits.
189 // Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
190 // Unpackers will often use "x" for padding, because they ignore it, while
191 // packers will use "z" because they write zero.
192
// Word based packers/unpackers: one packed pixel is read or written as a
// single integer of type packed_t, each component is a plane_t sample that is
// shifted into (or out of) its position.

// Pack 4 planes into one word per pixel.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++) {                                       \
            packed_t v0 = (packed_t)((plane_t *)src[0])[x] << (sh_c0);      \
            packed_t v1 = (packed_t)((plane_t *)src[1])[x] << (sh_c1);      \
            packed_t v2 = (packed_t)((plane_t *)src[2])[x] << (sh_c2);      \
            packed_t v3 = (packed_t)((plane_t *)src[3])[x] << (sh_c3);      \
            out[x] = v0 | v1 | v2 | v3;                                     \
        }                                                                   \
    }

// Split one word per pixel into 4 planes; mask selects the component bits.
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++) {                                       \
            packed_t word = in[x];                                          \
            ((plane_t *)dst[0])[x] = (word >> (sh_c0)) & (mask);            \
            ((plane_t *)dst[1])[x] = (word >> (sh_c1)) & (mask);            \
            ((plane_t *)dst[2])[x] = (word >> (sh_c2)) & (mask);            \
            ((plane_t *)dst[3])[x] = (word >> (sh_c3)) & (mask);            \
        }                                                                   \
    }

// Pack 3 planes into one word per pixel; pad is OR-ed into every word.
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++) {                                       \
            packed_t v0 = (packed_t)((plane_t *)src[0])[x] << (sh_c0);      \
            packed_t v1 = (packed_t)((plane_t *)src[1])[x] << (sh_c1);      \
            packed_t v2 = (packed_t)((plane_t *)src[2])[x] << (sh_c2);      \
            out[x] = (pad) | v0 | v1 | v2;                                  \
        }                                                                   \
    }

// Split one word per pixel into 3 planes; mask selects the component bits.
#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++) {                                       \
            packed_t word = in[x];                                          \
            ((plane_t *)dst[0])[x] = (word >> (sh_c0)) & (mask);            \
            ((plane_t *)dst[1])[x] = (word >> (sh_c1)) & (mask);            \
            ((plane_t *)dst[2])[x] = (word >> (sh_c2)) & (mask);            \
        }                                                                   \
    }

// 4 components.
UN_WORD_4(un_cccc8,   uint32_t, uint8_t,  0, 8, 16, 24, 0xFFu)
PA_WORD_4(pa_cccc8,   uint32_t, uint8_t,  0, 8, 16, 24)
// Not sure if this is a good idea; there may be no alignment guarantee.
UN_WORD_4(un_cccc16,  uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu)
PA_WORD_4(pa_cccc16,  uint64_t, uint16_t, 0, 16, 32, 48)

// 3 components plus padding.
UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8, 16, 0xFFu)
PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8, 16, 0)
UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)
248
// Pack 2 planes into one word per pixel; pad is OR-ed into every word.
#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++) {                                       \
            packed_t v0 = (packed_t)((plane_t *)src[0])[x] << (sh_c0);      \
            packed_t v1 = (packed_t)((plane_t *)src[1])[x] << (sh_c1);      \
            out[x] = (pad) | v0 | v1;                                       \
        }                                                                   \
    }

// Split one word per pixel into 2 planes; mask selects the component bits.
#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++) {                                       \
            packed_t word = in[x];                                          \
            ((plane_t *)dst[0])[x] = (word >> (sh_c0)) & (mask);            \
            ((plane_t *)dst[1])[x] = (word >> (sh_c1)) & (mask);            \
        }                                                                   \
    }

UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8, 0xFFu)
PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8, 0)
UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
271
// Sequential packers/unpackers: the 3 components of a pixel are stored as 3
// consecutive comp_t values, accessed one component at a time.

// Interleave 3 planes into the packed array.
#define PA_SEQ_3(name, comp_t)                                              \
    static void name(void *dst, void *src[], int w) {                       \
        comp_t *out = dst;                                                  \
        for (int x = 0; x < w; x++) {                                       \
            comp_t *px = out + (size_t)x * 3;                               \
            px[0] = ((comp_t *)src[0])[x];                                  \
            px[1] = ((comp_t *)src[1])[x];                                  \
            px[2] = ((comp_t *)src[2])[x];                                  \
        }                                                                   \
    }

// De-interleave the packed array into 3 planes.
#define UN_SEQ_3(name, comp_t)                                              \
    static void name(void *src, void *dst[], int w) {                       \
        comp_t *in = src;                                                   \
        for (int x = 0; x < w; x++) {                                       \
            comp_t *px = in + (size_t)x * 3;                                \
            ((comp_t *)dst[0])[x] = px[0];                                  \
            ((comp_t *)dst[1])[x] = px[1];                                  \
            ((comp_t *)dst[2])[x] = px[2];                                  \
        }                                                                   \
    }

UN_SEQ_3(un_ccc8,  uint8_t)
PA_SEQ_3(pa_ccc8,  uint8_t)
UN_SEQ_3(un_ccc16, uint16_t)
PA_SEQ_3(pa_ccc16, uint16_t)
296
// Describes one "regular" packed layout: a single packed plane in which all
// components have the same width (padding aside), plus the scanline
// functions that convert it.
struct regular_repacker {
    int packed_width;       // bits per packed pixel
    int component_width;    // bits per component
    int prepadding;         // bits of padding on the LSB side
    int num_components;     // number of accessible components
    // Packer and unpacker for one scanline of w pixels.
    void (*pa_scanline)(void *a, void *b[], int w);
    void (*un_scanline)(void *a, void *b[], int w);
};
306
307 static const struct regular_repacker regular_repackers[] = {
308 {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8},
309 {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8},
310 {32, 8, 0, 4, pa_cccc8, un_cccc8},
311 {64, 16, 0, 4, pa_cccc16, un_cccc16},
312 {24, 8, 0, 3, pa_ccc8, un_ccc8},
313 {48, 16, 0, 3, pa_ccc16, un_ccc16},
314 {16, 8, 0, 2, pa_cc8, un_cc8},
315 {32, 16, 0, 2, pa_cc16, un_cc16},
316 {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
317 };
318
packed_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)319 static void packed_repack(struct mp_repack *rp,
320 struct mp_image *a, int a_x, int a_y,
321 struct mp_image *b, int b_x, int b_y, int w)
322 {
323 uint32_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
324
325 void *pb[4] = {0};
326 for (int p = 0; p < b->num_planes; p++) {
327 int s = rp->components[p];
328 pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
329 }
330
331 rp->packed_repack_scanline(pa, pb, w);
332 }
333
334 // Tries to set a packer/unpacker for component-wise byte aligned formats.
setup_packed_packer(struct mp_repack * rp)335 static void setup_packed_packer(struct mp_repack *rp)
336 {
337 struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
338 if (!(desc.flags & MP_IMGFLAG_HAS_COMPS) ||
339 !(desc.flags & MP_IMGFLAG_TYPE_UINT) ||
340 !(desc.flags & MP_IMGFLAG_NE) ||
341 desc.num_planes != 1)
342 return;
343
344 int num_real_components = 0;
345 int components[4] = {0};
346 for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
347 if (!desc.comps[n].size)
348 continue;
349 if (desc.comps[n].size != desc.comps[0].size ||
350 desc.comps[n].pad != desc.comps[0].pad ||
351 desc.comps[n].offset % desc.comps[0].size)
352 return;
353 int item = desc.comps[n].offset / desc.comps[0].size;
354 if (item >= 4)
355 return;
356 components[item] = n + 1;
357 num_real_components++;
358 }
359
360 int depth = desc.comps[0].size + MPMIN(0, desc.comps[0].pad);
361
362 static const int reorder_gbrp[] = {0, 3, 1, 2, 4};
363 static const int reorder_yuv[] = {0, 1, 2, 3, 4};
364 int planar_fmt = 0;
365 const int *reorder = NULL;
366 if (desc.flags & MP_IMGFLAG_COLOR_YUV) {
367 planar_fmt = find_yuv_format(depth, num_real_components);
368 reorder = reorder_yuv;
369 } else {
370 planar_fmt = find_gbrp_format(depth, num_real_components);
371 reorder = reorder_gbrp;
372 }
373 if (!planar_fmt)
374 return;
375
376 for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
377 const struct regular_repacker *pa = ®ular_repackers[i];
378
379 // The following may assume little endian (because some repack backends
380 // use word access, while the metadata here uses byte access).
381
382 int prepad = components[0] ? 0 : 8;
383 int first_comp = components[0] ? 0 : 1;
384 void (*repack_cb)(void *pa, void *pb[], int w) =
385 rp->pack ? pa->pa_scanline : pa->un_scanline;
386
387 if (pa->packed_width != desc.bpp[0] ||
388 pa->component_width != depth ||
389 pa->num_components != num_real_components ||
390 pa->prepadding != prepad ||
391 !repack_cb)
392 continue;
393
394 rp->repack = packed_repack;
395 rp->packed_repack_scanline = repack_cb;
396 rp->imgfmt_b = planar_fmt;
397 for (int n = 0; n < num_real_components; n++) {
398 // Determine permutation that maps component order between the two
399 // formats, with has_alpha special case (see above).
400 int c = reorder[components[first_comp + n]];
401 rp->components[n] = c == 4 ? num_real_components - 1 : c - 1;
402 }
403 return;
404 }
405 }
406
// LUT based packers/unpackers for 3 components of up to 8 bits each.
// lut consists of 3 tables of 256 entries, one per component; s0..s2 are the
// bit positions of the components within the packed word.

// Pack: translate each 8 bit sample through its table, then shift into place.
#define PA_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *dst, void *src[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++) {                                       \
            uint8_t v0 = ((uint8_t *)src[0])[x];                            \
            uint8_t v1 = ((uint8_t *)src[1])[x];                            \
            uint8_t v2 = ((uint8_t *)src[2])[x];                            \
            out[x] = (lut[256 * 0 + v0] << s0) |                            \
                     (lut[256 * 1 + v1] << s1) |                            \
                     (lut[256 * 2 + v2] << s2);                             \
        }                                                                   \
    }

// Unpack: extract 8 bits at each position and translate through the table.
#define UN_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *src, void *dst[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++) {                                       \
            packed_t word = in[x];                                          \
            ((uint8_t *)dst[0])[x] = lut[256 * 0 + ((word >> s0) & 0xFF)];  \
            ((uint8_t *)dst[1])[x] = lut[256 * 1 + ((word >> s1) & 0xFF)];  \
            ((uint8_t *)dst[2])[x] = lut[256 * 2 + ((word >> s2) & 0xFF)];  \
        }                                                                   \
    }

PA_SHIFT_LUT8(pa_shift_lut8_8,  uint8_t)
PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
UN_SHIFT_LUT8(un_shift_lut8_8,  uint8_t)
UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)
434
435 static void fringe_rgb_repack(struct mp_repack *rp,
436 struct mp_image *a, int a_x, int a_y,
437 struct mp_image *b, int b_x, int b_y, int w)
438 {
439 void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
440
441 void *pb[4] = {0};
442 for (int p = 0; p < b->num_planes; p++) {
443 int s = rp->components[p];
444 pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
445 }
446
447 assert(rp->comp_size == 1 || rp->comp_size == 2);
448
449 void (*repack)(void *pa, void *pb[], int w, uint8_t *lut,
450 uint8_t s0, uint8_t s1, uint8_t s2) = NULL;
451 if (rp->pack) {
452 repack = rp->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16;
453 } else {
454 repack = rp->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16;
455 }
456 repack(pa, pb, w, rp->comp_lut,
457 rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]);
458 }
459
setup_fringe_rgb_packer(struct mp_repack * rp)460 static void setup_fringe_rgb_packer(struct mp_repack *rp)
461 {
462 struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
463 if (!(desc.flags & MP_IMGFLAG_HAS_COMPS))
464 return;
465
466 if (desc.bpp[0] > 16 || (desc.bpp[0] % 8u) ||
467 mp_imgfmt_get_forced_csp(rp->imgfmt_a) != MP_CSP_RGB ||
468 desc.num_planes != 1 || desc.comps[3].size)
469 return;
470
471 int depth = desc.comps[0].size;
472 for (int n = 0; n < 3; n++) {
473 struct mp_imgfmt_comp_desc *c = &desc.comps[n];
474
475 if (c->size < 1 || c->size > 8 || c->pad)
476 return;
477
478 if (rp->flags & REPACK_CREATE_ROUND_DOWN) {
479 depth = MPMIN(depth, c->size);
480 } else {
481 depth = MPMAX(depth, c->size);
482 }
483 }
484 if (rp->flags & REPACK_CREATE_EXPAND_8BIT)
485 depth = 8;
486
487 rp->imgfmt_b = find_gbrp_format(depth, 3);
488 if (!rp->imgfmt_b)
489 return;
490 rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3);
491 rp->repack = fringe_rgb_repack;
492 for (int n = 0; n < 3; n++)
493 rp->components[n] = ((int[]){3, 1, 2})[n] - 1;
494
495 for (int n = 0; n < 3; n++) {
496 int bits = desc.comps[n].size;
497 rp->comp_shifts[n] = desc.comps[n].offset;
498 if (rp->comp_lut) {
499 uint8_t *lut = rp->comp_lut + 256 * n;
500 uint8_t zmax = (1 << depth) - 1;
501 uint8_t cmax = (1 << bits) - 1;
502 for (int v = 0; v < 256; v++) {
503 if (rp->pack) {
504 lut[v] = (v * cmax + zmax / 2) / zmax;
505 } else {
506 lut[v] = (v & cmax) * zmax / cmax;
507 }
508 }
509 }
510 }
511
512 rp->comp_size = (desc.bpp[0] + 7) / 8;
513 assert(rp->comp_size == 1 || rp->comp_size == 2);
514
515 if (desc.endian_shift) {
516 assert(rp->comp_size == 2 && (1 << desc.endian_shift) == 2);
517 rp->endian_size = 2;
518 }
519 }
520
unpack_pal(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)521 static void unpack_pal(struct mp_repack *rp,
522 struct mp_image *a, int a_x, int a_y,
523 struct mp_image *b, int b_x, int b_y, int w)
524 {
525 uint8_t *src = mp_image_pixel_ptr(a, 0, a_x, a_y);
526 uint32_t *pal = (void *)a->planes[1];
527
528 uint8_t *dst[4] = {0};
529 for (int p = 0; p < b->num_planes; p++)
530 dst[p] = mp_image_pixel_ptr(b, p, b_x, b_y);
531
532 for (int x = 0; x < w; x++) {
533 uint32_t c = pal[src[x]];
534 dst[0][x] = (c >> 8) & 0xFF; // G
535 dst[1][x] = (c >> 0) & 0xFF; // B
536 dst[2][x] = (c >> 16) & 0xFF; // R
537 dst[3][x] = (c >> 24) & 0xFF; // A
538 }
539 }
540
bitmap_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)541 static void bitmap_repack(struct mp_repack *rp,
542 struct mp_image *a, int a_x, int a_y,
543 struct mp_image *b, int b_x, int b_y, int w)
544 {
545 uint8_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
546 uint8_t *pb = mp_image_pixel_ptr(b, 0, b_x, b_y);
547
548 if (rp->pack) {
549 for (unsigned x = 0; x < w; x += 8) {
550 uint8_t d = 0;
551 int max_b = MPMIN(8, w - x);
552 for (int bp = 0; bp < max_b; bp++)
553 d |= (rp->comp_lut[pb[x + bp]]) << (7 - bp);
554 pa[x / 8] = d;
555 }
556 } else {
557 for (unsigned x = 0; x < w; x += 8) {
558 uint8_t d = pa[x / 8];
559 int max_b = MPMIN(8, w - x);
560 for (int bp = 0; bp < max_b; bp++)
561 pb[x + bp] = rp->comp_lut[d & (1 << (7 - bp))];
562 }
563 }
564 }
565
setup_misc_packer(struct mp_repack * rp)566 static void setup_misc_packer(struct mp_repack *rp)
567 {
568 if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) {
569 int grap_fmt = find_gbrp_format(8, 4);
570 if (!grap_fmt)
571 return;
572 rp->imgfmt_b = grap_fmt;
573 rp->repack = unpack_pal;
574 } else {
575 enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);
576 if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) {
577 rp->comp_lut = talloc_array(rp, uint8_t, 256);
578 rp->imgfmt_b = IMGFMT_Y1;
579 int max = 1;
580 if (rp->flags & REPACK_CREATE_EXPAND_8BIT) {
581 rp->imgfmt_b = IMGFMT_Y8;
582 max = 255;
583 }
584 bool inv = avfmt == AV_PIX_FMT_MONOWHITE;
585 for (int n = 0; n < 256; n++) {
586 rp->comp_lut[n] = rp->pack ? (inv ^ (n >= (max + 1) / 2))
587 : ((inv ^ !!n) ? max : 0);
588 }
589 rp->repack = bitmap_repack;
590 return;
591 }
592 }
593 }
594
// Packed 4:2:2: a group of 2 pixels consists of 4 comp_t samples. c[0] and
// c[1] are the positions of the two luma samples within the group, c[4] and
// c[5] the positions of the samples taken from planes 1 and 2.

#define PA_P422(name, comp_t)                                               \
    static void name(void *dst, void *src[], int w, uint8_t *c) {           \
        for (int x = 0; x < w; x += 2) {                                    \
            comp_t *group = (comp_t *)dst + x * 2;                          \
            comp_t *luma = (comp_t *)src[0] + x;                            \
            int cx = x >> 1;                                                \
            group[c[0]] = luma[0];                                          \
            group[c[1]] = luma[1];                                          \
            group[c[4]] = ((comp_t *)src[1])[cx];                           \
            group[c[5]] = ((comp_t *)src[2])[cx];                           \
        }                                                                   \
    }

#define UN_P422(name, comp_t)                                               \
    static void name(void *src, void *dst[], int w, uint8_t *c) {           \
        for (int x = 0; x < w; x += 2) {                                    \
            comp_t *group = (comp_t *)src + x * 2;                          \
            comp_t *luma = (comp_t *)dst[0] + x;                            \
            int cx = x >> 1;                                                \
            luma[0] = group[c[0]];                                          \
            luma[1] = group[c[1]];                                          \
            ((comp_t *)dst[1])[cx] = group[c[4]];                           \
            ((comp_t *)dst[2])[cx] = group[c[5]];                           \
        }                                                                   \
    }

PA_P422(pa_p422_8,  uint8_t)
PA_P422(pa_p422_16, uint16_t)
UN_P422(un_p422_8,  uint8_t)
UN_P422(un_p422_16, uint16_t)
620
// Pack 8 bit 4:1:1: every group of 4 pixels becomes 6 bytes. c[0]..c[3] are
// the byte positions of the 4 luma samples within the group, c[4] and c[5]
// the positions of the samples taken from planes 1 and 2.
static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c)
{
    for (int x = 0; x < w; x += 4) {
        uint8_t *group = (uint8_t *)dst + x / 4 * 6;
        uint8_t *luma = (uint8_t *)src[0] + x;
        int cx = x >> 2;

        for (int i = 0; i < 4; i++)
            group[c[i]] = luma[i];
        group[c[4]] = ((uint8_t *)src[1])[cx];
        group[c[5]] = ((uint8_t *)src[2])[cx];
    }
}
632
633
// Unpack 8 bit 4:1:1: every group of 6 bytes yields 4 pixels. c[0]..c[3] are
// the byte positions of the 4 luma samples within the group, c[4] and c[5]
// the positions of the samples written to planes 1 and 2.
static void un_p411_8(void *src, void *dst[], int w, uint8_t *c)
{
    for (int x = 0; x < w; x += 4) {
        uint8_t *group = (uint8_t *)src + x / 4 * 6;
        uint8_t *luma = (uint8_t *)dst[0] + x;
        int cx = x >> 2;

        for (int i = 0; i < 4; i++)
            luma[i] = group[c[i]];
        ((uint8_t *)dst[1])[cx] = group[c[4]];
        ((uint8_t *)dst[2])[cx] = group[c[5]];
    }
}
645
fringe_yuv_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)646 static void fringe_yuv_repack(struct mp_repack *rp,
647 struct mp_image *a, int a_x, int a_y,
648 struct mp_image *b, int b_x, int b_y, int w)
649 {
650 void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
651
652 void *pb[4] = {0};
653 for (int p = 0; p < b->num_planes; p++)
654 pb[p] = mp_image_pixel_ptr(b, p, b_x, b_y);
655
656 rp->repack_fringe_yuv(pa, pb, w, rp->comp_map);
657 }
658
setup_fringe_yuv_packer(struct mp_repack * rp)659 static void setup_fringe_yuv_packer(struct mp_repack *rp)
660 {
661 struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
662 if (!(desc.flags & MP_IMGFLAG_PACKED_SS_YUV) ||
663 mp_imgfmt_desc_get_num_comps(&desc) != 3 ||
664 desc.align_x > 4)
665 return;
666
667 uint8_t y_loc[4];
668 if (!mp_imgfmt_get_packed_yuv_locations(desc.id, y_loc))
669 return;
670
671 for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
672 if (!desc.comps[n].size)
673 continue;
674 if (desc.comps[n].size != desc.comps[0].size ||
675 desc.comps[n].pad < 0 ||
676 desc.comps[n].offset % desc.comps[0].size)
677 return;
678 if (n == 1 || n == 2) {
679 rp->comp_map[4 + (n - 1)] =
680 desc.comps[n].offset / desc.comps[0].size;
681 }
682 }
683 for (int n = 0; n < desc.align_x; n++) {
684 if (y_loc[n] % desc.comps[0].size)
685 return;
686 rp->comp_map[n] = y_loc[n] / desc.comps[0].size;
687 }
688
689 if (desc.comps[0].size == 8 && desc.align_x == 2) {
690 rp->repack_fringe_yuv = rp->pack ? pa_p422_8 : un_p422_8;
691 } else if (desc.comps[0].size == 16 && desc.align_x == 2) {
692 rp->repack_fringe_yuv = rp->pack ? pa_p422_16 : un_p422_16;
693 } else if (desc.comps[0].size == 8 && desc.align_x == 4) {
694 rp->repack_fringe_yuv = rp->pack ? pa_p411_8 : un_p411_8;
695 }
696
697 if (!rp->repack_fringe_yuv)
698 return;
699
700 struct mp_regular_imgfmt yuvfmt = {
701 .component_type = MP_COMPONENT_TYPE_UINT,
702 // NB: same problem with P010 and not clearing padding.
703 .component_size = desc.comps[0].size / 8u,
704 .num_planes = 3,
705 .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
706 .chroma_xs = desc.chroma_xs,
707 .chroma_ys = 0,
708 };
709 rp->imgfmt_b = mp_find_regular_imgfmt(&yuvfmt);
710 rp->repack = fringe_yuv_repack;
711
712 if (desc.endian_shift) {
713 rp->endian_size = 1 << desc.endian_shift;
714 assert(rp->endian_size == 2);
715 }
716 }
717
repack_nv(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)718 static void repack_nv(struct mp_repack *rp,
719 struct mp_image *a, int a_x, int a_y,
720 struct mp_image *b, int b_x, int b_y, int w)
721 {
722 int xs = a->fmt.chroma_xs;
723
724 uint32_t *pa = mp_image_pixel_ptr(a, 1, a_x, a_y);
725
726 void *pb[2];
727 for (int p = 0; p < 2; p++) {
728 int s = rp->components[p];
729 pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
730 }
731
732 rp->packed_repack_scanline(pa, pb, (w + (1 << xs) - 1) >> xs);
733 }
734
setup_nv_packer(struct mp_repack * rp)735 static void setup_nv_packer(struct mp_repack *rp)
736 {
737 struct mp_regular_imgfmt desc;
738 if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a))
739 return;
740
741 // Check for NV.
742 if (desc.num_planes != 2)
743 return;
744 if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
745 return;
746 if (desc.planes[1].num_components != 2)
747 return;
748 int cr0 = desc.planes[1].components[0];
749 int cr1 = desc.planes[1].components[1];
750 if (cr0 > cr1)
751 MPSWAP(int, cr0, cr1);
752 if (cr0 != 2 || cr1 != 3)
753 return;
754
755 // Construct equivalent planar format.
756 struct mp_regular_imgfmt desc2 = desc;
757 desc2.num_planes = 3;
758 desc2.planes[1].num_components = 1;
759 desc2.planes[1].components[0] = 2;
760 desc2.planes[2].num_components = 1;
761 desc2.planes[2].components[0] = 3;
762 // For P010. Strangely this concept exists only for the NV format.
763 if (desc2.component_pad > 0)
764 desc2.component_pad = 0;
765
766 int planar_fmt = mp_find_regular_imgfmt(&desc2);
767 if (!planar_fmt)
768 return;
769
770 for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
771 const struct regular_repacker *pa = ®ular_repackers[i];
772
773 void (*repack_cb)(void *pa, void *pb[], int w) =
774 rp->pack ? pa->pa_scanline : pa->un_scanline;
775
776 if (pa->packed_width != desc.component_size * 2 * 8 ||
777 pa->component_width != desc.component_size * 8 ||
778 pa->num_components != 2 ||
779 pa->prepadding != 0 ||
780 !repack_cb)
781 continue;
782
783 rp->repack = repack_nv;
784 rp->passthrough_y = true;
785 rp->packed_repack_scanline = repack_cb;
786 rp->imgfmt_b = planar_fmt;
787 rp->components[0] = desc.planes[1].components[0] - 1;
788 rp->components[1] = desc.planes[1].components[1] - 1;
789 return;
790 }
791 }
792
// Float -> fixed point: add offset o, scale by m, round with lrint() and
// clamp the result to [0, p_max].
#define PA_F32(name, packed_t)                                              \
    static void name(void *dst, float *src, int w, float m, float o,        \
                     uint32_t p_max) {                                      \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++)                                         \
            out[x] = MPCLAMP(lrint((src[x] + o) * m), 0, (packed_t)p_max);  \
    }

// Fixed point -> float: scale by m, then add offset o. The last parameter is
// not used.
#define UN_F32(name, packed_t)                                              \
    static void name(void *src, float *dst, int w, float m, float o,        \
                     uint32_t unused) {                                     \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++)                                         \
            dst[x] = in[x] * m + o;                                         \
    }

PA_F32(pa_f32_8,  uint8_t)
UN_F32(un_f32_8,  uint8_t)
PA_F32(pa_f32_16, uint16_t)
UN_F32(un_f32_16, uint16_t)
813
814 // In all this, float counts as "unpacked".
815 static void repack_float(struct mp_repack *rp,
816 struct mp_image *a, int a_x, int a_y,
817 struct mp_image *b, int b_x, int b_y, int w)
818 {
819 assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2);
820
821 void (*packer)(void *a, float *b, int w, float fm, float fb, uint32_t max)
822 = rp->pack ? (rp->f32_comp_size == 1 ? pa_f32_8 : pa_f32_16)
823 : (rp->f32_comp_size == 1 ? un_f32_8 : un_f32_16);
824
825 for (int p = 0; p < b->num_planes; p++) {
826 int h = (1 << b->fmt.chroma_ys) - (1 << b->fmt.ys[p]) + 1;
827 for (int y = 0; y < h; y++) {
828 void *pa = mp_image_pixel_ptr_ny(a, p, a_x, a_y + y);
829 void *pb = mp_image_pixel_ptr_ny(b, p, b_x, b_y + y);
830
831 packer(pa, pb, w >> b->fmt.xs[p], rp->f32_m[p], rp->f32_o[p],
832 rp->f32_pmax[p]);
833 }
834 }
835 }
836
update_repack_float(struct mp_repack * rp)837 static void update_repack_float(struct mp_repack *rp)
838 {
839 if (!rp->f32_comp_size)
840 return;
841
842 // Image in input format.
843 struct mp_image *ui = rp->pack ? rp->steps[rp->num_steps - 1].buf[1]
844 : rp->steps[0].buf[0];
845 enum mp_csp csp = ui->params.color.space;
846 enum mp_csp_levels levels = ui->params.color.levels;
847 if (rp->f32_csp_space == csp && rp->f32_csp_levels == levels)
848 return;
849
850 // The fixed point format.
851 struct mp_regular_imgfmt desc = {0};
852 mp_get_regular_imgfmt(&desc, rp->imgfmt_b);
853 assert(desc.component_size);
854
855 int comp_bits = desc.component_size * 8 + MPMIN(desc.component_pad, 0);
856 for (int p = 0; p < desc.num_planes; p++) {
857 double m, o;
858 mp_get_csp_uint_mul(csp, levels, comp_bits, desc.planes[p].components[0],
859 &m, &o);
860 rp->f32_m[p] = rp->pack ? 1.0 / m : m;
861 rp->f32_o[p] = rp->pack ? -o : o;
862 rp->f32_pmax[p] = (1u << comp_bits) - 1;
863 }
864
865 rp->f32_csp_space = csp;
866 rp->f32_csp_levels = levels;
867 }
868
repack_line(struct mp_repack * rp,int dst_x,int dst_y,int src_x,int src_y,int w)869 void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
870 int src_x, int src_y, int w)
871 {
872 assert(rp->configured);
873
874 struct repack_step *first = &rp->steps[0];
875 struct repack_step *last = &rp->steps[rp->num_steps - 1];
876
877 assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0);
878 assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x));
879 assert(src_x + w <= MP_ALIGN_UP(first->buf[0]->w, first->fmt[0].align_x));
880 assert(dst_y < last->buf[1]->h);
881 assert(src_y < first->buf[0]->h);
882 assert(!(dst_x & (last->fmt[1].align_x - 1)));
883 assert(!(src_x & (first->fmt[0].align_x - 1)));
884 assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1)));
885 assert(!(dst_y & (last->fmt[1].align_y - 1)));
886 assert(!(src_y & (first->fmt[0].align_y - 1)));
887
888 for (int n = 0; n < rp->num_steps; n++) {
889 struct repack_step *rs = &rp->steps[n];
890
891 // When writing to temporary buffers, always write to the start (maybe
892 // helps with locality).
893 int sx = rs->user_buf[0] ? src_x : 0;
894 int sy = rs->user_buf[0] ? src_y : 0;
895 int dx = rs->user_buf[1] ? dst_x : 0;
896 int dy = rs->user_buf[1] ? dst_y : 0;
897
898 struct mp_image *buf_a = rs->buf[rp->pack];
899 struct mp_image *buf_b = rs->buf[!rp->pack];
900 int a_x = rp->pack ? dx : sx;
901 int a_y = rp->pack ? dy : sy;
902 int b_x = rp->pack ? sx : dx;
903 int b_y = rp->pack ? sy : dy;
904
905 switch (rs->type) {
906 case REPACK_STEP_REPACK: {
907 if (rp->repack)
908 rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
909
910 for (int p = 0; p < rs->fmt[0].num_planes; p++) {
911 if (rp->copy_buf[p])
912 copy_plane(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, p);
913 }
914 break;
915 }
916 case REPACK_STEP_ENDIAN:
917 swap_endian(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w,
918 rp->endian_size);
919 break;
920 case REPACK_STEP_FLOAT:
921 repack_float(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
922 break;
923 }
924 }
925 }
926
setup_format_ne(struct mp_repack * rp)927 static bool setup_format_ne(struct mp_repack *rp)
928 {
929 if (!rp->imgfmt_b)
930 setup_nv_packer(rp);
931 if (!rp->imgfmt_b)
932 setup_misc_packer(rp);
933 if (!rp->imgfmt_b)
934 setup_packed_packer(rp);
935 if (!rp->imgfmt_b)
936 setup_fringe_rgb_packer(rp);
937 if (!rp->imgfmt_b)
938 setup_fringe_yuv_packer(rp);
939 if (!rp->imgfmt_b)
940 rp->imgfmt_b = rp->imgfmt_a; // maybe it was planar after all
941
942 struct mp_regular_imgfmt desc;
943 if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_b))
944 return false;
945
946 // no weird stuff
947 if (desc.num_planes > 4)
948 return false;
949
950 // Endian swapping.
951 if (rp->imgfmt_a != rp->imgfmt_user &&
952 rp->imgfmt_a == mp_find_other_endian(rp->imgfmt_user))
953 {
954 struct mp_imgfmt_desc desc_a = mp_imgfmt_get_desc(rp->imgfmt_a);
955 struct mp_imgfmt_desc desc_u = mp_imgfmt_get_desc(rp->imgfmt_user);
956 rp->endian_size = 1 << desc_u.endian_shift;
957 if (!desc_a.endian_shift && rp->endian_size != 2 && rp->endian_size != 4)
958 return false;
959 }
960
961 // Accept only true planar formats (with known components and no padding).
962 for (int n = 0; n < desc.num_planes; n++) {
963 if (desc.planes[n].num_components != 1)
964 return false;
965 int c = desc.planes[n].components[0];
966 if (c < 1 || c > 4)
967 return false;
968 }
969
970 rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a);
971 rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b);
972
973 // This is if we did a pack step.
974
975 if (rp->flags & REPACK_CREATE_PLANAR_F32) {
976 // imgfmt_b with float32 component type.
977 struct mp_regular_imgfmt fdesc = desc;
978 fdesc.component_type = MP_COMPONENT_TYPE_FLOAT;
979 fdesc.component_size = 4;
980 fdesc.component_pad = 0;
981 int ffmt = mp_find_regular_imgfmt(&fdesc);
982 if (!ffmt)
983 return false;
984 if (ffmt != rp->imgfmt_b) {
985 if (desc.component_type != MP_COMPONENT_TYPE_UINT ||
986 (desc.component_size != 1 && desc.component_size != 2))
987 return false;
988 rp->f32_comp_size = desc.component_size;
989 rp->f32_csp_space = MP_CSP_COUNT;
990 rp->f32_csp_levels = MP_CSP_LEVELS_COUNT;
991 rp->steps[rp->num_steps++] = (struct repack_step) {
992 .type = REPACK_STEP_FLOAT,
993 .fmt = {
994 mp_imgfmt_get_desc(ffmt),
995 rp->fmt_b,
996 },
997 };
998 }
999 }
1000
1001 rp->steps[rp->num_steps++] = (struct repack_step) {
1002 .type = REPACK_STEP_REPACK,
1003 .fmt = { rp->fmt_b, rp->fmt_a },
1004 };
1005
1006 if (rp->endian_size) {
1007 rp->steps[rp->num_steps++] = (struct repack_step) {
1008 .type = REPACK_STEP_ENDIAN,
1009 .fmt = {
1010 rp->fmt_a,
1011 mp_imgfmt_get_desc(rp->imgfmt_user),
1012 },
1013 };
1014 }
1015
1016 // Reverse if unpack (to reflect actual data flow)
1017 if (!rp->pack) {
1018 for (int n = 0; n < rp->num_steps / 2; n++) {
1019 MPSWAP(struct repack_step, rp->steps[n],
1020 rp->steps[rp->num_steps - 1 - n]);
1021 }
1022 for (int n = 0; n < rp->num_steps; n++) {
1023 struct repack_step *rs = &rp->steps[n];
1024 MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]);
1025 }
1026 }
1027
1028 for (int n = 0; n < rp->num_steps - 1; n++)
1029 assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id);
1030
1031 return true;
1032 }
1033
reset_params(struct mp_repack * rp)1034 static void reset_params(struct mp_repack *rp)
1035 {
1036 rp->num_steps = 0;
1037 rp->imgfmt_b = 0;
1038 rp->repack = NULL;
1039 rp->passthrough_y = false;
1040 rp->endian_size = 0;
1041 rp->packed_repack_scanline = NULL;
1042 rp->comp_size = 0;
1043 talloc_free(rp->comp_lut);
1044 rp->comp_lut = NULL;
1045 }
1046
setup_format(struct mp_repack * rp)1047 static bool setup_format(struct mp_repack *rp)
1048 {
1049 reset_params(rp);
1050 rp->imgfmt_a = rp->imgfmt_user;
1051 if (setup_format_ne(rp))
1052 return true;
1053 // Try reverse endian.
1054 reset_params(rp);
1055 rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user);
1056 return rp->imgfmt_a && setup_format_ne(rp);
1057 }
1058
mp_repack_create_planar(int imgfmt,bool pack,int flags)1059 struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags)
1060 {
1061 struct mp_repack *rp = talloc_zero(NULL, struct mp_repack);
1062 rp->imgfmt_user = imgfmt;
1063 rp->pack = pack;
1064 rp->flags = flags;
1065
1066 if (!setup_format(rp)) {
1067 talloc_free(rp);
1068 return NULL;
1069 }
1070
1071 return rp;
1072 }
1073
mp_repack_get_format_src(struct mp_repack * rp)1074 int mp_repack_get_format_src(struct mp_repack *rp)
1075 {
1076 return rp->steps[0].fmt[0].id;
1077 }
1078
mp_repack_get_format_dst(struct mp_repack * rp)1079 int mp_repack_get_format_dst(struct mp_repack *rp)
1080 {
1081 return rp->steps[rp->num_steps - 1].fmt[1].id;
1082 }
1083
mp_repack_get_align_x(struct mp_repack * rp)1084 int mp_repack_get_align_x(struct mp_repack *rp)
1085 {
1086 // We really want the LCM between those, but since only one of them is
1087 // packed (or they're the same format), and the chroma subsampling is the
1088 // same for both, only the packed one matters.
1089 return rp->fmt_a.align_x;
1090 }
1091
mp_repack_get_align_y(struct mp_repack * rp)1092 int mp_repack_get_align_y(struct mp_repack *rp)
1093 {
1094 return rp->fmt_a.align_y; // should be the same for packed/planar formats
1095 }
1096
image_realloc(struct mp_image ** img,int fmt,int w,int h)1097 static void image_realloc(struct mp_image **img, int fmt, int w, int h)
1098 {
1099 if (*img && (*img)->imgfmt == fmt && (*img)->w == w && (*img)->h == h)
1100 return;
1101 talloc_free(*img);
1102 *img = mp_image_alloc(fmt, w, h);
1103 }
1104
repack_config_buffers(struct mp_repack * rp,int dst_flags,struct mp_image * dst,int src_flags,struct mp_image * src,bool * enable_passthrough)1105 bool repack_config_buffers(struct mp_repack *rp,
1106 int dst_flags, struct mp_image *dst,
1107 int src_flags, struct mp_image *src,
1108 bool *enable_passthrough)
1109 {
1110 struct repack_step *rs_first = &rp->steps[0];
1111 struct repack_step *rs_last = &rp->steps[rp->num_steps - 1];
1112
1113 rp->configured = false;
1114
1115 assert(dst && src);
1116
1117 int buf_w = MPMAX(dst->w, src->w);
1118
1119 assert(dst->imgfmt == rs_last->fmt[1].id);
1120 assert(src->imgfmt == rs_first->fmt[0].id);
1121
1122 // Chain/allocate buffers.
1123
1124 for (int n = 0; n < rp->num_steps; n++)
1125 rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL;
1126
1127 rs_first->buf[0] = src;
1128 rs_last->buf[1] = dst;
1129
1130 for (int n = 0; n < rp->num_steps; n++) {
1131 struct repack_step *rs = &rp->steps[n];
1132
1133 if (!rs->buf[0]) {
1134 assert(n > 0);
1135 rs->buf[0] = rp->steps[n - 1].buf[1];
1136 }
1137
1138 if (rs->buf[1])
1139 continue;
1140
1141 // Note: since repack_line() can have different src/dst offsets, we
1142 // can't do true in-place in general.
1143 bool can_inplace = rs->type == REPACK_STEP_ENDIAN &&
1144 rs->buf[0] != src && rs->buf[0] != dst;
1145 if (can_inplace) {
1146 rs->buf[1] = rs->buf[0];
1147 continue;
1148 }
1149
1150 if (rs != rs_last) {
1151 struct repack_step *next = &rp->steps[n + 1];
1152 if (next->buf[0]) {
1153 rs->buf[1] = next->buf[0];
1154 continue;
1155 }
1156 }
1157
1158 image_realloc(&rs->tmp, rs->fmt[1].id, buf_w, rs->fmt[1].align_y);
1159 if (!rs->tmp)
1160 return false;
1161 talloc_steal(rp, rs->tmp);
1162 rs->buf[1] = rs->tmp;
1163 }
1164
1165 for (int n = 0; n < rp->num_steps; n++) {
1166 struct repack_step *rs = &rp->steps[n];
1167 rs->user_buf[0] = rs->buf[0] == src || rs->buf[0] == dst;
1168 rs->user_buf[1] = rs->buf[1] == src || rs->buf[1] == dst;
1169 }
1170
1171 // If repacking is the only operation. It's also responsible for simply
1172 // copying src to dst if absolutely no filtering is done.
1173 bool may_passthrough =
1174 rp->num_steps == 1 && rp->steps[0].type == REPACK_STEP_REPACK;
1175
1176 for (int p = 0; p < rp->fmt_b.num_planes; p++) {
1177 // (All repack callbacks copy, except nv12 does not copy luma.)
1178 bool repack_copies_plane = rp->repack && !(rp->passthrough_y && p == 0);
1179
1180 bool can_pt = may_passthrough && !repack_copies_plane &&
1181 enable_passthrough && enable_passthrough[p];
1182
1183 // Copy if needed, unless the repack callback does it anyway.
1184 rp->copy_buf[p] = !repack_copies_plane && !can_pt;
1185
1186 if (enable_passthrough)
1187 enable_passthrough[p] = can_pt && !rp->copy_buf[p];
1188 }
1189
1190 if (enable_passthrough) {
1191 for (int n = rp->fmt_b.num_planes; n < MP_MAX_PLANES; n++)
1192 enable_passthrough[n] = false;
1193 }
1194
1195 update_repack_float(rp);
1196
1197 rp->configured = true;
1198
1199 return true;
1200 }
1201