1 /*
2  * This file is part of mpv.
3  *
4  * mpv is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * mpv is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <math.h>
19 
20 #include <libavutil/bswap.h>
21 #include <libavutil/pixfmt.h>
22 
23 #include "common/common.h"
24 #include "repack.h"
25 #include "video/csputils.h"
26 #include "video/fmt-conversion.h"
27 #include "video/img_format.h"
28 #include "video/mp_image.h"
29 
// Tag stored in struct repack_step.type to identify the kind of a step.
// The numeric values are spelled out; they match the implicit numbering.
enum repack_step_type {
    REPACK_STEP_FLOAT  = 0,
    REPACK_STEP_REPACK = 1,
    REPACK_STEP_ENDIAN = 2,
};
35 
// One entry of the mp_repack.steps[] array: a single processing step together
// with the images it reads from and writes to.
struct repack_step {
    // Which kind of step this is (see enum repack_step_type).
    enum repack_step_type type;
    // 0=input, 1=output
    struct mp_image *buf[2];
    bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer
    // Indexed like buf[] (0=input, 1=output); presumably the format
    // descriptions of the respective buffers - TODO confirm against the users.
    struct mp_imgfmt_desc fmt[2];
    struct mp_image *tmp; // output buffer, if needed
};
44 
// State of one repacker instance. The setup_*_packer() functions in this file
// inspect imgfmt_a and, if they support it, fill in imgfmt_b, the repack
// callback and the backend specific fields below.
struct mp_repack {
    bool pack;                  // if false, this is for unpacking
    int flags;                  // tested against REPACK_CREATE_* bits
    int imgfmt_user;            // original mp format (unchanged endian)
    int imgfmt_a;               // original mp format (possibly packed format,
                                // swapped endian)
    int imgfmt_b;               // equivalent unpacked/planar format
    struct mp_imgfmt_desc fmt_a;// ==imgfmt_a
    struct mp_imgfmt_desc fmt_b;// ==imgfmt_b

    // Per-line conversion callback installed by one of the setup functions.
    // "a" is the imgfmt_a side image, "b" the imgfmt_b side image; w is the
    // width in pixels passed through to the scanline function.
    void (*repack)(struct mp_repack *rp,
                   struct mp_image *a, int a_x, int a_y,
                   struct mp_image *b, int b_x, int b_y, int w);

    bool passthrough_y;         // possible luma plane optimization for e.g. nv12
    int endian_size;            // endian swap; 0=none, 2/4=swap word size

    // For packed_repack.
    int components[4];          // b[n] = mp_image.planes[components[n]]
    //  pack:   a is dst, b is src
    //  unpack: a is src, b is dst
    void (*packed_repack_scanline)(void *a, void *b[], int w);

    // Fringe RGB/YUV.
    uint8_t comp_size;          // fringe RGB: bytes per packed pixel (1 or 2)
    uint8_t comp_map[6];        // fringe YUV: [0..3] luma sample positions,
                                // [4..5] positions of components 1 and 2,
                                // all in units of one component
    uint8_t comp_shifts[3];     // fringe RGB: bit offset of each component
    uint8_t *comp_lut;          // fringe RGB: 3 tables of 256 entries;
                                // mono bitmap formats: 1 table of 256 entries
    void (*repack_fringe_yuv)(void *dst, void *src[], int w, uint8_t *c);

    // F32 repacking.
    int f32_comp_size;
    float f32_m[4], f32_o[4];
    uint32_t f32_pmax[4];
    enum mp_csp f32_csp_space;
    enum mp_csp_levels f32_csp_levels;

    // REPACK_STEP_REPACK: if true, need to copy this plane
    bool copy_buf[4];

    // Processing steps; only the first num_steps entries are presumably in
    // use - TODO confirm against the code that builds the chain.
    struct repack_step steps[4];
    int num_steps;

    bool configured;
};
90 
91 // depth = number of LSB in use
find_gbrp_format(int depth,int num_planes)92 static int find_gbrp_format(int depth, int num_planes)
93 {
94     if (num_planes != 3 && num_planes != 4)
95         return 0;
96     struct mp_regular_imgfmt desc = {
97         .component_type = MP_COMPONENT_TYPE_UINT,
98         .forced_csp = MP_CSP_RGB,
99         .component_size = depth > 8 ? 2 : 1,
100         .component_pad = depth - (depth > 8 ? 16 : 8),
101         .num_planes = num_planes,
102         .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
103     };
104     return mp_find_regular_imgfmt(&desc);
105 }
106 
107 // depth = number of LSB in use
find_yuv_format(int depth,int num_planes)108 static int find_yuv_format(int depth, int num_planes)
109 {
110     if (num_planes < 1 || num_planes > 4)
111         return 0;
112     struct mp_regular_imgfmt desc = {
113         .component_type = MP_COMPONENT_TYPE_UINT,
114         .component_size = depth > 8 ? 2 : 1,
115         .component_pad = depth - (depth > 8 ? 16 : 8),
116         .num_planes = num_planes,
117         .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} },
118     };
119     if (num_planes == 2)
120         desc.planes[1].components[0] = 4;
121     return mp_find_regular_imgfmt(&desc);
122 }
123 
124 // Copy one line on the plane p.
copy_plane(struct mp_image * dst,int dst_x,int dst_y,struct mp_image * src,int src_x,int src_y,int w,int p)125 static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
126                        struct mp_image *src, int src_x, int src_y,
127                        int w, int p)
128 {
129     // Number of lines on this plane.
130     int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
131     size_t size = mp_image_plane_bytes(dst, p, dst_x, w);
132 
133     assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);
134 
135     for (int y = 0; y < h; y++) {
136         void *pd = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
137         void *ps = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
138         memcpy(pd, ps, size);
139     }
140 }
141 
142 // Swap endian for one line.
swap_endian(struct mp_image * dst,int dst_x,int dst_y,struct mp_image * src,int src_x,int src_y,int w,int endian_size)143 static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
144                         struct mp_image *src, int src_x, int src_y,
145                         int w, int endian_size)
146 {
147     assert(src->fmt.num_planes == dst->fmt.num_planes);
148 
149     for (int p = 0; p < dst->fmt.num_planes; p++) {
150         int xs = dst->fmt.xs[p];
151         int bpp = dst->fmt.bpp[p] / 8;
152         int words_per_pixel = bpp / endian_size;
153         int num_words = ((w + (1 << xs) - 1) >> xs) * words_per_pixel;
154         // Number of lines on this plane.
155         int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
156 
157         assert(src->fmt.bpp[p] == bpp * 8);
158 
159         for (int y = 0; y < h; y++) {
160             void *s = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
161             void *d = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
162             switch (endian_size) {
163             case 2:
164                 for (int x = 0; x < num_words; x++)
165                     ((uint16_t *)d)[x] = av_bswap16(((uint16_t *)s)[x]);
166                 break;
167             case 4:
168                 for (int x = 0; x < num_words; x++)
169                     ((uint32_t *)d)[x] = av_bswap32(((uint32_t *)s)[x]);
170                 break;
171             default:
172                 assert(0);
173             }
174         }
175     }
176 }
177 
178 // PA = PAck, copy planar input to single packed array
179 // UN = UNpack, copy packed input to planar output
180 // Naming convention:
181 //  pa_/un_ prefix to identify conversion direction.
182 //  Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
183 //      (This is unusual; MSB to LSB is more commonly used to describe formats,
184 //       but our convention makes more sense for byte access in little endian.)
185 //  "c" identifies a color component.
186 //  "z" identifies known zero padding.
187 //  "x" identifies uninitialized padding.
188 //  A component is followed by its size in bits.
189 //  Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
190 // Unpackers will often use "x" for padding, because they ignore it, while
191 // packers will use "z" because they write zero.
192 
// Template: pack 4 planar components into one packed_t word per pixel.
// Component n is read from src[n] (element type plane_t) and shifted left by
// sh_cn bits.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0], *in1 = src[1];                         \
        const plane_t *in2 = src[2], *in3 = src[3];                         \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (packed_t)in0[i] << (sh_c0);                    \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            word |= (packed_t)in2[i] << (sh_c2);                            \
            word |= (packed_t)in3[i] << (sh_c3);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

// Template: unpack one packed_t word per pixel into 4 planes. Component n is
// extracted by shifting right by sh_cn bits and applying mask.
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0], *out1 = dst[1];                             \
        plane_t *out2 = dst[2], *out3 = dst[3];                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
            out2[i] = (word >> (sh_c2)) & (mask);                           \
            out3[i] = (word >> (sh_c3)) & (mask);                           \
        }                                                                   \
    }

// Template: like PA_WORD_4, but for 3 components; pad is ORed into every
// output word (the instantiations below pass 0, i.e. zero padding).
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0], *in1 = src[1], *in2 = src[2];          \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (pad);                                          \
            word |= (packed_t)in0[i] << (sh_c0);                            \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            word |= (packed_t)in2[i] << (sh_c2);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu)
PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24)
// Not sure if this is a good idea; there may be no alignment guarantee.
UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu)
PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48)

// Template: like UN_WORD_4, but for 3 components; remaining bits of the
// packed word are ignored.
#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0], *out1 = dst[1], *out2 = dst[2];             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
            out2[i] = (word >> (sh_c2)) & (mask);                           \
        }                                                                   \
    }

UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8,  16, 0xFFu)
PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8,  16, 0)
UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)
248 
// Template: pack 2 planar components into one packed_t word per pixel.
// Component n is read from src[n] and shifted left by sh_cn bits; pad is ORed
// into every output word.
#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0], *in1 = src[1];                         \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (pad);                                          \
            word |= (packed_t)in0[i] << (sh_c0);                            \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

// Template: unpack one packed_t word per pixel into 2 planes. Component n is
// extracted by shifting right by sh_cn bits and applying mask.
#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0], *out1 = dst[1];                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
        }                                                                   \
    }

UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8,  0xFFu)
PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8,  0)
UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
271 
// Template: interleave 3 planes into a sequence of comp_t values, 3 per
// pixel, in plane order (src[0], src[1], src[2]).
#define PA_SEQ_3(name, comp_t)                                              \
    static void name(void *dst, void *src[], int w) {                       \
        comp_t *out = dst;                                                  \
        const comp_t *in0 = src[0], *in1 = src[1], *in2 = src[2];           \
        for (int i = 0; i < w; i++) {                                       \
            out[3 * i + 0] = in0[i];                                        \
            out[3 * i + 1] = in1[i];                                        \
            out[3 * i + 2] = in2[i];                                        \
        }                                                                   \
    }

// Template: split a sequence of comp_t triplets into 3 planes.
#define UN_SEQ_3(name, comp_t)                                              \
    static void name(void *src, void *dst[], int w) {                       \
        const comp_t *in = src;                                             \
        comp_t *out0 = dst[0], *out1 = dst[1], *out2 = dst[2];              \
        for (int i = 0; i < w; i++) {                                       \
            out0[i] = in[3 * i + 0];                                        \
            out1[i] = in[3 * i + 1];                                        \
            out2[i] = in[3 * i + 2];                                        \
        }                                                                   \
    }

UN_SEQ_3(un_ccc8,  uint8_t)
PA_SEQ_3(pa_ccc8,  uint8_t)
UN_SEQ_3(un_ccc16, uint16_t)
PA_SEQ_3(pa_ccc16, uint16_t)
296 
297 // "regular": single packed plane, all components have same width (except padding)
// One entry of the regular_repackers[] table: the properties a packed format
// must match, plus the scanline functions that convert it.
struct regular_repacker {
    int packed_width;       // number of bits of the packed pixel
    int component_width;    // number of bits for a single component
    int prepadding;         // number of bits of LSB padding
    int num_components;     // number of components that can be accessed
    // Scanline packer: a is the packed destination, b[] the planar sources.
    void (*pa_scanline)(void *a, void *b[], int w);
    // Scanline unpacker: a is the packed source, b[] the planar destinations.
    void (*un_scanline)(void *a, void *b[], int w);
};
306 
307 static const struct regular_repacker regular_repackers[] = {
308     {32, 8,  0, 3, pa_ccc8z8,  un_ccc8x8},
309     {32, 8,  8, 3, pa_z8ccc8,  un_x8ccc8},
310     {32, 8,  0, 4, pa_cccc8,   un_cccc8},
311     {64, 16, 0, 4, pa_cccc16,  un_cccc16},
312     {24, 8,  0, 3, pa_ccc8,    un_ccc8},
313     {48, 16, 0, 3, pa_ccc16,   un_ccc16},
314     {16, 8,  0, 2, pa_cc8,     un_cc8},
315     {32, 16, 0, 2, pa_cc16,    un_cc16},
316     {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
317 };
318 
packed_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)319 static void packed_repack(struct mp_repack *rp,
320                           struct mp_image *a, int a_x, int a_y,
321                           struct mp_image *b, int b_x, int b_y, int w)
322 {
323     uint32_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
324 
325     void *pb[4] = {0};
326     for (int p = 0; p < b->num_planes; p++) {
327         int s = rp->components[p];
328         pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
329     }
330 
331     rp->packed_repack_scanline(pa, pb, w);
332 }
333 
334 // Tries to set a packer/unpacker for component-wise byte aligned formats.
setup_packed_packer(struct mp_repack * rp)335 static void setup_packed_packer(struct mp_repack *rp)
336 {
337     struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
338     if (!(desc.flags & MP_IMGFLAG_HAS_COMPS) ||
339         !(desc.flags & MP_IMGFLAG_TYPE_UINT) ||
340         !(desc.flags & MP_IMGFLAG_NE) ||
341         desc.num_planes != 1)
342         return;
343 
344     int num_real_components = 0;
345     int components[4] = {0};
346     for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
347         if (!desc.comps[n].size)
348             continue;
349         if (desc.comps[n].size != desc.comps[0].size ||
350             desc.comps[n].pad != desc.comps[0].pad ||
351             desc.comps[n].offset % desc.comps[0].size)
352             return;
353         int item = desc.comps[n].offset / desc.comps[0].size;
354         if (item >= 4)
355             return;
356         components[item] = n + 1;
357         num_real_components++;
358     }
359 
360     int depth = desc.comps[0].size + MPMIN(0, desc.comps[0].pad);
361 
362     static const int reorder_gbrp[] = {0, 3, 1, 2, 4};
363     static const int reorder_yuv[] = {0, 1, 2, 3, 4};
364     int planar_fmt = 0;
365     const int *reorder = NULL;
366     if (desc.flags & MP_IMGFLAG_COLOR_YUV) {
367         planar_fmt = find_yuv_format(depth, num_real_components);
368         reorder = reorder_yuv;
369     } else {
370         planar_fmt = find_gbrp_format(depth, num_real_components);
371         reorder = reorder_gbrp;
372     }
373     if (!planar_fmt)
374         return;
375 
376     for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
377         const struct regular_repacker *pa = &regular_repackers[i];
378 
379         // The following may assume little endian (because some repack backends
380         // use word access, while the metadata here uses byte access).
381 
382         int prepad = components[0] ? 0 : 8;
383         int first_comp = components[0] ? 0 : 1;
384         void (*repack_cb)(void *pa, void *pb[], int w) =
385             rp->pack ? pa->pa_scanline : pa->un_scanline;
386 
387         if (pa->packed_width != desc.bpp[0] ||
388             pa->component_width != depth ||
389             pa->num_components != num_real_components ||
390             pa->prepadding != prepad ||
391             !repack_cb)
392             continue;
393 
394         rp->repack = packed_repack;
395         rp->packed_repack_scanline = repack_cb;
396         rp->imgfmt_b = planar_fmt;
397         for (int n = 0; n < num_real_components; n++) {
398             // Determine permutation that maps component order between the two
399             // formats, with has_alpha special case (see above).
400             int c = reorder[components[first_comp + n]];
401             rp->components[n] = c == 4 ? num_real_components - 1 : c - 1;
402         }
403         return;
404     }
405 }
406 
// Template: pack 3 planes of 8 bit samples into one packed_t per pixel.
// Each sample is mapped through its own 256 entry section of lut (section n
// for plane n) and the result is shifted left by s0/s1/s2.
#define PA_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *dst, void *src[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        packed_t *out = dst;                                                \
        const uint8_t *in0 = src[0], *in1 = src[1], *in2 = src[2];          \
        for (int i = 0; i < w; i++) {                                       \
            out[i] = (lut[in0[i] + 256 * 0] << s0) |                        \
                     (lut[in1[i] + 256 * 1] << s1) |                        \
                     (lut[in2[i] + 256 * 2] << s2);                         \
        }                                                                   \
    }

// Template: unpack one packed_t per pixel into 3 planes of 8 bit samples.
// The low 8 bits of the word shifted right by s0/s1/s2 index the 256 entry
// lut section of the respective plane.
#define UN_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *src, void *dst[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        const packed_t *in = src;                                           \
        uint8_t *out0 = dst[0], *out1 = dst[1], *out2 = dst[2];             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = lut[((word >> s0) & 0xFF) + 256 * 0];                 \
            out1[i] = lut[((word >> s1) & 0xFF) + 256 * 1];                 \
            out2[i] = lut[((word >> s2) & 0xFF) + 256 * 2];                 \
        }                                                                   \
    }

PA_SHIFT_LUT8(pa_shift_lut8_8,  uint8_t)
PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
UN_SHIFT_LUT8(un_shift_lut8_8,  uint8_t)
UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)
434 
435 static void fringe_rgb_repack(struct mp_repack *rp,
436                               struct mp_image *a, int a_x, int a_y,
437                               struct mp_image *b, int b_x, int b_y, int w)
438 {
439     void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
440 
441     void *pb[4] = {0};
442     for (int p = 0; p < b->num_planes; p++) {
443         int s = rp->components[p];
444         pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
445     }
446 
447     assert(rp->comp_size == 1 || rp->comp_size == 2);
448 
449     void (*repack)(void *pa, void *pb[], int w, uint8_t *lut,
450                    uint8_t s0, uint8_t s1, uint8_t s2) = NULL;
451     if (rp->pack) {
452         repack = rp->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16;
453     } else {
454         repack = rp->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16;
455     }
456     repack(pa, pb, w, rp->comp_lut,
457            rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]);
458 }
459 
setup_fringe_rgb_packer(struct mp_repack * rp)460 static void setup_fringe_rgb_packer(struct mp_repack *rp)
461 {
462     struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
463     if (!(desc.flags & MP_IMGFLAG_HAS_COMPS))
464         return;
465 
466     if (desc.bpp[0] > 16 || (desc.bpp[0] % 8u) ||
467         mp_imgfmt_get_forced_csp(rp->imgfmt_a) != MP_CSP_RGB ||
468         desc.num_planes != 1 || desc.comps[3].size)
469         return;
470 
471     int depth = desc.comps[0].size;
472     for (int n = 0; n < 3; n++) {
473         struct mp_imgfmt_comp_desc *c = &desc.comps[n];
474 
475         if (c->size < 1 || c->size > 8 || c->pad)
476             return;
477 
478         if (rp->flags & REPACK_CREATE_ROUND_DOWN) {
479             depth = MPMIN(depth, c->size);
480         } else {
481             depth = MPMAX(depth, c->size);
482         }
483     }
484     if (rp->flags & REPACK_CREATE_EXPAND_8BIT)
485         depth = 8;
486 
487     rp->imgfmt_b = find_gbrp_format(depth, 3);
488     if (!rp->imgfmt_b)
489         return;
490     rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3);
491     rp->repack = fringe_rgb_repack;
492     for (int n = 0; n < 3; n++)
493         rp->components[n] = ((int[]){3, 1, 2})[n] - 1;
494 
495     for (int n = 0; n < 3; n++) {
496         int bits = desc.comps[n].size;
497         rp->comp_shifts[n] = desc.comps[n].offset;
498         if (rp->comp_lut) {
499             uint8_t *lut = rp->comp_lut + 256 * n;
500             uint8_t zmax = (1 << depth) - 1;
501             uint8_t cmax = (1 << bits) - 1;
502             for (int v = 0; v < 256; v++) {
503                 if (rp->pack) {
504                     lut[v] = (v * cmax + zmax / 2) / zmax;
505                 } else {
506                     lut[v] = (v & cmax) * zmax / cmax;
507                 }
508             }
509         }
510     }
511 
512     rp->comp_size = (desc.bpp[0] + 7) / 8;
513     assert(rp->comp_size == 1 || rp->comp_size == 2);
514 
515     if (desc.endian_shift) {
516         assert(rp->comp_size == 2 && (1 << desc.endian_shift) == 2);
517         rp->endian_size = 2;
518     }
519 }
520 
unpack_pal(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)521 static void unpack_pal(struct mp_repack *rp,
522                        struct mp_image *a, int a_x, int a_y,
523                        struct mp_image *b, int b_x, int b_y, int w)
524 {
525     uint8_t *src = mp_image_pixel_ptr(a, 0, a_x, a_y);
526     uint32_t *pal = (void *)a->planes[1];
527 
528     uint8_t *dst[4] = {0};
529     for (int p = 0; p < b->num_planes; p++)
530         dst[p] = mp_image_pixel_ptr(b, p, b_x, b_y);
531 
532     for (int x = 0; x < w; x++) {
533         uint32_t c = pal[src[x]];
534         dst[0][x] = (c >>  8) & 0xFF; // G
535         dst[1][x] = (c >>  0) & 0xFF; // B
536         dst[2][x] = (c >> 16) & 0xFF; // R
537         dst[3][x] = (c >> 24) & 0xFF; // A
538     }
539 }
540 
bitmap_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)541 static void bitmap_repack(struct mp_repack *rp,
542                           struct mp_image *a, int a_x, int a_y,
543                           struct mp_image *b, int b_x, int b_y, int w)
544 {
545     uint8_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
546     uint8_t *pb = mp_image_pixel_ptr(b, 0, b_x, b_y);
547 
548     if (rp->pack) {
549         for (unsigned x = 0; x < w; x += 8) {
550             uint8_t d = 0;
551             int max_b = MPMIN(8, w - x);
552             for (int bp = 0; bp < max_b; bp++)
553                 d |= (rp->comp_lut[pb[x + bp]]) << (7 - bp);
554             pa[x / 8] = d;
555         }
556     } else {
557         for (unsigned x = 0; x < w; x += 8) {
558             uint8_t d = pa[x / 8];
559             int max_b = MPMIN(8, w - x);
560             for (int bp = 0; bp < max_b; bp++)
561                 pb[x + bp] = rp->comp_lut[d & (1 << (7 - bp))];
562         }
563     }
564 }
565 
setup_misc_packer(struct mp_repack * rp)566 static void setup_misc_packer(struct mp_repack *rp)
567 {
568     if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) {
569         int grap_fmt = find_gbrp_format(8, 4);
570         if (!grap_fmt)
571             return;
572         rp->imgfmt_b = grap_fmt;
573         rp->repack = unpack_pal;
574     } else {
575         enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);
576         if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) {
577             rp->comp_lut = talloc_array(rp, uint8_t, 256);
578             rp->imgfmt_b = IMGFMT_Y1;
579             int max = 1;
580             if (rp->flags & REPACK_CREATE_EXPAND_8BIT) {
581                 rp->imgfmt_b = IMGFMT_Y8;
582                 max = 255;
583             }
584             bool inv = avfmt == AV_PIX_FMT_MONOWHITE;
585             for (int n = 0; n < 256; n++) {
586                 rp->comp_lut[n] = rp->pack ? (inv ^ (n >= (max + 1) / 2))
587                                            : ((inv ^ !!n) ? max : 0);
588             }
589             rp->repack = bitmap_repack;
590             return;
591         }
592     }
593 }
594 
// Template: pack planar data into groups of 4 comp_t values, each group
// covering 2 horizontally adjacent pixels.
// c[] gives positions within the group: c[0], c[1] for the two src[0]
// samples, c[4] for the src[1] sample, c[5] for the src[2] sample.
// Always processes whole groups, i.e. w is effectively rounded up to 2.
#define PA_P422(name, comp_t)                                               \
    static void name(void *dst, void *src[], int w, uint8_t *c) {           \
        const comp_t *in0 = src[0], *in1 = src[1], *in2 = src[2];           \
        for (int x = 0; x < w; x += 2) {                                    \
            comp_t *group = (comp_t *)dst + x * 2;                          \
            group[c[0]] = in0[x + 0];                                       \
            group[c[1]] = in0[x + 1];                                       \
            group[c[4]] = in1[x >> 1];                                      \
            group[c[5]] = in2[x >> 1];                                      \
        }                                                                   \
    }

// Template: inverse of PA_P422; splits groups of 4 comp_t values into the 3
// planes in dst[], using the same position map c[].
#define UN_P422(name, comp_t)                                               \
    static void name(void *src, void *dst[], int w, uint8_t *c) {           \
        comp_t *out0 = dst[0], *out1 = dst[1], *out2 = dst[2];              \
        for (int x = 0; x < w; x += 2) {                                    \
            const comp_t *group = (comp_t *)src + x * 2;                    \
            out0[x + 0]  = group[c[0]];                                     \
            out0[x + 1]  = group[c[1]];                                     \
            out1[x >> 1] = group[c[4]];                                     \
            out2[x >> 1] = group[c[5]];                                     \
        }                                                                   \
    }

PA_P422(pa_p422_8,  uint8_t)
PA_P422(pa_p422_16, uint16_t)
UN_P422(un_p422_8,  uint8_t)
UN_P422(un_p422_16, uint16_t)
620 
// Pack planar 8 bit data into groups of 6 bytes, each group covering 4
// horizontally adjacent pixels.
// c[] gives byte positions within the group: c[0..3] for the four src[0]
// samples, c[4] for the src[1] sample, c[5] for the src[2] sample.
// Always processes whole groups, i.e. w is effectively rounded up to 4.
static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c)
{
    const uint8_t *in0 = src[0];
    const uint8_t *in1 = src[1];
    const uint8_t *in2 = src[2];

    for (int x = 0, n = 0; x < w; x += 4, n++) {
        uint8_t *group = (uint8_t *)dst + n * 6;
        group[c[0]] = in0[x + 0];
        group[c[1]] = in0[x + 1];
        group[c[2]] = in0[x + 2];
        group[c[3]] = in0[x + 3];
        group[c[4]] = in1[n];
        group[c[5]] = in2[n];
    }
}
632 
633 
// Split groups of 6 bytes, each covering 4 horizontally adjacent pixels,
// into 3 planes of 8 bit samples.
// c[] gives byte positions within the group: c[0..3] for the four dst[0]
// samples, c[4] for the dst[1] sample, c[5] for the dst[2] sample.
// Always processes whole groups, i.e. w is effectively rounded up to 4.
static void un_p411_8(void *src, void *dst[], int w, uint8_t *c)
{
    uint8_t *out0 = dst[0];
    uint8_t *out1 = dst[1];
    uint8_t *out2 = dst[2];

    for (int x = 0, n = 0; x < w; x += 4, n++) {
        const uint8_t *group = (uint8_t *)src + n * 6;
        out0[x + 0] = group[c[0]];
        out0[x + 1] = group[c[1]];
        out0[x + 2] = group[c[2]];
        out0[x + 3] = group[c[3]];
        out1[n] = group[c[4]];
        out2[n] = group[c[5]];
    }
}
645 
fringe_yuv_repack(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)646 static void fringe_yuv_repack(struct mp_repack *rp,
647                               struct mp_image *a, int a_x, int a_y,
648                               struct mp_image *b, int b_x, int b_y, int w)
649 {
650     void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
651 
652     void *pb[4] = {0};
653     for (int p = 0; p < b->num_planes; p++)
654         pb[p] = mp_image_pixel_ptr(b, p, b_x, b_y);
655 
656     rp->repack_fringe_yuv(pa, pb, w, rp->comp_map);
657 }
658 
setup_fringe_yuv_packer(struct mp_repack * rp)659 static void setup_fringe_yuv_packer(struct mp_repack *rp)
660 {
661     struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
662     if (!(desc.flags & MP_IMGFLAG_PACKED_SS_YUV) ||
663         mp_imgfmt_desc_get_num_comps(&desc) != 3 ||
664         desc.align_x > 4)
665         return;
666 
667     uint8_t y_loc[4];
668     if (!mp_imgfmt_get_packed_yuv_locations(desc.id, y_loc))
669         return;
670 
671     for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
672         if (!desc.comps[n].size)
673             continue;
674         if (desc.comps[n].size != desc.comps[0].size ||
675             desc.comps[n].pad < 0 ||
676             desc.comps[n].offset % desc.comps[0].size)
677             return;
678         if (n == 1 || n == 2) {
679             rp->comp_map[4 + (n - 1)] =
680                 desc.comps[n].offset / desc.comps[0].size;
681         }
682     }
683     for (int n = 0; n < desc.align_x; n++) {
684         if (y_loc[n] % desc.comps[0].size)
685             return;
686         rp->comp_map[n] = y_loc[n] / desc.comps[0].size;
687     }
688 
689     if (desc.comps[0].size == 8 && desc.align_x == 2) {
690         rp->repack_fringe_yuv = rp->pack ? pa_p422_8 : un_p422_8;
691     } else if (desc.comps[0].size == 16 && desc.align_x == 2) {
692         rp->repack_fringe_yuv = rp->pack ? pa_p422_16 : un_p422_16;
693     } else if (desc.comps[0].size == 8 && desc.align_x == 4) {
694         rp->repack_fringe_yuv = rp->pack ? pa_p411_8 : un_p411_8;
695     }
696 
697     if (!rp->repack_fringe_yuv)
698         return;
699 
700     struct mp_regular_imgfmt yuvfmt = {
701         .component_type = MP_COMPONENT_TYPE_UINT,
702         // NB: same problem with P010 and not clearing padding.
703         .component_size = desc.comps[0].size / 8u,
704         .num_planes = 3,
705         .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
706         .chroma_xs = desc.chroma_xs,
707         .chroma_ys = 0,
708     };
709     rp->imgfmt_b = mp_find_regular_imgfmt(&yuvfmt);
710     rp->repack = fringe_yuv_repack;
711 
712     if (desc.endian_shift) {
713         rp->endian_size = 1 << desc.endian_shift;
714         assert(rp->endian_size == 2);
715     }
716 }
717 
repack_nv(struct mp_repack * rp,struct mp_image * a,int a_x,int a_y,struct mp_image * b,int b_x,int b_y,int w)718 static void repack_nv(struct mp_repack *rp,
719                       struct mp_image *a, int a_x, int a_y,
720                       struct mp_image *b, int b_x, int b_y, int w)
721 {
722     int xs = a->fmt.chroma_xs;
723 
724     uint32_t *pa = mp_image_pixel_ptr(a, 1, a_x, a_y);
725 
726     void *pb[2];
727     for (int p = 0; p < 2; p++) {
728         int s = rp->components[p];
729         pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y);
730     }
731 
732     rp->packed_repack_scanline(pa, pb, (w + (1 << xs) - 1) >> xs);
733 }
734 
setup_nv_packer(struct mp_repack * rp)735 static void setup_nv_packer(struct mp_repack *rp)
736 {
737     struct mp_regular_imgfmt desc;
738     if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a))
739         return;
740 
741     // Check for NV.
742     if (desc.num_planes != 2)
743         return;
744     if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
745         return;
746     if (desc.planes[1].num_components != 2)
747         return;
748     int cr0 = desc.planes[1].components[0];
749     int cr1 = desc.planes[1].components[1];
750     if (cr0 > cr1)
751         MPSWAP(int, cr0, cr1);
752     if (cr0 != 2 || cr1 != 3)
753         return;
754 
755     // Construct equivalent planar format.
756     struct mp_regular_imgfmt desc2 = desc;
757     desc2.num_planes = 3;
758     desc2.planes[1].num_components = 1;
759     desc2.planes[1].components[0] = 2;
760     desc2.planes[2].num_components = 1;
761     desc2.planes[2].components[0] = 3;
762     // For P010. Strangely this concept exists only for the NV format.
763     if (desc2.component_pad > 0)
764         desc2.component_pad = 0;
765 
766     int planar_fmt = mp_find_regular_imgfmt(&desc2);
767     if (!planar_fmt)
768         return;
769 
770     for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
771         const struct regular_repacker *pa = &regular_repackers[i];
772 
773         void (*repack_cb)(void *pa, void *pb[], int w) =
774             rp->pack ? pa->pa_scanline : pa->un_scanline;
775 
776         if (pa->packed_width != desc.component_size * 2 * 8 ||
777             pa->component_width != desc.component_size * 8 ||
778             pa->num_components != 2 ||
779             pa->prepadding != 0 ||
780             !repack_cb)
781             continue;
782 
783         rp->repack = repack_nv;
784         rp->passthrough_y = true;
785         rp->packed_repack_scanline = repack_cb;
786         rp->imgfmt_b = planar_fmt;
787         rp->components[0] = desc.planes[1].components[0] - 1;
788         rp->components[1] = desc.planes[1].components[1] - 1;
789         return;
790     }
791 }
792 
// Define a function `name` that converts w float samples from src into
// fixed-point samples of type packed_t in dst. Each output sample is
// (src[x] + o) * m, rounded with lrint() and clamped to [0, p_max].
// The rounded value is stored in a local before clamping, so that lrint() is
// called exactly once per sample even if MPCLAMP() (a macro) expands its
// argument more than once.
#define PA_F32(name, packed_t)                                              \
    static void name(void *dst, float *src, int w, float m, float o,        \
                     uint32_t p_max) {                                      \
        packed_t *out = dst;                                                \
        for (int x = 0; x < w; x++) {                                       \
            long v = lrint((src[x] + o) * m);                               \
            out[x] = MPCLAMP(v, 0, (packed_t)p_max);                        \
        }                                                                   \
    }
801 
// Define a function `name` that converts w fixed-point samples of type
// packed_t from src into floats in dst, computing sample * m + o for each.
// The trailing uint32_t parameter is not used by the generated function.
#define UN_F32(name, packed_t)                                              \
    static void name(void *src, float *dst, int w, float m, float o,        \
                     uint32_t unused) {                                     \
        const packed_t *in = src;                                           \
        for (int x = 0; x < w; x++) {                                       \
            dst[x] = in[x] * m + o;                                         \
        }                                                                   \
    }
808 
// float -> fixed-point scanline converters (8 and 16 bit samples).
PA_F32(pa_f32_8, uint8_t)
PA_F32(pa_f32_16, uint16_t)
// fixed-point -> float scanline converters (8 and 16 bit samples).
UN_F32(un_f32_8, uint8_t)
UN_F32(un_f32_16, uint16_t)
813 
814 // In all this, float counts as "unpacked".
815 static void repack_float(struct mp_repack *rp,
816                          struct mp_image *a, int a_x, int a_y,
817                          struct mp_image *b, int b_x, int b_y, int w)
818 {
819     assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2);
820 
821     void (*packer)(void *a, float *b, int w, float fm, float fb, uint32_t max)
822         = rp->pack ? (rp->f32_comp_size == 1 ? pa_f32_8 : pa_f32_16)
823                    : (rp->f32_comp_size == 1 ? un_f32_8 : un_f32_16);
824 
825     for (int p = 0; p < b->num_planes; p++) {
826         int h = (1 << b->fmt.chroma_ys) - (1 << b->fmt.ys[p]) + 1;
827         for (int y = 0; y < h; y++) {
828             void *pa = mp_image_pixel_ptr_ny(a, p, a_x, a_y + y);
829             void *pb = mp_image_pixel_ptr_ny(b, p, b_x, b_y + y);
830 
831             packer(pa, pb, w >> b->fmt.xs[p], rp->f32_m[p], rp->f32_o[p],
832                    rp->f32_pmax[p]);
833         }
834     }
835 }
836 
update_repack_float(struct mp_repack * rp)837 static void update_repack_float(struct mp_repack *rp)
838 {
839     if (!rp->f32_comp_size)
840         return;
841 
842     // Image in input format.
843     struct mp_image *ui =  rp->pack ? rp->steps[rp->num_steps - 1].buf[1]
844                                     : rp->steps[0].buf[0];
845     enum mp_csp csp = ui->params.color.space;
846     enum mp_csp_levels levels = ui->params.color.levels;
847     if (rp->f32_csp_space == csp && rp->f32_csp_levels == levels)
848         return;
849 
850     // The fixed point format.
851     struct mp_regular_imgfmt desc = {0};
852     mp_get_regular_imgfmt(&desc, rp->imgfmt_b);
853     assert(desc.component_size);
854 
855     int comp_bits = desc.component_size * 8 + MPMIN(desc.component_pad, 0);
856     for (int p = 0; p < desc.num_planes; p++) {
857         double m, o;
858         mp_get_csp_uint_mul(csp, levels, comp_bits, desc.planes[p].components[0],
859                             &m, &o);
860         rp->f32_m[p] = rp->pack ? 1.0 / m : m;
861         rp->f32_o[p] = rp->pack ? -o      : o;
862         rp->f32_pmax[p] = (1u << comp_bits) - 1;
863     }
864 
865     rp->f32_csp_space = csp;
866     rp->f32_csp_levels = levels;
867 }
868 
repack_line(struct mp_repack * rp,int dst_x,int dst_y,int src_x,int src_y,int w)869 void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
870                  int src_x, int src_y, int w)
871 {
872     assert(rp->configured);
873 
874     struct repack_step *first = &rp->steps[0];
875     struct repack_step *last = &rp->steps[rp->num_steps - 1];
876 
877     assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0);
878     assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x));
879     assert(src_x + w <= MP_ALIGN_UP(first->buf[0]->w, first->fmt[0].align_x));
880     assert(dst_y < last->buf[1]->h);
881     assert(src_y < first->buf[0]->h);
882     assert(!(dst_x & (last->fmt[1].align_x - 1)));
883     assert(!(src_x & (first->fmt[0].align_x - 1)));
884     assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1)));
885     assert(!(dst_y & (last->fmt[1].align_y - 1)));
886     assert(!(src_y & (first->fmt[0].align_y - 1)));
887 
888     for (int n = 0; n < rp->num_steps; n++) {
889         struct repack_step *rs = &rp->steps[n];
890 
891         // When writing to temporary buffers, always write to the start (maybe
892         // helps with locality).
893         int sx = rs->user_buf[0] ? src_x : 0;
894         int sy = rs->user_buf[0] ? src_y : 0;
895         int dx = rs->user_buf[1] ? dst_x : 0;
896         int dy = rs->user_buf[1] ? dst_y : 0;
897 
898         struct mp_image *buf_a = rs->buf[rp->pack];
899         struct mp_image *buf_b = rs->buf[!rp->pack];
900         int a_x = rp->pack ? dx : sx;
901         int a_y = rp->pack ? dy : sy;
902         int b_x = rp->pack ? sx : dx;
903         int b_y = rp->pack ? sy : dy;
904 
905         switch (rs->type) {
906         case REPACK_STEP_REPACK: {
907             if (rp->repack)
908                 rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
909 
910             for (int p = 0; p < rs->fmt[0].num_planes; p++) {
911                 if (rp->copy_buf[p])
912                     copy_plane(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, p);
913             }
914             break;
915         }
916         case REPACK_STEP_ENDIAN:
917             swap_endian(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w,
918                         rp->endian_size);
919             break;
920         case REPACK_STEP_FLOAT:
921             repack_float(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
922             break;
923         }
924     }
925 }
926 
setup_format_ne(struct mp_repack * rp)927 static bool setup_format_ne(struct mp_repack *rp)
928 {
929     if (!rp->imgfmt_b)
930         setup_nv_packer(rp);
931     if (!rp->imgfmt_b)
932         setup_misc_packer(rp);
933     if (!rp->imgfmt_b)
934         setup_packed_packer(rp);
935     if (!rp->imgfmt_b)
936         setup_fringe_rgb_packer(rp);
937     if (!rp->imgfmt_b)
938         setup_fringe_yuv_packer(rp);
939     if (!rp->imgfmt_b)
940         rp->imgfmt_b = rp->imgfmt_a; // maybe it was planar after all
941 
942     struct mp_regular_imgfmt desc;
943     if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_b))
944         return false;
945 
946     // no weird stuff
947     if (desc.num_planes > 4)
948         return false;
949 
950     // Endian swapping.
951     if (rp->imgfmt_a != rp->imgfmt_user &&
952         rp->imgfmt_a == mp_find_other_endian(rp->imgfmt_user))
953     {
954         struct mp_imgfmt_desc desc_a = mp_imgfmt_get_desc(rp->imgfmt_a);
955         struct mp_imgfmt_desc desc_u = mp_imgfmt_get_desc(rp->imgfmt_user);
956         rp->endian_size = 1 << desc_u.endian_shift;
957         if (!desc_a.endian_shift && rp->endian_size != 2 && rp->endian_size != 4)
958             return false;
959     }
960 
961     // Accept only true planar formats (with known components and no padding).
962     for (int n = 0; n < desc.num_planes; n++) {
963         if (desc.planes[n].num_components != 1)
964             return false;
965         int c = desc.planes[n].components[0];
966         if (c < 1 || c > 4)
967             return false;
968     }
969 
970     rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a);
971     rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b);
972 
973     // This is if we did a pack step.
974 
975     if (rp->flags & REPACK_CREATE_PLANAR_F32) {
976         // imgfmt_b with float32 component type.
977         struct mp_regular_imgfmt fdesc = desc;
978         fdesc.component_type = MP_COMPONENT_TYPE_FLOAT;
979         fdesc.component_size = 4;
980         fdesc.component_pad = 0;
981         int ffmt = mp_find_regular_imgfmt(&fdesc);
982         if (!ffmt)
983             return false;
984         if (ffmt != rp->imgfmt_b) {
985             if (desc.component_type != MP_COMPONENT_TYPE_UINT ||
986                 (desc.component_size != 1 && desc.component_size != 2))
987                 return false;
988             rp->f32_comp_size = desc.component_size;
989             rp->f32_csp_space = MP_CSP_COUNT;
990             rp->f32_csp_levels = MP_CSP_LEVELS_COUNT;
991             rp->steps[rp->num_steps++] = (struct repack_step) {
992                 .type = REPACK_STEP_FLOAT,
993                 .fmt = {
994                     mp_imgfmt_get_desc(ffmt),
995                     rp->fmt_b,
996                 },
997             };
998         }
999     }
1000 
1001     rp->steps[rp->num_steps++] = (struct repack_step) {
1002         .type = REPACK_STEP_REPACK,
1003         .fmt = { rp->fmt_b, rp->fmt_a },
1004     };
1005 
1006     if (rp->endian_size) {
1007         rp->steps[rp->num_steps++] = (struct repack_step) {
1008             .type = REPACK_STEP_ENDIAN,
1009             .fmt = {
1010                 rp->fmt_a,
1011                 mp_imgfmt_get_desc(rp->imgfmt_user),
1012             },
1013         };
1014     }
1015 
1016     // Reverse if unpack (to reflect actual data flow)
1017     if (!rp->pack) {
1018         for (int n = 0; n < rp->num_steps / 2; n++) {
1019             MPSWAP(struct repack_step, rp->steps[n],
1020                    rp->steps[rp->num_steps - 1 - n]);
1021         }
1022         for (int n = 0; n < rp->num_steps; n++) {
1023             struct repack_step *rs = &rp->steps[n];
1024             MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]);
1025         }
1026     }
1027 
1028     for (int n = 0; n < rp->num_steps - 1; n++)
1029         assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id);
1030 
1031     return true;
1032 }
1033 
reset_params(struct mp_repack * rp)1034 static void reset_params(struct mp_repack *rp)
1035 {
1036     rp->num_steps = 0;
1037     rp->imgfmt_b = 0;
1038     rp->repack = NULL;
1039     rp->passthrough_y = false;
1040     rp->endian_size = 0;
1041     rp->packed_repack_scanline = NULL;
1042     rp->comp_size = 0;
1043     talloc_free(rp->comp_lut);
1044     rp->comp_lut = NULL;
1045 }
1046 
setup_format(struct mp_repack * rp)1047 static bool setup_format(struct mp_repack *rp)
1048 {
1049     reset_params(rp);
1050     rp->imgfmt_a = rp->imgfmt_user;
1051     if (setup_format_ne(rp))
1052         return true;
1053     // Try reverse endian.
1054     reset_params(rp);
1055     rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user);
1056     return rp->imgfmt_a && setup_format_ne(rp);
1057 }
1058 
mp_repack_create_planar(int imgfmt,bool pack,int flags)1059 struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags)
1060 {
1061     struct mp_repack *rp = talloc_zero(NULL, struct mp_repack);
1062     rp->imgfmt_user = imgfmt;
1063     rp->pack = pack;
1064     rp->flags = flags;
1065 
1066     if (!setup_format(rp)) {
1067         talloc_free(rp);
1068         return NULL;
1069     }
1070 
1071     return rp;
1072 }
1073 
mp_repack_get_format_src(struct mp_repack * rp)1074 int mp_repack_get_format_src(struct mp_repack *rp)
1075 {
1076     return rp->steps[0].fmt[0].id;
1077 }
1078 
mp_repack_get_format_dst(struct mp_repack * rp)1079 int mp_repack_get_format_dst(struct mp_repack *rp)
1080 {
1081     return rp->steps[rp->num_steps - 1].fmt[1].id;
1082 }
1083 
mp_repack_get_align_x(struct mp_repack * rp)1084 int mp_repack_get_align_x(struct mp_repack *rp)
1085 {
1086     // We really want the LCM between those, but since only one of them is
1087     // packed (or they're the same format), and the chroma subsampling is the
1088     // same for both, only the packed one matters.
1089     return rp->fmt_a.align_x;
1090 }
1091 
mp_repack_get_align_y(struct mp_repack * rp)1092 int mp_repack_get_align_y(struct mp_repack *rp)
1093 {
1094     return rp->fmt_a.align_y; // should be the same for packed/planar formats
1095 }
1096 
image_realloc(struct mp_image ** img,int fmt,int w,int h)1097 static void image_realloc(struct mp_image **img, int fmt, int w, int h)
1098 {
1099     if (*img && (*img)->imgfmt == fmt && (*img)->w == w && (*img)->h == h)
1100         return;
1101     talloc_free(*img);
1102     *img = mp_image_alloc(fmt, w, h);
1103 }
1104 
repack_config_buffers(struct mp_repack * rp,int dst_flags,struct mp_image * dst,int src_flags,struct mp_image * src,bool * enable_passthrough)1105 bool repack_config_buffers(struct mp_repack *rp,
1106                            int dst_flags, struct mp_image *dst,
1107                            int src_flags, struct mp_image *src,
1108                            bool *enable_passthrough)
1109 {
1110     struct repack_step *rs_first = &rp->steps[0];
1111     struct repack_step *rs_last = &rp->steps[rp->num_steps - 1];
1112 
1113     rp->configured = false;
1114 
1115     assert(dst && src);
1116 
1117     int buf_w = MPMAX(dst->w, src->w);
1118 
1119     assert(dst->imgfmt == rs_last->fmt[1].id);
1120     assert(src->imgfmt == rs_first->fmt[0].id);
1121 
1122     // Chain/allocate buffers.
1123 
1124     for (int n = 0; n < rp->num_steps; n++)
1125         rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL;
1126 
1127     rs_first->buf[0] = src;
1128     rs_last->buf[1] = dst;
1129 
1130     for (int n = 0; n < rp->num_steps; n++) {
1131         struct repack_step *rs = &rp->steps[n];
1132 
1133         if (!rs->buf[0]) {
1134             assert(n > 0);
1135             rs->buf[0] = rp->steps[n - 1].buf[1];
1136         }
1137 
1138         if (rs->buf[1])
1139             continue;
1140 
1141         // Note: since repack_line() can have different src/dst offsets, we
1142         //       can't do true in-place in general.
1143         bool can_inplace = rs->type == REPACK_STEP_ENDIAN &&
1144                            rs->buf[0] != src && rs->buf[0] != dst;
1145         if (can_inplace) {
1146             rs->buf[1] = rs->buf[0];
1147             continue;
1148         }
1149 
1150         if (rs != rs_last) {
1151             struct repack_step *next = &rp->steps[n + 1];
1152             if (next->buf[0]) {
1153                 rs->buf[1] = next->buf[0];
1154                 continue;
1155             }
1156         }
1157 
1158         image_realloc(&rs->tmp, rs->fmt[1].id, buf_w, rs->fmt[1].align_y);
1159         if (!rs->tmp)
1160             return false;
1161         talloc_steal(rp, rs->tmp);
1162         rs->buf[1] = rs->tmp;
1163     }
1164 
1165     for (int n = 0; n < rp->num_steps; n++) {
1166         struct repack_step *rs = &rp->steps[n];
1167         rs->user_buf[0] = rs->buf[0] == src || rs->buf[0] == dst;
1168         rs->user_buf[1] = rs->buf[1] == src || rs->buf[1] == dst;
1169     }
1170 
1171     // If repacking is the only operation. It's also responsible for simply
1172     // copying src to dst if absolutely no filtering is done.
1173     bool may_passthrough =
1174         rp->num_steps == 1 && rp->steps[0].type == REPACK_STEP_REPACK;
1175 
1176     for (int p = 0; p < rp->fmt_b.num_planes; p++) {
1177         // (All repack callbacks copy, except nv12 does not copy luma.)
1178         bool repack_copies_plane = rp->repack && !(rp->passthrough_y && p == 0);
1179 
1180         bool can_pt = may_passthrough && !repack_copies_plane &&
1181                       enable_passthrough && enable_passthrough[p];
1182 
1183         // Copy if needed, unless the repack callback does it anyway.
1184         rp->copy_buf[p] = !repack_copies_plane && !can_pt;
1185 
1186         if (enable_passthrough)
1187             enable_passthrough[p] = can_pt && !rp->copy_buf[p];
1188     }
1189 
1190     if (enable_passthrough) {
1191         for (int n = rp->fmt_b.num_planes; n < MP_MAX_PLANES; n++)
1192             enable_passthrough[n] = false;
1193     }
1194 
1195     update_repack_float(rp);
1196 
1197     rp->configured = true;
1198 
1199     return true;
1200 }
1201