1 /*
2  * This file is part of mpv.
3  *
4  * mpv is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * mpv is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stddef.h>
19 #include <stdbool.h>
20 #include <assert.h>
21 #include <math.h>
22 #include <inttypes.h>
23 
24 #include "common/common.h"
25 #include "draw_bmp.h"
26 #include "img_convert.h"
27 #include "video/mp_image.h"
28 #include "video/repack.h"
29 #include "video/sws_utils.h"
30 #include "video/img_format.h"
31 #include "video/csputils.h"
32 
// Table indexed by sub-bitmap format. The two formats set to true here are
// the ones render_sb() in this file dispatches on; all other entries are
// implicitly false.
const bool mp_draw_sub_formats[SUBBITMAP_COUNT] = {
    [SUBBITMAP_LIBASS] = true,
    [SUBBITMAP_RGBA] = true,
};
37 
// Per-OSD-part cache of scaled SUBBITMAP_RGBA images (filled by render_rgba()).
struct part {
    // change_id of the sub_bitmaps the cached images were created from; the
    // cache is dropped when it differs from the sub_bitmaps being rendered.
    int change_id;
    // Sub-bitmaps scaled to final sizes.
    // imgs[n] stays NULL until sub-bitmap n actually needed scaling.
    int num_imgs;
    struct mp_image **imgs;
};
44 
// Width in pixels of one slice, the horizontal unit in which possibly
// non-transparent pixels are tracked (see struct slice).
// Must be a power of 2. Height is 1, but mark_rect() effectively operates on
// multiples of chroma sized macro-pixels. (E.g. 4:2:0 -> every second line is
// the same as the previous one, and x0%2==x1%2==0.)
#define SLICE_W 256u

// Whether to scale in tiles. Faster, but can't use correct chroma position.
// Should be a runtime option. SLICE_W is used as tile width. The tile size
// should probably be small; too small or too big will cause overhead when
// scaling.
// NOTE(review): reinit_to_video() assigns this to scale_in_tiles, but then
// overwrites the field on both branches of its format selection, so the value
// here currently looks like it has no effect - confirm.
#define SCALE_IN_TILES 1
// Height of a tile in lines (the width is SLICE_W).
#define TILE_H 4u
56 
// Horizontal range [x0, x1) of possibly non-transparent pixels within one
// SLICE_W wide segment of a single overlay line. Both values are relative to
// the start of the segment. x0 >= x1 means nothing is marked; the cleared
// state written by clear_rgba_overlay() is {SLICE_W, 0}.
struct slice {
    uint16_t x0, x1;
};
60 
// All state for rendering sub-bitmaps, either blended onto a video frame
// (set up by reinit_to_video()) or into a standalone RGBA overlay (set up by
// reinit_to_overlay()). Everything below "global" is reset and rebuilt by
// check_reinit() when the target image parameters change.
struct mp_draw_sub_cache
{
    struct mpv_global *global;

    // Possibly cached parts. Also implies what's in the video_overlay.
    struct part parts[MAX_OSD_PARTS];
    int64_t change_id;              // change_id of the last rendered list

    struct mp_image_params params;  // target image params

    int w, h;                       // like params.w/h, but rounded up to chroma
    unsigned align_x, align_y;      // alignment for all video pixels

    struct mp_image *rgba_overlay;  // all OSD in RGBA
    struct mp_image *video_overlay; // rgba_overlay converted to video colorspace
    struct mp_image *alpha_overlay; // alpha plane ref. to video_overlay
    struct mp_image *calpha_overlay; // alpha_overlay scaled to chroma plane size

    unsigned s_w;                   // number of slices per line
    struct slice *slices;           // slices[y * s_w + x / SLICE_W]
    bool any_osd;                   // set by mark_rect(), reset on clear

    struct mp_sws_context *rgba_to_overlay; // scaler for rgba -> video csp.
    struct mp_sws_context *alpha_to_calpha; // scaler for overlay -> calpha
    bool scale_in_tiles;

    struct mp_sws_context *sub_scale; // scaler for SUBBITMAP_RGBA

    // Note: the "_f32" repackers/buffers below hold 8 bit data instead of
    // float when reinit_to_video() selects blend_line_u8.

    struct mp_repack *overlay_to_f32; // convert video_overlay to float
    struct mp_image *overlay_tmp;   // slice in float32

    struct mp_repack *calpha_to_f32; // convert calpha_overlay to float
    struct mp_image *calpha_tmp;    // slice in float32

    struct mp_repack *video_to_f32; // convert video to float
    struct mp_repack *video_from_f32; // convert float back to video
    struct mp_image *video_tmp;     // slice in float32

    struct mp_sws_context *premul;  // video -> premultiplied video
    struct mp_sws_context *unpremul; // reverse
    struct mp_image *premul_tmp;

    // Function that works on the _f32 data.
    void (*blend_line)(void *dst, void *src, void *src_a, int w);

    struct mp_image res_overlay;    // returned by mp_draw_sub_overlay()
};
108 
// Blend one line of float samples in place: dst = src + dst * (1 - alpha).
// The source is not multiplied by alpha here, i.e. it is treated as
// premultiplied. All three pointers refer to float arrays of at least w
// elements.
static void blend_line_f32(void *dst, void *src, void *src_a, int w)
{
    float *out = dst;
    const float *color = src;
    const float *alpha = src_a;

    for (int i = 0; i < w; i++) {
        float keep = 1.0f - alpha[i];
        out[i] = color[i] + out[i] * keep;
    }
}
118 
// 8 bit variant of blend_line_f32():
// dst = src + dst * (255 - alpha) / 255 (integer division), computed in
// place and truncated to 8 bits on store. All three pointers refer to
// uint8_t arrays of at least w elements.
static void blend_line_u8(void *dst, void *src, void *src_a, int w)
{
    uint8_t *out = dst;
    const uint8_t *color = src;
    const uint8_t *alpha = src_a;

    for (int i = 0; i < w; i++) {
        unsigned int keep = 255u - alpha[i];
        out[i] = color[i] + out[i] * keep / 255u;
    }
}
128 
blend_slice(struct mp_draw_sub_cache * p)129 static void blend_slice(struct mp_draw_sub_cache *p)
130 {
131     struct mp_image *ov = p->overlay_tmp;
132     struct mp_image *ca = p->calpha_tmp;
133     struct mp_image *vid = p->video_tmp;
134 
135     for (int plane = 0; plane < vid->num_planes; plane++) {
136         int xs = vid->fmt.xs[plane];
137         int ys = vid->fmt.ys[plane];
138         int h = (1 << vid->fmt.chroma_ys) - (1 << ys) + 1;
139         int cw = mp_chroma_div_up(vid->w, xs);
140         for (int y = 0; y < h; y++) {
141             p->blend_line(mp_image_pixel_ptr_ny(vid, plane, 0, y),
142                           mp_image_pixel_ptr_ny(ov, plane, 0, y),
143                           xs || ys ? mp_image_pixel_ptr_ny(ca, 0, 0, y)
144                             : mp_image_pixel_ptr_ny(ov, ov->num_planes - 1, 0, y),
145                           cw);
146         }
147     }
148 }
149 
blend_overlay_with_video(struct mp_draw_sub_cache * p,struct mp_image * dst)150 static bool blend_overlay_with_video(struct mp_draw_sub_cache *p,
151                                      struct mp_image *dst)
152 {
153     if (!repack_config_buffers(p->video_to_f32, 0, p->video_tmp, 0, dst, NULL))
154         return false;
155     if (!repack_config_buffers(p->video_from_f32, 0, dst, 0, p->video_tmp, NULL))
156         return false;
157 
158     int xs = dst->fmt.chroma_xs;
159     int ys = dst->fmt.chroma_ys;
160 
161     for (int y = 0; y < dst->h; y += p->align_y) {
162         struct slice *line = &p->slices[y * p->s_w];
163 
164         for (int sx = 0; sx < p->s_w; sx++) {
165             struct slice *s = &line[sx];
166 
167             int w = s->x1 - s->x0;
168             if (w <= 0)
169                 continue;
170             int x = sx * SLICE_W + s->x0;
171 
172             assert(MP_IS_ALIGNED(x, p->align_x));
173             assert(MP_IS_ALIGNED(w, p->align_x));
174             assert(x + w <= p->w);
175 
176             repack_line(p->overlay_to_f32, 0, 0, x, y, w);
177             repack_line(p->video_to_f32, 0, 0, x, y, w);
178             if (p->calpha_to_f32)
179                 repack_line(p->calpha_to_f32, 0, 0, x >> xs, y >> ys, w >> xs);
180 
181             blend_slice(p);
182 
183             repack_line(p->video_from_f32, x, y, 0, 0, w);
184         }
185     }
186 
187     return true;
188 }
189 
convert_overlay_part(struct mp_draw_sub_cache * p,int x0,int y0,int w,int h)190 static bool convert_overlay_part(struct mp_draw_sub_cache *p,
191                                  int x0, int y0, int w, int h)
192 {
193     struct mp_image src = *p->rgba_overlay;
194     struct mp_image dst = *p->video_overlay;
195 
196     mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
197     mp_image_crop(&dst, x0, y0, x0 + w, y0 + h);
198 
199     if (mp_sws_scale(p->rgba_to_overlay, &dst, &src) < 0)
200         return false;
201 
202     if (p->calpha_overlay) {
203         src = *p->alpha_overlay;
204         dst = *p->calpha_overlay;
205 
206         int xs = p->video_overlay->fmt.chroma_xs;
207         int ys = p->video_overlay->fmt.chroma_ys;
208         mp_image_crop(&src, x0, y0, x0 + w, y0 + h);
209         mp_image_crop(&dst, x0 >> xs, y0 >> ys, (x0 + w) >> xs, (y0 + h) >> ys);
210 
211         if (mp_sws_scale(p->alpha_to_calpha, &dst, &src) < 0)
212             return false;
213     }
214 
215     return true;
216 }
217 
convert_to_video_overlay(struct mp_draw_sub_cache * p)218 static bool convert_to_video_overlay(struct mp_draw_sub_cache *p)
219 {
220     if (!p->video_overlay)
221         return true;
222 
223     if (p->scale_in_tiles) {
224         int t_h = p->rgba_overlay->h / TILE_H;
225         for (int ty = 0; ty < t_h; ty++) {
226             for (int sx = 0; sx < p->s_w; sx++) {
227                 struct slice *s = &p->slices[ty * TILE_H * p->s_w + sx];
228                 bool pixels_set = false;
229                 for (int y = 0; y < TILE_H; y++) {
230                     if (s[0].x0 < s[0].x1) {
231                         pixels_set = true;
232                         break;
233                     }
234                     s += p->s_w;
235                 }
236                 if (!pixels_set)
237                     continue;
238                 if (!convert_overlay_part(p, sx * SLICE_W, ty * TILE_H,
239                                           SLICE_W, TILE_H))
240                     return false;
241             }
242         }
243     } else {
244         if (!convert_overlay_part(p, 0, 0, p->rgba_overlay->w, p->rgba_overlay->h))
245             return false;
246     }
247 
248     return true;
249 }
250 
251 // Mark the given rectangle of pixels as possibly non-transparent.
252 // The rectangle must have been pre-clipped.
mark_rect(struct mp_draw_sub_cache * p,int x0,int y0,int x1,int y1)253 static void mark_rect(struct mp_draw_sub_cache *p, int x0, int y0, int x1, int y1)
254 {
255     x0 = MP_ALIGN_DOWN(x0, p->align_x);
256     y0 = MP_ALIGN_DOWN(y0, p->align_y);
257     x1 = MP_ALIGN_UP(x1, p->align_x);
258     y1 = MP_ALIGN_UP(y1, p->align_y);
259 
260     assert(x0 >= 0 && x0 <= x1 && x1 <= p->w);
261     assert(y0 >= 0 && y0 <= y1 && y1 <= p->h);
262 
263     int sx0 = x0 / SLICE_W;
264     int sx1 = x1 / SLICE_W;
265 
266     for (int y = y0; y < y1; y++) {
267         struct slice *line = &p->slices[y * p->s_w];
268 
269         struct slice *s0 = &line[sx0];
270         struct slice *s1 = &line[sx1];
271 
272         s0->x0 = MPMIN(s0->x0, x0 % SLICE_W);
273         s1->x1 = MPMAX(s1->x1, x1 % SLICE_W);
274 
275         if (s0 != s1) {
276             s0->x1 = SLICE_W;
277             s1->x0 = 0;
278 
279             for (int x = sx0 + 1; x < sx1; x++) {
280                 struct slice *s = &line[x];
281                 s->x0 = 0;
282                 s->x1 = SLICE_W;
283             }
284         }
285 
286         p->any_osd = true;
287     }
288 }
289 
// Blend a single-color 8 bit coverage bitmap (src) onto 32 bit pixels (dst),
// which are accessed as uint32_t with B in bits 0-7, G in 8-15, R in 16-23
// and A in 24-31.
//  dst/dst_stride: first destination pixel / byte offset between lines
//  src/src_stride: first coverage byte / byte offset between lines
//  w, h:           size of the bitmap
//  color:          0xRRGGBBTT, where TT is transparency (0 = fully opaque)
// The color is weighted by coverage * opacity while blending, i.e. the
// destination is treated as premultiplied.
static void draw_ass_rgba(uint8_t *dst, ptrdiff_t dst_stride,
                          uint8_t *src, ptrdiff_t src_stride,
                          int w, int h, uint32_t color)
{
    const unsigned int full = 255 * 255;
    const unsigned int col_r = (color >> 24) & 0xff;
    const unsigned int col_g = (color >> 16) & 0xff;
    const unsigned int col_b = (color >>  8) & 0xff;
    const unsigned int opacity = 0xff - (color & 0xff);

    uint8_t *dst_line = dst;
    uint8_t *src_line = src;

    for (int row = 0; row < h; row++) {
        uint32_t *out = (uint32_t *) dst_line;

        for (int col = 0; col < w; col++) {
            const unsigned int cov = src_line[col];
            // Effective alpha of this pixel, in range 0..255*255.
            const unsigned int alpha = opacity * cov;
            const unsigned int keep = full - alpha;
            const uint32_t old = out[col];

            unsigned int nb =
                (cov * col_b * opacity + ( old        & 0xFF) * keep) / full;
            unsigned int ng =
                (cov * col_g * opacity + ((old >>  8) & 0xFF) * keep) / full;
            unsigned int nr =
                (cov * col_r * opacity + ((old >> 16) & 0xFF) * keep) / full;
            unsigned int na =
                (alpha * 255           + ((old >> 24) & 0xFF) * keep) / full;

            out[col] = nb | (ng << 8) | (nr << 16) | (na << 24);
        }

        dst_line += dst_stride;
        src_line += src_stride;
    }
}
319 
render_ass(struct mp_draw_sub_cache * p,struct sub_bitmaps * sb)320 static void render_ass(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
321 {
322     assert(sb->format == SUBBITMAP_LIBASS);
323 
324     for (int i = 0; i < sb->num_parts; i++) {
325         struct sub_bitmap *s = &sb->parts[i];
326 
327         draw_ass_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, s->x, s->y),
328                       p->rgba_overlay->stride[0], s->bitmap, s->stride,
329                       s->w, s->h, s->libass.color);
330 
331         mark_rect(p, s->x, s->y, s->x + s->w, s->y + s->h);
332     }
333 }
334 
// Blend a premultiplied alpha source bitmap over a premultiplied alpha
// destination: dst = src + dst * (255 - src_alpha) / 255 for every channel
// (the same formula blend_line_u8() uses).
// Pixels are accessed as uint32_t with B in bits 0-7, G in 8-15, R in 16-23
// and A in 24-31.
//  dst/dst_stride: first destination pixel / byte offset between lines
//  src/src_stride: first source pixel / byte offset between lines
//  w, h:           size of the blended area in pixels
// With valid premultiplied input (every color channel <= alpha) no channel
// can exceed 255, so the channels can be recombined without masking.
static void draw_rgba(uint8_t *dst, ptrdiff_t dst_stride,
                      uint8_t *src, ptrdiff_t src_stride, int w, int h)
{
    for (int y = 0; y < h; y++) {
        uint32_t *srcrow = (uint32_t *)src;
        uint32_t *dstrow = (uint32_t *)dst;
        for (int x = 0; x < w; x++) {
            uint32_t srcpix = srcrow[x];
            uint32_t dstpix = dstrow[x];
            unsigned int srcb =  srcpix        & 0xFF;
            unsigned int srcg = (srcpix >>  8) & 0xFF;
            unsigned int srcr = (srcpix >> 16) & 0xFF;
            unsigned int srca = (srcpix >> 24) & 0xFF;
            unsigned int dstb =  dstpix        & 0xFF;
            unsigned int dstg = (dstpix >>  8) & 0xFF;
            unsigned int dstr = (dstpix >> 16) & 0xFF;
            unsigned int dsta = (dstpix >> 24) & 0xFF;
            // srca is in range 0..255 (unlike the 0..255*255 alpha used in
            // draw_ass_rgba()), so the destination weight is (255 - srca)/255.
            unsigned int keep = 255 - srca;
            dstb = srcb + dstb * keep / 255;
            dstg = srcg + dstg * keep / 255;
            dstr = srcr + dstr * keep / 255;
            dsta = srca + dsta * keep / 255;
            dstrow[x] = dstb | (dstg << 8) | (dstr << 16) | (dsta << 24);
        }
        dst += dst_stride;
        src += src_stride;
    }
}
362 
render_rgba(struct mp_draw_sub_cache * p,struct part * part,struct sub_bitmaps * sb)363 static bool render_rgba(struct mp_draw_sub_cache *p, struct part *part,
364                         struct sub_bitmaps *sb)
365 {
366     assert(sb->format == SUBBITMAP_RGBA);
367 
368     if (part->change_id != sb->change_id) {
369         for (int n = 0; n < part->num_imgs; n++)
370             talloc_free(part->imgs[n]);
371         part->num_imgs = sb->num_parts;
372         MP_TARRAY_GROW(p, part->imgs, part->num_imgs);
373         for (int n = 0; n < part->num_imgs; n++)
374             part->imgs[n] = NULL;
375 
376         part->change_id = sb->change_id;
377     }
378 
379     for (int i = 0; i < sb->num_parts; i++) {
380         struct sub_bitmap *s = &sb->parts[i];
381 
382         // Clipping is rare but necessary.
383         int sx0 = s->x;
384         int sy0 = s->y;
385         int sx1 = s->x + s->dw;
386         int sy1 = s->y + s->dh;
387 
388         int x0 = MPCLAMP(sx0, 0, p->w);
389         int y0 = MPCLAMP(sy0, 0, p->h);
390         int x1 = MPCLAMP(sx1, 0, p->w);
391         int y1 = MPCLAMP(sy1, 0, p->h);
392 
393         int dw = x1 - x0;
394         int dh = y1 - y0;
395         if (dw <= 0 || dh <= 0)
396             continue;
397 
398         // We clip the source instead of the scaled image, because that might
399         // avoid excessive memory usage when applying a ridiculous scale factor,
400         // even if that stretches it to up to 1 pixel due to integer rounding.
401         int sx = 0;
402         int sy = 0;
403         int sw = s->w;
404         int sh = s->h;
405         if (x0 != sx0 || y0 != sy0 || x1 != sx1 || y1 != sy1) {
406             double fx = s->dw / (double)s->w;
407             double fy = s->dh / (double)s->h;
408             sx = MPCLAMP((x0 - sx0) / fx, 0, s->w);
409             sy = MPCLAMP((y0 - sy0) / fy, 0, s->h);
410             sw = MPCLAMP(dw / fx, 1, s->w);
411             sh = MPCLAMP(dh / fy, 1, s->h);
412         }
413 
414         assert(sx >= 0 && sw > 0 && sx + sw <= s->w);
415         assert(sy >= 0 && sh > 0 && sy + sh <= s->h);
416 
417         ptrdiff_t s_stride = s->stride;
418         void *s_ptr = (char *)s->bitmap + s_stride * sy + sx * 4;
419 
420         if (dw != sw || dh != sh) {
421             struct mp_image *scaled = part->imgs[i];
422 
423             if (!scaled) {
424                 struct mp_image src_img = {0};
425                 mp_image_setfmt(&src_img, IMGFMT_BGRA);
426                 mp_image_set_size(&src_img, sw, sh);
427                 src_img.planes[0] = s_ptr;
428                 src_img.stride[0] = s_stride;
429                 src_img.params.alpha = MP_ALPHA_PREMUL;
430 
431                 scaled = mp_image_alloc(IMGFMT_BGRA, dw, dh);
432                 if (!scaled)
433                     return false;
434                 part->imgs[i] = talloc_steal(p, scaled);
435                 mp_image_copy_attributes(scaled, &src_img);
436 
437                 if (mp_sws_scale(p->sub_scale, scaled, &src_img) < 0)
438                     return false;
439             }
440 
441             assert(scaled->w == dw);
442             assert(scaled->h == dh);
443 
444             s_stride = scaled->stride[0];
445             s_ptr = scaled->planes[0];
446         }
447 
448         draw_rgba(mp_image_pixel_ptr(p->rgba_overlay, 0, x0, y0),
449                   p->rgba_overlay->stride[0], s_ptr, s_stride, dw, dh);
450 
451         mark_rect(p, x0, y0, x1, y1);
452     }
453 
454     return true;
455 }
456 
render_sb(struct mp_draw_sub_cache * p,struct sub_bitmaps * sb)457 static bool render_sb(struct mp_draw_sub_cache *p, struct sub_bitmaps *sb)
458 {
459     struct part *part = &p->parts[sb->render_index];
460 
461     switch (sb->format) {
462     case SUBBITMAP_LIBASS:
463         render_ass(p, sb);
464         return true;
465     case SUBBITMAP_RGBA:
466         return render_rgba(p, part, sb);
467     }
468 
469     return false;
470 }
471 
clear_rgba_overlay(struct mp_draw_sub_cache * p)472 static void clear_rgba_overlay(struct mp_draw_sub_cache *p)
473 {
474     assert(p->rgba_overlay->imgfmt == IMGFMT_BGRA);
475 
476     for (int y = 0; y < p->rgba_overlay->h; y++) {
477         uint32_t *px = mp_image_pixel_ptr(p->rgba_overlay, 0, 0, y);
478         struct slice *line = &p->slices[y * p->s_w];
479 
480         for (int sx = 0; sx < p->s_w; sx++) {
481             struct slice *s = &line[sx];
482 
483             if (s->x0 <= s->x1) {
484                 memset(px + s->x0, 0, (s->x1 - s->x0) * 4);
485                 *s = (struct slice){SLICE_W, 0};
486             }
487 
488             px += SLICE_W;
489         }
490     }
491 
492     p->any_osd = false;
493 }
494 
alloc_scaler(struct mp_draw_sub_cache * p)495 static struct mp_sws_context *alloc_scaler(struct mp_draw_sub_cache *p)
496 {
497     struct mp_sws_context *s = mp_sws_alloc(p);
498     mp_sws_enable_cmdline_opts(s, p->global);
499     return s;
500 }
501 
init_general(struct mp_draw_sub_cache * p)502 static void init_general(struct mp_draw_sub_cache *p)
503 {
504     p->sub_scale = alloc_scaler(p);
505 
506     p->s_w = MP_ALIGN_UP(p->rgba_overlay->w, SLICE_W) / SLICE_W;
507 
508     p->slices = talloc_zero_array(p, struct slice, p->s_w * p->rgba_overlay->h);
509 
510     mp_image_clear(p->rgba_overlay, 0, 0, p->w, p->h);
511     clear_rgba_overlay(p);
512 }
513 
// Set up all state needed for blending OSD onto video frames with the
// parameters in p->params: the repackers from/to the blending format, the
// RGBA overlay, (if the video is not blended as RGB) the overlay in video
// colorspace, (with chroma subsampling) the chroma sized alpha plane, and the
// premultiplication helpers for video with straight alpha.
// Returns false if the format is not supported or an allocation failed. In
// this case p is left partially initialized; check_reinit() frees and resets
// it.
static bool reinit_to_video(struct mp_draw_sub_cache *p)
{
    struct mp_image_params *params = &p->params;
    mp_image_params_guess_csp(params);

    // Video with an alpha channel that is not premultiplied yet.
    bool need_premul = params->alpha != MP_ALPHA_PREMUL &&
        (mp_imgfmt_get_desc(params->imgfmt).flags & MP_IMGFLAG_ALPHA);

    // Intermediate format for video_overlay. Requirements:
    //  - same subsampling as video
    //  - uses video colorspace
    //  - has alpha
    //  - repacker support (to the format used in p->blend_line)
    //  - probably 8 bit per component rather than something wasteful or strange
    struct mp_regular_imgfmt vfdesc = {0};

    int rflags = REPACK_CREATE_EXPAND_8BIT;
    bool use_shortcut = false;

    // First try without float conversion, to see whether the RGB shortcut and
    // the 8 bit blender can be used.
    p->video_to_f32 = mp_repack_create_planar(params->imgfmt, false, rflags);
    talloc_steal(p, p->video_to_f32);
    if (!p->video_to_f32)
        return false;
    mp_get_regular_imgfmt(&vfdesc, mp_repack_get_format_dst(p->video_to_f32));
    assert(vfdesc.num_planes); // must have succeeded

    if (params->color.space == MP_CSP_RGB && vfdesc.num_planes >= 3) {
        use_shortcut = true;

        if (vfdesc.component_type == MP_COMPONENT_TYPE_UINT &&
            vfdesc.component_size == 1 && vfdesc.component_pad == 0)
            p->blend_line = blend_line_u8;
    }

    // If no special blender is available, blend in float.
    if (!p->blend_line) {
        TA_FREEP(&p->video_to_f32);

        rflags |= REPACK_CREATE_PLANAR_F32;

        p->video_to_f32 = mp_repack_create_planar(params->imgfmt, false, rflags);
        talloc_steal(p, p->video_to_f32);
        if (!p->video_to_f32)
            return false;

        mp_get_regular_imgfmt(&vfdesc, mp_repack_get_format_dst(p->video_to_f32));
        assert(vfdesc.component_type == MP_COMPONENT_TYPE_FLOAT);

        p->blend_line = blend_line_f32;
    }

    // Note: overwritten on both branches of the overlay format selection below.
    p->scale_in_tiles = SCALE_IN_TILES;

    int vid_f32_fmt = mp_repack_get_format_dst(p->video_to_f32);

    // Reverse direction: blending format back to the video format.
    p->video_from_f32 = mp_repack_create_planar(params->imgfmt, true, rflags);
    talloc_steal(p, p->video_from_f32);
    if (!p->video_from_f32)
        return false;

    assert(mp_repack_get_format_dst(p->video_to_f32) ==
           mp_repack_get_format_src(p->video_from_f32));

    int overlay_fmt = 0;
    if (use_shortcut) {
        // No point in doing anything fancy.
        overlay_fmt = IMGFMT_BGRA;
        p->scale_in_tiles = false;
    } else {
        struct mp_regular_imgfmt odesc = vfdesc;
        // Just use 8 bit as well (should be fine, may use less memory).
        odesc.component_type = MP_COMPONENT_TYPE_UINT;
        odesc.component_size = 1;
        odesc.component_pad = 0;

        // Ensure there's alpha.
        if (odesc.planes[odesc.num_planes - 1].components[0] != 4) {
            if (odesc.num_planes >= 4)
                return false; // wat
            odesc.planes[odesc.num_planes++] =
                (struct mp_regular_imgfmt_plane){1, {4}};
        }

        overlay_fmt = mp_find_regular_imgfmt(&odesc);
        // Tiled scaling is used exactly if the format is chroma subsampled.
        p->scale_in_tiles = odesc.chroma_xs || odesc.chroma_ys;
    }
    if (!overlay_fmt)
        return false;

    p->overlay_to_f32 = mp_repack_create_planar(overlay_fmt, false, rflags);
    talloc_steal(p, p->overlay_to_f32);
    if (!p->overlay_to_f32)
        return false;

    // Format of the unpacked overlay slices, as passed to p->blend_line.
    int render_fmt = mp_repack_get_format_dst(p->overlay_to_f32);

    struct mp_regular_imgfmt ofdesc = {0};
    mp_get_regular_imgfmt(&ofdesc, render_fmt);

    // The last plane of the unpacked overlay must be alpha (component 4).
    if (ofdesc.planes[ofdesc.num_planes - 1].components[0] != 4)
        return false;

    // The formats must be the same, minus possible lack of alpha in vfdesc.
    if (ofdesc.num_planes != vfdesc.num_planes &&
        ofdesc.num_planes - 1 != vfdesc.num_planes)
        return false;
    for (int n = 0; n < vfdesc.num_planes; n++) {
        if (vfdesc.planes[n].components[0] != ofdesc.planes[n].components[0])
            return false;
    }

    p->align_x = mp_repack_get_align_x(p->video_to_f32);
    p->align_y = mp_repack_get_align_y(p->video_to_f32);

    assert(p->align_x >= mp_repack_get_align_x(p->overlay_to_f32));
    assert(p->align_y >= mp_repack_get_align_y(p->overlay_to_f32));

    // Slices and tiles must be able to hold at least one aligned unit.
    if (p->align_x > SLICE_W || p->align_y > TILE_H)
        return false;

    p->w = MP_ALIGN_UP(params->w, p->align_x);
    int slice_h = p->align_y;
    p->h = MP_ALIGN_UP(params->h, slice_h);

    // Size of the overlay. If scaling in tiles, round up to tiles, so we don't
    // need to reinit the scale for right/bottom tiles.
    int w = p->w;
    int h = p->h;
    if (p->scale_in_tiles) {
        w = MP_ALIGN_UP(w, SLICE_W);
        h = MP_ALIGN_UP(h, TILE_H);
    }

    // The temporary images hold a single slice: SLICE_W x align_y pixels.
    p->rgba_overlay = talloc_steal(p, mp_image_alloc(IMGFMT_BGRA, w, h));
    p->overlay_tmp = talloc_steal(p, mp_image_alloc(render_fmt, SLICE_W, slice_h));
    p->video_tmp = talloc_steal(p, mp_image_alloc(vid_f32_fmt, SLICE_W, slice_h));
    if (!p->rgba_overlay || !p->overlay_tmp || !p->video_tmp)
        return false;

    mp_image_params_guess_csp(&p->rgba_overlay->params);
    p->rgba_overlay->params.alpha = MP_ALPHA_PREMUL;

    p->overlay_tmp->params.color = params->color;
    p->video_tmp->params.color = params->color;

    if (p->rgba_overlay->imgfmt == overlay_fmt) {
        // The RGBA overlay can be blended directly; no video_overlay needed.
        if (!repack_config_buffers(p->overlay_to_f32, 0, p->overlay_tmp,
                                   0, p->rgba_overlay, NULL))
            return false;
    } else {
        // Generally non-RGB.
        p->video_overlay = talloc_steal(p, mp_image_alloc(overlay_fmt, w, h));
        if (!p->video_overlay)
            return false;

        p->video_overlay->params.color = params->color;
        p->video_overlay->params.chroma_location = params->chroma_location;
        p->video_overlay->params.alpha = MP_ALPHA_PREMUL;

        // See SCALE_IN_TILES: tiled scaling can't use the correct chroma
        // position.
        if (p->scale_in_tiles)
            p->video_overlay->params.chroma_location = MP_CHROMA_CENTER;

        p->rgba_to_overlay = alloc_scaler(p);
        p->rgba_to_overlay->allow_zimg = true;
        if (!mp_sws_supports_formats(p->rgba_to_overlay,
                            p->video_overlay->imgfmt, p->rgba_overlay->imgfmt))
            return false;

        if (!repack_config_buffers(p->overlay_to_f32, 0, p->overlay_tmp,
                                   0, p->video_overlay, NULL))
            return false;

        // Setup a scaled alpha plane if chroma-subsampling is present.
        int xs = p->video_overlay->fmt.chroma_xs;
        int ys = p->video_overlay->fmt.chroma_ys;
        if (xs || ys) {
            // Require float so format selection becomes simpler (maybe).
            assert(rflags & REPACK_CREATE_PLANAR_F32);

            // For extracting the alpha plane, construct a gray format that is
            // compatible with the alpha one.
            struct mp_regular_imgfmt odesc = {0};
            mp_get_regular_imgfmt(&odesc, overlay_fmt);
            assert(odesc.component_size);
            int aplane = odesc.num_planes - 1;
            assert(odesc.planes[aplane].num_components == 1);
            assert(odesc.planes[aplane].components[0] == 4);
            struct mp_regular_imgfmt cadesc = odesc;
            cadesc.num_planes = 1;
            cadesc.planes[0] = (struct mp_regular_imgfmt_plane){1, {1}};
            cadesc.chroma_xs = cadesc.chroma_ys = 0;

            int calpha_fmt = mp_find_regular_imgfmt(&cadesc);
            if (!calpha_fmt)
                return false;

            // Unscaled alpha plane from p->video_overlay.
            // (References the plane data of video_overlay; nothing is copied.)
            p->alpha_overlay = talloc_zero(p, struct mp_image);
            mp_image_setfmt(p->alpha_overlay, calpha_fmt);
            mp_image_set_size(p->alpha_overlay, w, h);
            p->alpha_overlay->planes[0] = p->video_overlay->planes[aplane];
            p->alpha_overlay->stride[0] = p->video_overlay->stride[aplane];

            // Full range gray always has the same range as alpha.
            p->alpha_overlay->params.color.levels = MP_CSP_LEVELS_PC;
            mp_image_params_guess_csp(&p->alpha_overlay->params);

            // Alpha scaled down to the size of the chroma planes.
            p->calpha_overlay =
                talloc_steal(p, mp_image_alloc(calpha_fmt, w >> xs, h >> ys));
            if (!p->calpha_overlay)
                return false;
            p->calpha_overlay->params.color = p->alpha_overlay->params.color;

            p->calpha_to_f32 = mp_repack_create_planar(calpha_fmt, false, rflags);
            talloc_steal(p, p->calpha_to_f32);
            if (!p->calpha_to_f32)
                return false;

            // Holds a single line of unpacked chroma sized alpha.
            int af32_fmt = mp_repack_get_format_dst(p->calpha_to_f32);
            p->calpha_tmp = talloc_steal(p, mp_image_alloc(af32_fmt, SLICE_W, 1));
            if (!p->calpha_tmp)
                return false;

            if (!repack_config_buffers(p->calpha_to_f32, 0, p->calpha_tmp,
                                       0, p->calpha_overlay, NULL))
                return false;

            p->alpha_to_calpha = alloc_scaler(p);
            if (!mp_sws_supports_formats(p->alpha_to_calpha,
                                         calpha_fmt, calpha_fmt))
                return false;
        }
    }

    if (need_premul) {
        // Scalers and a temporary image in video format with premultiplied
        // alpha, for converting the video to premultiplied alpha and back.
        p->premul = alloc_scaler(p);
        p->unpremul = alloc_scaler(p);
        p->premul_tmp = mp_image_alloc(params->imgfmt, params->w, params->h);
        talloc_steal(p, p->premul_tmp);
        if (!p->premul_tmp)
            return false;
        mp_image_set_params(p->premul_tmp, params);
        p->premul_tmp->params.alpha = MP_ALPHA_PREMUL;

        // Only zimg supports this.
        p->premul->force_scaler = MP_SWS_ZIMG;
        p->unpremul->force_scaler = MP_SWS_ZIMG;
    }

    init_general(p);

    return true;
}
767 
reinit_to_overlay(struct mp_draw_sub_cache * p)768 static bool reinit_to_overlay(struct mp_draw_sub_cache *p)
769 {
770     p->align_x = 1;
771     p->align_y = 1;
772 
773     p->w = p->params.w;
774     p->h = p->params.h;
775 
776     p->rgba_overlay = talloc_steal(p, mp_image_alloc(IMGFMT_BGRA, p->w, p->h));
777     if (!p->rgba_overlay)
778         return false;
779 
780     mp_image_params_guess_csp(&p->rgba_overlay->params);
781     p->rgba_overlay->params.alpha = MP_ALPHA_PREMUL;
782 
783     // Some non-sense with the intention to somewhat isolate the returned image.
784     mp_image_setfmt(&p->res_overlay, p->rgba_overlay->imgfmt);
785     mp_image_set_size(&p->res_overlay, p->rgba_overlay->w, p->rgba_overlay->h);
786     mp_image_copy_attributes(&p->res_overlay, p->rgba_overlay);
787     p->res_overlay.planes[0] = p->rgba_overlay->planes[0];
788     p->res_overlay.stride[0] = p->rgba_overlay->stride[0];
789 
790     init_general(p);
791 
792     // Mark all dirty (for full reinit of user state).
793     for (int y = 0; y < p->rgba_overlay->h; y++) {
794         for (int sx = 0; sx < p->s_w; sx++)
795             p->slices[y * p->s_w + sx] = (struct slice){0, SLICE_W};
796     }
797 
798     return true;
799 }
800 
check_reinit(struct mp_draw_sub_cache * p,struct mp_image_params * params,bool to_video)801 static bool check_reinit(struct mp_draw_sub_cache *p,
802                          struct mp_image_params *params, bool to_video)
803 {
804     if (!mp_image_params_equal(&p->params, params) || !p->rgba_overlay) {
805         talloc_free_children(p);
806         *p = (struct mp_draw_sub_cache){.global = p->global, .params = *params};
807         if (!(to_video ? reinit_to_video(p) : reinit_to_overlay(p))) {
808             talloc_free_children(p);
809             *p = (struct mp_draw_sub_cache){.global = p->global};
810             return false;
811         }
812     }
813     return true;
814 }
815 
mp_draw_sub_get_dbg_info(struct mp_draw_sub_cache * p)816 char *mp_draw_sub_get_dbg_info(struct mp_draw_sub_cache *p)
817 {
818     assert(p);
819 
820     return talloc_asprintf(NULL,
821         "align=%d:%d ov=%-7s, ov_f=%s, v_f=%s, a=%s, ca=%s, ca_f=%s",
822         p->align_x, p->align_y,
823         mp_imgfmt_to_name(p->video_overlay ? p->video_overlay->imgfmt : 0),
824         mp_imgfmt_to_name(p->overlay_tmp->imgfmt),
825         mp_imgfmt_to_name(p->video_tmp->imgfmt),
826         mp_imgfmt_to_name(p->alpha_overlay ? p->alpha_overlay->imgfmt : 0),
827         mp_imgfmt_to_name(p->calpha_overlay ? p->calpha_overlay->imgfmt : 0),
828         mp_imgfmt_to_name(p->calpha_tmp ? p->calpha_tmp->imgfmt : 0));
829 }
830 
mp_draw_sub_alloc(void * ta_parent,struct mpv_global * g)831 struct mp_draw_sub_cache *mp_draw_sub_alloc(void *ta_parent, struct mpv_global *g)
832 {
833     struct mp_draw_sub_cache *c = talloc_zero(ta_parent, struct mp_draw_sub_cache);
834     c->global = g;
835     return c;
836 }
837 
mp_draw_sub_bitmaps(struct mp_draw_sub_cache * p,struct mp_image * dst,struct sub_bitmap_list * sbs_list)838 bool mp_draw_sub_bitmaps(struct mp_draw_sub_cache *p, struct mp_image *dst,
839                          struct sub_bitmap_list *sbs_list)
840 {
841     bool ok = false;
842 
843     // dst must at least be as large as the bounding box, or you may get memory
844     // corruption.
845     assert(dst->w >= sbs_list->w);
846     assert(dst->h >= sbs_list->h);
847 
848     if (!check_reinit(p, &dst->params, true))
849         return false;
850 
851     if (p->change_id != sbs_list->change_id) {
852         p->change_id = sbs_list->change_id;
853 
854         clear_rgba_overlay(p);
855 
856         for (int n = 0; n < sbs_list->num_items; n++) {
857             if (!render_sb(p, sbs_list->items[n]))
858                 goto done;
859         }
860 
861         if (!convert_to_video_overlay(p))
862             goto done;
863     }
864 
865     if (p->any_osd) {
866         struct mp_image *target = dst;
867         if (p->premul_tmp) {
868             if (mp_sws_scale(p->premul, p->premul_tmp, dst) < 0)
869                 goto done;
870             target = p->premul_tmp;
871         }
872 
873         if (!blend_overlay_with_video(p, target))
874             goto done;
875 
876         if (target != dst) {
877             if (mp_sws_scale(p->unpremul, dst, p->premul_tmp) < 0)
878                 goto done;
879         }
880     }
881 
882     ok = true;
883 
884 done:
885     return ok;
886 }
887 
// Bounding boxes for mp_draw_sub_overlay() API. For simplicity, each rectangle
// covers a fixed tile on the screen, starts out empty, but is not extended
// beyond the tile. In the simplest case, there's only 1 rect/tile for everything.
// The tile width is always a multiple of SLICE_W (asserted in init_rc_grid()).
struct rc_grid {
    unsigned w, h;                  // size in grid tiles
    unsigned r_w, r_h;              // size of a grid tile in pixels
    struct mp_rect *rcs;            // row-major: rcs[y * w + x]
};
896 
init_rc_grid(struct rc_grid * gr,struct mp_draw_sub_cache * p,struct mp_rect * rcs,int max_rcs)897 static void init_rc_grid(struct rc_grid *gr, struct mp_draw_sub_cache *p,
898                          struct mp_rect *rcs, int max_rcs)
899 {
900     *gr = (struct rc_grid){ .w = max_rcs ? 1 : 0, .h = max_rcs ? 1 : 0,
901                             .rcs = rcs, .r_w = p->s_w * SLICE_W, .r_h = p->h, };
902 
903     // Dumb iteration to figure out max. size because I'm stupid.
904     bool more = true;
905     while (more) {
906         more = false;
907         if (gr->r_h >= 128) {
908             if (gr->w * gr->h * 2 > max_rcs)
909                 break;
910             gr->h *= 2;
911             gr->r_h = (p->h + gr->h - 1) / gr->h;
912             more = true;
913         }
914         if (gr->r_w >= SLICE_W * 2) {
915             if (gr->w * gr->h * 2 > max_rcs)
916                 break;
917             gr->w *= 2;
918             gr->r_w = (p->s_w + gr->w - 1) / gr->w * SLICE_W;
919             more = true;
920         }
921     }
922 
923     assert(gr->r_h * gr->h >= p->h);
924     assert(!(gr->r_w & (SLICE_W - 1)));
925     assert(gr->r_w * gr->w >= p->w);
926 
927     // Init with empty (degenerate) rectangles.
928     for (int y = 0; y < gr->h; y++) {
929         for (int x = 0; x < gr->w; x++) {
930             struct mp_rect *rc = &gr->rcs[y * gr->w + x];
931             rc->x1 = x * gr->r_w;
932             rc->y1 = y * gr->r_h;
933             rc->x0 = rc->x1 + gr->r_w;
934             rc->y0 = rc->y1 + gr->r_h;
935         }
936     }
937 }
938 
939 // Extend given grid with contents of p->slices.
mark_rcs(struct mp_draw_sub_cache * p,struct rc_grid * gr)940 static void mark_rcs(struct mp_draw_sub_cache *p, struct rc_grid *gr)
941 {
942     for (int y = 0; y < p->h; y++) {
943         struct slice *line = &p->slices[y * p->s_w];
944         struct mp_rect *rcs = &gr->rcs[y / gr->r_h * gr->w];
945 
946         for (int sx = 0; sx < p->s_w; sx++) {
947             struct slice *s = &line[sx];
948             if (s->x0 < s->x1) {
949                 unsigned xpos = sx * SLICE_W;
950                 struct mp_rect *rc = &rcs[xpos / gr->r_w];
951                 rc->y0 = MPMIN(rc->y0, y);
952                 rc->y1 = MPMAX(rc->y1, y + 1);
953                 rc->x0 = MPMIN(rc->x0, xpos + s->x0);
954                 rc->x1 = MPMAX(rc->x1, xpos + s->x1);
955             }
956         }
957     }
958 }
959 
960 // Remove empty RCs, and return rc count.
return_rcs(struct rc_grid * gr)961 static int return_rcs(struct rc_grid *gr)
962 {
963     int num = 0, cnt = gr->w * gr->h;
964     for (int n = 0; n < cnt; n++) {
965         struct mp_rect *rc = &gr->rcs[n];
966         if (rc->x0 < rc->x1 && rc->y0 < rc->y1)
967             gr->rcs[num++] = *rc;
968     }
969     return num;
970 }
971 
mp_draw_sub_overlay(struct mp_draw_sub_cache * p,struct sub_bitmap_list * sbs_list,struct mp_rect * act_rcs,int max_act_rcs,int * num_act_rcs,struct mp_rect * mod_rcs,int max_mod_rcs,int * num_mod_rcs)972 struct mp_image *mp_draw_sub_overlay(struct mp_draw_sub_cache *p,
973                                      struct sub_bitmap_list *sbs_list,
974                                      struct mp_rect *act_rcs,
975                                      int max_act_rcs,
976                                      int *num_act_rcs,
977                                      struct mp_rect *mod_rcs,
978                                      int max_mod_rcs,
979                                      int *num_mod_rcs)
980 {
981     *num_act_rcs = 0;
982     *num_mod_rcs = 0;
983 
984     struct mp_image_params params = {.w = sbs_list->w, .h = sbs_list->h};
985     if (!check_reinit(p, &params, false))
986         return NULL;
987 
988     struct rc_grid gr_act, gr_mod;
989     init_rc_grid(&gr_act, p, act_rcs, max_act_rcs);
990     init_rc_grid(&gr_mod, p, mod_rcs, max_mod_rcs);
991 
992     if (p->change_id != sbs_list->change_id) {
993         p->change_id = sbs_list->change_id;
994 
995         mark_rcs(p, &gr_mod);
996 
997         clear_rgba_overlay(p);
998 
999         for (int n = 0; n < sbs_list->num_items; n++) {
1000             if (!render_sb(p, sbs_list->items[n])) {
1001                 p->change_id = 0;
1002                 return NULL;
1003             }
1004         }
1005 
1006         mark_rcs(p, &gr_mod);
1007     }
1008 
1009     mark_rcs(p, &gr_act);
1010 
1011     *num_act_rcs = return_rcs(&gr_act);
1012     *num_mod_rcs = return_rcs(&gr_mod);
1013 
1014     return &p->res_overlay;
1015 }
1016 
1017 // vim: ts=4 sw=4 et tw=80
1018