1 /*
2 * Apple ProRes encoder
3 *
4 * Copyright (c) 2012 Konstantin Shishkov
5 *
 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
 * the similarities in the bugs.
8 *
9 * This file is part of FFmpeg.
10 *
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
28 #include "avcodec.h"
29 #include "fdctdsp.h"
30 #include "put_bits.h"
31 #include "profiles.h"
32 #include "bytestream.h"
33 #include "internal.h"
34 #include "proresdata.h"
35
36 #define CFACTOR_Y422 2
37 #define CFACTOR_Y444 3
38
39 #define MAX_MBS_PER_SLICE 8
40
41 #define MAX_PLANES 4
42
/*
 * Encoder profile selector. The non-negative values are in the same order as
 * the entries of prores_profile_info[]; PRORES_PROFILE_AUTO is the only
 * negative value.
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
    PRORES_PROFILE_4444XQ   =  5,
};
52
/*
 * Quantisation matrix selector. The values follow the order of the matrices
 * stored in prores_quant_matrices[].
 */
enum {
    QUANT_MAT_PROXY        = 0,
    QUANT_MAT_PROXY_CHROMA = 1,
    QUANT_MAT_LT           = 2,
    QUANT_MAT_STANDARD     = 3,
    QUANT_MAT_HQ           = 4,
    QUANT_MAT_XQ_LUMA      = 5,
    QUANT_MAT_DEFAULT      = 6,
};
62
/*
 * Base 8x8 quantisation matrices, one 64-entry matrix per QUANT_MAT_* value
 * (same order as that enum).
 */
static const uint8_t prores_quant_matrices[][64] = {
    /* QUANT_MAT_PROXY */
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* QUANT_MAT_PROXY_CHROMA */
    {
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* QUANT_MAT_LT */
    {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    /* QUANT_MAT_STANDARD */
    {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    /* QUANT_MAT_HQ */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    /* QUANT_MAT_XQ_LUMA */
    {
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  3,
         2,  2,  2,  2,  2,  2,  3,  3,
         2,  2,  2,  2,  2,  3,  3,  3,
         2,  2,  2,  2,  3,  3,  3,  4,
         2,  2,  2,  2,  3,  3,  4,  4,
    },
    /* QUANT_MAT_DEFAULT (flat codec default) */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
135
#define NUM_MB_LIMITS 4

/*
 * Ascending frame-size thresholds, one per br_tab[] column of a profile.
 * Presumably expressed in 16x16 macroblocks (e.g. 720x576 -> 45 * 36 = 1620).
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, /* up to 720x576   */
    2700, /* up to 960x720   */
    6075, /* up to 1440x1080 */
    9216, /* up to 2048x1152 */
};
143
144 static const struct prores_profile {
145 const char *full_name;
146 uint32_t tag;
147 int min_quant;
148 int max_quant;
149 int br_tab[NUM_MB_LIMITS];
150 int quant;
151 int quant_chroma;
152 } prores_profile_info[6] = {
153 {
154 .full_name = "proxy",
155 .tag = MKTAG('a', 'p', 'c', 'o'),
156 .min_quant = 4,
157 .max_quant = 8,
158 .br_tab = { 300, 242, 220, 194 },
159 .quant = QUANT_MAT_PROXY,
160 .quant_chroma = QUANT_MAT_PROXY_CHROMA,
161 },
162 {
163 .full_name = "LT",
164 .tag = MKTAG('a', 'p', 'c', 's'),
165 .min_quant = 1,
166 .max_quant = 9,
167 .br_tab = { 720, 560, 490, 440 },
168 .quant = QUANT_MAT_LT,
169 .quant_chroma = QUANT_MAT_LT,
170 },
171 {
172 .full_name = "standard",
173 .tag = MKTAG('a', 'p', 'c', 'n'),
174 .min_quant = 1,
175 .max_quant = 6,
176 .br_tab = { 1050, 808, 710, 632 },
177 .quant = QUANT_MAT_STANDARD,
178 .quant_chroma = QUANT_MAT_STANDARD,
179 },
180 {
181 .full_name = "high quality",
182 .tag = MKTAG('a', 'p', 'c', 'h'),
183 .min_quant = 1,
184 .max_quant = 6,
185 .br_tab = { 1566, 1216, 1070, 950 },
186 .quant = QUANT_MAT_HQ,
187 .quant_chroma = QUANT_MAT_HQ,
188 },
189 {
190 .full_name = "4444",
191 .tag = MKTAG('a', 'p', '4', 'h'),
192 .min_quant = 1,
193 .max_quant = 6,
194 .br_tab = { 2350, 1828, 1600, 1425 },
195 .quant = QUANT_MAT_HQ,
196 .quant_chroma = QUANT_MAT_HQ,
197 },
198 {
199 .full_name = "4444XQ",
200 .tag = MKTAG('a', 'p', '4', 'x'),
201 .min_quant = 1,
202 .max_quant = 6,
203 .br_tab = { 3525, 2742, 2400, 2137 },
204 .quant = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
205 .quant_chroma = QUANT_MAT_HQ,
206 }
207 };
208
/* Number of node slots reserved per slice column in the quantiser trellis. */
#define TRELLIS_WIDTH 16
/*
 * Score marking a path as unusable. Parenthesised so that the macro expands
 * as a single operand whatever the surrounding expression is.
 */
#define SCORE_LIMIT   (INT_MAX / 2)

/* One node of the per-row quantiser trellis. */
struct TrellisNode {
    int prev_node; /* index of the best predecessor node, -1 while unset */
    int quant;     /* quantiser this node stands for */
    int bits;      /* bits accumulated along the best path ending here */
    int score;     /* error accumulated along that path (SCORE_LIMIT if invalid) */
};
218
219 #define MAX_STORED_Q 16
220
221 typedef struct ProresThreadData {
222 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
223 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
224 int16_t custom_q[64];
225 int16_t custom_chroma_q[64];
226 struct TrellisNode *nodes;
227 } ProresThreadData;
228
229 typedef struct ProresContext {
230 AVClass *class;
231 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
232 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
233 int16_t quants[MAX_STORED_Q][64];
234 int16_t quants_chroma[MAX_STORED_Q][64];
235 int16_t custom_q[64];
236 int16_t custom_chroma_q[64];
237 const uint8_t *quant_mat;
238 const uint8_t *quant_chroma_mat;
239 const uint8_t *scantable;
240
241 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
242 ptrdiff_t linesize, int16_t *block);
243 FDCTDSPContext fdsp;
244
245 const AVFrame *pic;
246 int mb_width, mb_height;
247 int mbs_per_slice;
248 int num_chroma_blocks, chroma_factor;
249 int slices_width;
250 int slices_per_picture;
251 int pictures_per_frame; // 1 for progressive, 2 for interlaced
252 int cur_picture_idx;
253 int num_planes;
254 int bits_per_mb;
255 int force_quant;
256 int alpha_bits;
257 int warn;
258
259 char *vendor;
260 int quant_sel;
261
262 int frame_size_upper_bound;
263
264 int profile;
265 const struct prores_profile *profile_info;
266
267 int *slice_q;
268
269 ProresThreadData *tdata;
270 } ProresContext;
271
/**
 * Run the forward DCT over every macroblock of one slice plane.
 *
 * @param ctx           encoder context (provides the fdct callback)
 * @param src           first sample of the slice in this plane
 * @param linesize      distance between source rows, in bytes
 * @param x             horizontal position of the slice in plane samples
 * @param y             vertical position of the slice in plane samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output, 64 coefficients per 8x8 block
 * @param emu_buf       16x16 scratch used to pad macroblocks that cross
 *                      the right or bottom picture edge
 * @param mbs_per_slice number of macroblocks in the slice
 * @param blocks_per_mb 8x8 blocks per macroblock in this plane
 * @param is_chroma     selects the block order (see below)
 *
 * Macroblocks that start at or beyond the right edge produce all-zero
 * coefficients.
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    const int wide     = blocks_per_mb > 2;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *top, *bottom;
        ptrdiff_t elinesize;

        if (x >= w) {
            /* nothing left inside the picture: zero the remaining blocks */
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* partially outside: copy what exists and replicate the edges */
            const int bw = FFMIN(w - x, mb_width);
            const int bh = FFMIN(h - y, 16);
            int row, col, pix;

            for (row = 0; row < bh; row++) {
                uint16_t *dst = emu_buf + row * 16;

                memcpy(dst,
                       (const uint8_t*)src + row * linesize,
                       bw * sizeof(*src));
                pix = dst[bw - 1];
                for (col = bw; col < mb_width; col++)
                    dst[col] = pix;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            top       = emu_buf;
            elinesize = 16 * sizeof(*emu_buf);
        } else {
            top       = src;
            elinesize = linesize;
        }

        /*
         * Line sizes are in bytes while the pointers address 16-bit samples,
         * so elinesize * 4 elements is eight rows further down.
         */
        bottom = top + elinesize * 4;

        if (is_chroma) {
            /* chroma order: left column top to bottom, then the right column */
            ctx->fdct(&ctx->fdsp, top, elinesize, blocks);
            blocks += 64;
            ctx->fdct(&ctx->fdsp, bottom, elinesize, blocks);
            blocks += 64;
            if (wide) {
                ctx->fdct(&ctx->fdsp, top + 8, elinesize, blocks);
                blocks += 64;
                ctx->fdct(&ctx->fdsp, bottom + 8, elinesize, blocks);
                blocks += 64;
            }
        } else {
            /* luma order: top row left to right, then the bottom row */
            ctx->fdct(&ctx->fdsp, top, elinesize, blocks);
            blocks += 64;
            if (wide) {
                ctx->fdct(&ctx->fdsp, top + 8, elinesize, blocks);
                blocks += 64;
            }
            ctx->fdct(&ctx->fdsp, bottom, elinesize, blocks);
            blocks += 64;
            if (wide) {
                ctx->fdct(&ctx->fdsp, bottom + 8, elinesize, blocks);
                blocks += 64;
            }
        }
    }
}
342
/**
 * Copy the alpha samples of one slice into a 16-row buffer that is
 * 16 * mbs_per_slice samples wide, converting them to the coded bit depth.
 *
 * Samples are shifted right by two when abits is 8; for any other abits they
 * are expanded with (v << 6) | (v >> 4). Columns and rows that fall outside
 * the picture are filled by repeating the last copied column / row.
 *
 * @param ctx           encoder context (not used here)
 * @param src           first alpha sample of the slice
 * @param linesize      distance between source rows, in bytes
 * @param x             horizontal position of the slice in samples
 * @param y             vertical position of the slice in samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output buffer
 * @param mbs_per_slice number of macroblocks in the slice
 * @param abits         coded alpha bit depth
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h; row++) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }

        /* pad to the right with the last valid sample of the row */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];

        blocks += slice_width;
        src    += linesize >> 1; /* linesize is in bytes, src in 16-bit units */
    }

    /* pad downwards by repeating the previous output row */
    for (; row < 16; row++) {
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
        blocks += slice_width;
    }
}
370
371 /**
372 * Write an unsigned rice/exp golomb codeword.
373 */
encode_vlc_codeword(PutBitContext * pb,unsigned codebook,int val)374 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
375 {
376 unsigned int rice_order, exp_order, switch_bits, switch_val;
377 int exponent;
378
379 /* number of prefix bits to switch between Rice and expGolomb */
380 switch_bits = (codebook & 3) + 1;
381 rice_order = codebook >> 5; /* rice code order */
382 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
383
384 switch_val = switch_bits << rice_order;
385
386 if (val >= switch_val) {
387 val -= switch_val - (1 << exp_order);
388 exponent = av_log2(val);
389
390 put_bits(pb, exponent - exp_order + switch_bits, 0);
391 put_bits(pb, exponent + 1, val);
392 } else {
393 exponent = val >> rice_order;
394
395 if (exponent)
396 put_bits(pb, exponent, 0);
397 put_bits(pb, 1, 1);
398 if (rice_order)
399 put_sbits(pb, rice_order, val);
400 }
401 }
402
/* 0 for non-negative x, -1 for negative x (x is taken to be a 32-bit int). */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value to an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
405
/**
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * The first DC is coded directly with FIRST_DC_CB; each following DC is coded
 * as a difference to the previous one, with the codebook chosen from the
 * previous code and the difference's sign folded against the previous sign.
 *
 * @param pb               bit writer
 * @param blocks           coefficients, 64 per block (DC at offset 0)
 * @param blocks_per_slice number of blocks to code
 * @param scale            DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int codebook  = 3;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc         = (blocks[64 * n] - 0x4000) / scale;
        const int raw_delta  = dc - prev_dc;
        const int delta_sign = GET_SIGN(raw_delta);
        const int delta      = (raw_delta ^ prev_sign) - prev_sign;
        const int code       = MAKE_CODE(delta);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);

        codebook  = (code + (code & 1)) >> 1;
        codebook  = FFMIN(codebook, 3);
        prev_sign = delta_sign;
        prev_dc   = dc;
    }
}
431
/**
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited position by position in scan order, and for each
 * position across all blocks of the slice. Zero runs carry over from one
 * position to the next.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks
 * @param plane_size_factor not used by this function
 * @param scan              scan order, 64 entries
 * @param qmat              quantisation matrix, indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int i;

    for (i = 1; i < 64; i++) {
        const int pos = scan[i];
        const int q   = qmat[pos];
        int idx;

        for (idx = pos; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / q;
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            /* the next codebooks depend on the run / level just written */
            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
465
/**
 * Code the already transformed blocks of one slice plane (DCs, then ACs) and
 * flush the bit writer.
 *
 * @return number of bytes written for this plane
 */
static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    const int start_bits       = put_bits_count(pb);
    const int blocks_per_slice = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
               ctx->scantable, qmat);
    flush_put_bits(pb);

    return (put_bits_count(pb) - start_bits) >> 3;
}
484
/**
 * Write the difference between two alpha samples.
 *
 * The difference is taken modulo 2^abits. Small non-zero differences use a
 * short form (flag 0, magnitude - 1, sign bit); everything else, including a
 * zero difference, uses the long form (flag 1 followed by abits raw bits).
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* fold the top of the range onto small negative differences */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff >= -dsize && diff <= dsize && diff) {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    } else {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    }
}
503
/**
 * Write a run of repeated alpha samples: a single 1 bit for an empty run,
 * otherwise a 0 bit followed by the run length in 4 bits (run < 16) or in
 * 15 bits.
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
516
517 // todo alpha quantisation for high quants
encode_alpha_plane(ProresContext * ctx,PutBitContext * pb,int mbs_per_slice,uint16_t * blocks,int quant)518 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
519 int mbs_per_slice, uint16_t *blocks,
520 int quant)
521 {
522 const int abits = ctx->alpha_bits;
523 const int mask = (1 << abits) - 1;
524 const int num_coeffs = mbs_per_slice * 256;
525 int saved_pos = put_bits_count(pb);
526 int prev = mask, cur;
527 int idx = 0;
528 int run = 0;
529
530 cur = blocks[idx++];
531 put_alpha_diff(pb, cur, prev, abits);
532 prev = cur;
533 do {
534 cur = blocks[idx++];
535 if (cur != prev) {
536 put_alpha_run (pb, run);
537 put_alpha_diff(pb, cur, prev, abits);
538 prev = cur;
539 run = 0;
540 } else {
541 run++;
542 }
543 } while (idx < num_coeffs);
544 if (run)
545 put_alpha_run(pb, run);
546 flush_put_bits(pb);
547 return (put_bits_count(pb) - saved_pos) >> 3;
548 }
549
/**
 * Encode all planes of one slice into pb.
 *
 * @param avctx         codec context
 * @param pic           frame being encoded
 * @param pb            bit writer positioned at the start of the slice data
 * @param sizes         receives the coded size in bytes of every plane
 * @param x             slice position in macroblocks (horizontal)
 * @param y             slice position in macroblocks (vertical)
 * @param quant         quantiser to use (ignored for the matrix choice when
 *                      force_quant is set, in which case the matrices stored
 *                      at index 0 are used)
 * @param mbs_per_slice number of macroblocks in this slice
 * @return total coded size in bytes, or AVERROR_BUG if the output buffer
 *         was overrun
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                        PutBitContext *pb,
                        int sizes[4], int x, int y, int quant,
                        int mbs_per_slice)
{
    ProresContext *ctx = avctx->priv_data;
    int i, xp, yp;
    int total_size = 0;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks, pwidth, line_add;
    ptrdiff_t linesize;
    int plane_factor, is_chroma;
    /* the matrices are int16_t everywhere else; keep the pointer types in sync */
    int16_t *qmat;
    int16_t *qmat_chroma;
    const int pheight = avctx->height / ctx->pictures_per_frame;

    /* for interlaced coding, select which field's lines this picture uses */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;

    if (ctx->force_quant) {
        qmat        = ctx->quants[0];
        qmat_chroma = ctx->quants_chroma[0];
    } else if (quant < MAX_STORED_Q) {
        qmat        = ctx->quants[quant];
        qmat_chroma = ctx->quants_chroma[quant];
    } else {
        /* quantiser beyond the stored tables: scale the base matrices now */
        qmat        = ctx->custom_q;
        qmat_chroma = ctx->custom_chroma_q;
        for (i = 0; i < 64; i++) {
            qmat[i]        = ctx->quant_mat[i] * quant;
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
        }
    }

    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma    = (i == 1 || i == 2);
        plane_factor = slice_width_factor + 2;
        if (is_chroma)
            plane_factor += ctx->chroma_factor - 3;
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
            xp          = x << 4;
            yp          = y << 4;
            num_cblocks = 4;
            pwidth      = avctx->width;
        } else {
            /* horizontally subsampled chroma: half the width, 2 blocks per MB */
            xp          = x << 3;
            yp          = y << 4;
            num_cblocks = 2;
            pwidth      = avctx->width >> 1;
        }

        linesize = pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                line_add * pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize, xp, yp,
                           pwidth, pheight,
                           ctx->blocks[0], ctx->emu_buf,
                           mbs_per_slice, num_cblocks, is_chroma);
            sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
                                          mbs_per_slice, ctx->blocks[0],
                                          num_cblocks, plane_factor,
                                          is_chroma ? qmat_chroma : qmat);
        } else {
            get_alpha_data(ctx, src, linesize, xp, yp,
                           pwidth, pheight,
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
            /* the alpha coder reads the same buffer as unsigned samples */
            sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
                                          (uint16_t *)ctx->blocks[0], quant);
        }
        total_size += sizes[i];
        if (put_bits_left(pb) < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "Underestimated required buffer size.\n");
            return AVERROR_BUG;
        }
    }
    return total_size;
}
639
/**
 * Return the number of bits encode_vlc_codeword() writes for val with the
 * given codebook, without writing anything.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit, rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
661
estimate_dcs(int * error,int16_t * blocks,int blocks_per_slice,int scale)662 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
663 int scale)
664 {
665 int i;
666 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
667 int bits;
668
669 prev_dc = (blocks[0] - 0x4000) / scale;
670 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
671 sign = 0;
672 codebook = 3;
673 blocks += 64;
674 *error += FFABS(blocks[0] - 0x4000) % scale;
675
676 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
677 dc = (blocks[0] - 0x4000) / scale;
678 *error += FFABS(blocks[0] - 0x4000) % scale;
679 delta = dc - prev_dc;
680 new_sign = GET_SIGN(delta);
681 delta = (delta ^ sign) - sign;
682 code = MAKE_CODE(delta);
683 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
684 codebook = (code + (code & 1)) >> 1;
685 codebook = FFMIN(codebook, 3);
686 sign = new_sign;
687 prev_dc = dc;
688 }
689
690 return bits;
691 }
692
estimate_acs(int * error,int16_t * blocks,int blocks_per_slice,int plane_size_factor,const uint8_t * scan,const int16_t * qmat)693 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
694 int plane_size_factor,
695 const uint8_t *scan, const int16_t *qmat)
696 {
697 int idx, i;
698 int run, level, run_cb, lev_cb;
699 int max_coeffs, abs_level;
700 int bits = 0;
701
702 max_coeffs = blocks_per_slice << 6;
703 run_cb = ff_prores_run_to_cb_index[4];
704 lev_cb = ff_prores_lev_to_cb_index[2];
705 run = 0;
706
707 for (i = 1; i < 64; i++) {
708 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
709 level = blocks[idx] / qmat[scan[i]];
710 *error += FFABS(blocks[idx]) % qmat[scan[i]];
711 if (level) {
712 abs_level = FFABS(level);
713 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
714 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
715 abs_level - 1) + 1;
716
717 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
718 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
719 run = 0;
720 } else {
721 run++;
722 }
723 }
724 }
725
726 return bits;
727 }
728
estimate_slice_plane(ProresContext * ctx,int * error,int plane,const uint16_t * src,ptrdiff_t linesize,int mbs_per_slice,int blocks_per_mb,int plane_size_factor,const int16_t * qmat,ProresThreadData * td)729 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
730 const uint16_t *src, ptrdiff_t linesize,
731 int mbs_per_slice,
732 int blocks_per_mb, int plane_size_factor,
733 const int16_t *qmat, ProresThreadData *td)
734 {
735 int blocks_per_slice;
736 int bits;
737
738 blocks_per_slice = mbs_per_slice * blocks_per_mb;
739
740 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
741 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
742 plane_size_factor, ctx->scantable, qmat);
743
744 return FFALIGN(bits, 8);
745 }
746
/**
 * Return the number of bits put_alpha_diff() writes for the step from prev
 * to cur: dbits + 1 for the short form, abits + 1 for the long form.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* fold the top of the range onto small negative differences */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff >= -dsize && diff <= dsize && diff)
        return dbits + 1;
    return abits + 1;
}
761
/**
 * Estimate the number of bits encode_alpha_plane() would write for the
 * alpha samples of one slice.
 *
 * Run lengths are costed exactly as put_alpha_run() writes them: 1 bit for
 * an empty run, otherwise one flag bit plus 4 bits (run < 16) or 15 bits.
 *
 * @param ctx           encoder context (supplies the alpha bit depth)
 * @param src           not used by this function
 * @param linesize      not used by this function
 * @param mbs_per_slice number of macroblocks in the slice (256 samples each)
 * @param blocks        alpha samples prepared by get_alpha_data()
 * @return estimated size in bits (not rounded to bytes)
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            if (!run)
                bits++;
            else if (run < 0x10)
                bits += 1 + 4;  /* flag bit + short run length */
            else
                bits += 1 + 15; /* flag bit + long run length */
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* a trailing run is only written when it is not empty */
    if (run) {
        if (run < 0x10)
            bits += 1 + 4;
        else
            bits += 1 + 15;
    }

    return bits;
}
803
/**
 * Evaluate every candidate quantiser for one slice and extend the quantiser
 * trellis of the current macroblock row by one column.
 *
 * The slice is transformed once into td->blocks[]; its size and quantisation
 * error are then estimated for each quantiser in [min_quant, max_quant] plus
 * one extra "overquant" candidate stored in slot max_quant + 1. Each node of
 * the new column is linked to the predecessor (in the previous column, at
 * trellis_node - TRELLIS_WIDTH) that gives the lowest accumulated error while
 * staying within the accumulated bit budget.
 *
 * @param avctx         codec context
 * @param trellis_node  index of the first node slot of this slice's column
 * @param x             slice position in macroblocks (horizontal)
 * @param y             slice position in macroblocks (vertical)
 * @param mbs_per_slice number of macroblocks in this slice
 * @param td            scratch data of the calling worker
 * @return index of the node of this column with the lowest score
 *
 * NOTE(review): the previous column is read without being written here, so
 * the nodes of the first column are presumably initialised by the caller's
 * setup code - confirm.
 */
static int find_slice_quant(AVCodecContext *avctx,
                            int trellis_node, int x, int y, int mbs_per_slice,
                            ProresThreadData *td)
{
    ProresContext *ctx = avctx->priv_data;
    int i, q, pq, xp, yp;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks[MAX_PLANES], pwidth;
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
    const int min_quant = ctx->profile_info->min_quant;
    const int max_quant = ctx->profile_info->max_quant;
    int error, bits, bits_limit;
    int mbs, prev, cur, new_score;
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
    int overquant;
    /* the matrices are int16_t everywhere else; keep the pointer types in sync */
    int16_t *qmat;
    int16_t *qmat_chroma;
    int linesize[4], line_add;
    int alpha_bits = 0;

    /* for interlaced coding, select which field's lines this picture uses */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
    mbs = x + mbs_per_slice;

    /* transform (or copy, for alpha) every plane of the slice once */
    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma[i]    = (i == 1 || i == 2);
        plane_factor[i] = slice_width_factor + 2;
        if (is_chroma[i])
            plane_factor[i] += ctx->chroma_factor - 3;
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
            xp             = x << 4;
            yp             = y << 4;
            num_cblocks[i] = 4;
            pwidth         = avctx->width;
        } else {
            xp             = x << 3;
            yp             = y << 4;
            num_cblocks[i] = 2;
            pwidth         = avctx->width >> 1;
        }

        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
                                 line_add * ctx->pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], td->emu_buf,
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
        } else {
            get_alpha_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
        }
    }

    /* reset this column; slot max_quant + 1 is the overquant candidate */
    for (q = min_quant; q < max_quant + 2; q++) {
        td->nodes[trellis_node + q].prev_node = -1;
        td->nodes[trellis_node + q].quant     = q;
    }

    /* the alpha cost is the same for every quantiser, so compute it once */
    if (ctx->alpha_bits)
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
                                          mbs_per_slice, td->blocks[3]);
    // todo: maybe perform coarser quantising to fit into frame size when needed
    for (q = min_quant; q <= max_quant; q++) {
        bits  = alpha_bits;
        error = 0;
        bits += estimate_slice_plane(ctx, &error, 0,
                                     src, linesize[0],
                                     mbs_per_slice,
                                     num_cblocks[0], plane_factor[0],
                                     ctx->quants[q], td); /* estimate luma plane */
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
            bits += estimate_slice_plane(ctx, &error, i,
                                         src, linesize[i],
                                         mbs_per_slice,
                                         num_cblocks[i], plane_factor[i],
                                         ctx->quants_chroma[q], td);
        }
        /* slices estimated above 65000 bytes are marked unusable */
        if (bits > 65000 * 8)
            error = SCORE_LIMIT;

        slice_bits[q]  = bits;
        slice_score[q] = error;
    }
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
        /* max_quant already fits: the extra slot repeats it, scored one worse */
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
        overquant = max_quant;
    } else {
        /* raise the quantiser until the slice fits into its own budget */
        for (q = max_quant + 1; q < 128; q++) {
            bits  = alpha_bits;
            error = 0;
            if (q < MAX_STORED_Q) {
                qmat        = ctx->quants[q];
                qmat_chroma = ctx->quants_chroma[q];
            } else {
                qmat        = td->custom_q;
                qmat_chroma = td->custom_chroma_q;
                for (i = 0; i < 64; i++) {
                    qmat[i]        = ctx->quant_mat[i] * q;
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
                }
            }
            bits += estimate_slice_plane(ctx, &error, 0,
                                         src, linesize[0],
                                         mbs_per_slice,
                                         num_cblocks[0], plane_factor[0],
                                         qmat, td);/* estimate luma plane */
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
                bits += estimate_slice_plane(ctx, &error, i,
                                             src, linesize[i],
                                             mbs_per_slice,
                                             num_cblocks[i], plane_factor[i],
                                             qmat_chroma, td);
            }
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
                break;
        }

        slice_bits[max_quant + 1]  = bits;
        slice_score[max_quant + 1] = error;
        overquant = q;
    }
    td->nodes[trellis_node + max_quant + 1].quant = overquant;

    /* budget for all macroblocks of the row up to the end of this slice */
    bits_limit = mbs * ctx->bits_per_mb;
    for (pq = min_quant; pq < max_quant + 2; pq++) {
        prev = trellis_node - TRELLIS_WIDTH + pq;

        for (q = min_quant; q < max_quant + 2; q++) {
            cur = trellis_node + q;

            bits  = td->nodes[prev].bits + slice_bits[q];
            error = slice_score[q];
            if (bits > bits_limit)
                error = SCORE_LIMIT;

            /* saturate at SCORE_LIMIT instead of adding to it */
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
                new_score = td->nodes[prev].score + error;
            else
                new_score = SCORE_LIMIT;
            if (td->nodes[cur].prev_node == -1 ||
                td->nodes[cur].score >= new_score) {

                td->nodes[cur].bits      = bits;
                td->nodes[cur].score     = new_score;
                td->nodes[cur].prev_node = prev;
            }
        }
    }

    /* pick the best node of this column; ties go to the higher slot */
    error = td->nodes[trellis_node + min_quant].score;
    pq    = trellis_node + min_quant;
    for (q = min_quant + 1; q < max_quant + 2; q++) {
        if (td->nodes[trellis_node + q].score <= error) {
            error = td->nodes[trellis_node + q].score;
            pq    = trellis_node + q;
        }
    }

    return pq;
}
972
/**
 * Worker job: choose the quantisers of all slices in macroblock row jobnr.
 *
 * The row is walked left to right, adding one trellis column per slice; the
 * best path is then followed backwards from the last column and the chosen
 * quantisers are stored in ctx->slice_q for that row.
 *
 * @param avctx    codec context
 * @param arg      not used by this function
 * @param jobnr    macroblock row to process
 * @param threadnr selects the ProresThreadData scratch entry
 * @return always 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx    = avctx->priv_data;
    ProresThreadData *td  = ctx->tdata + threadnr;
    int *row_q            = ctx->slice_q + jobnr * ctx->slices_width;
    int mbs_per_slice     = ctx->mbs_per_slice;
    int node  = 0;
    int slice = 0;
    int mb_x  = 0;

    while (mb_x < ctx->mb_width) {
        /* shrink the slice until it fits in what is left of the row */
        while (ctx->mb_width - mb_x < mbs_per_slice)
            mbs_per_slice >>= 1;

        node = find_slice_quant(avctx,
                                (slice + 1) * TRELLIS_WIDTH, mb_x, jobnr,
                                mbs_per_slice, td);
        mb_x += mbs_per_slice;
        slice++;
    }

    /* backtrack from the best final node to recover each slice's quantiser */
    for (slice = ctx->slices_width - 1; slice >= 0; slice--) {
        row_q[slice] = td->nodes[node].quant;
        node         = td->nodes[node].prev_node;
    }

    return 0;
}
996
encode_frame(AVCodecContext * avctx,AVPacket * pkt,const AVFrame * pic,int * got_packet)997 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
998 const AVFrame *pic, int *got_packet)
999 {
1000 ProresContext *ctx = avctx->priv_data;
1001 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
1002 uint8_t *picture_size_pos;
1003 PutBitContext pb;
1004 int x, y, i, mb, q = 0;
1005 int sizes[4] = { 0 };
1006 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
1007 int frame_size, picture_size, slice_size;
1008 int pkt_size, ret;
1009 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1010 uint8_t frame_flags;
1011
1012 ctx->pic = pic;
1013 pkt_size = ctx->frame_size_upper_bound;
1014
1015 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1016 return ret;
1017
1018 orig_buf = pkt->data;
1019
1020 // frame atom
1021 orig_buf += 4; // frame size
1022 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
1023 buf = orig_buf;
1024
1025 // frame header
1026 tmp = buf;
1027 buf += 2; // frame header size will be stored here
1028 bytestream_put_be16 (&buf, 0); // version 1
1029 bytestream_put_buffer(&buf, ctx->vendor, 4);
1030 bytestream_put_be16 (&buf, avctx->width);
1031 bytestream_put_be16 (&buf, avctx->height);
1032
1033 frame_flags = ctx->chroma_factor << 6;
1034 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1035 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1036 bytestream_put_byte (&buf, frame_flags);
1037
1038 bytestream_put_byte (&buf, 0); // reserved
1039 bytestream_put_byte (&buf, pic->color_primaries);
1040 bytestream_put_byte (&buf, pic->color_trc);
1041 bytestream_put_byte (&buf, pic->colorspace);
1042 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
1043 bytestream_put_byte (&buf, 0); // reserved
1044 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1045 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
1046 // luma quantisation matrix
1047 for (i = 0; i < 64; i++)
1048 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1049 // chroma quantisation matrix
1050 for (i = 0; i < 64; i++)
1051 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1052 } else {
1053 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
1054 }
1055 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
1056
1057 for (ctx->cur_picture_idx = 0;
1058 ctx->cur_picture_idx < ctx->pictures_per_frame;
1059 ctx->cur_picture_idx++) {
1060 // picture header
1061 picture_size_pos = buf + 1;
1062 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1063 buf += 4; // picture data size will be stored here
1064 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1065 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1066
1067 // seek table - will be filled during slice encoding
1068 slice_sizes = buf;
1069 buf += ctx->slices_per_picture * 2;
1070
1071 // slices
1072 if (!ctx->force_quant) {
1073 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1074 ctx->mb_height);
1075 if (ret)
1076 return ret;
1077 }
1078
1079 for (y = 0; y < ctx->mb_height; y++) {
1080 int mbs_per_slice = ctx->mbs_per_slice;
1081 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1082 q = ctx->force_quant ? ctx->force_quant
1083 : ctx->slice_q[mb + y * ctx->slices_width];
1084
1085 while (ctx->mb_width - x < mbs_per_slice)
1086 mbs_per_slice >>= 1;
1087
1088 bytestream_put_byte(&buf, slice_hdr_size << 3);
1089 slice_hdr = buf;
1090 buf += slice_hdr_size - 1;
1091 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1092 uint8_t *start = pkt->data;
1093 // Recompute new size according to max_slice_size
1094 // and deduce delta
1095 int delta = 200 + (ctx->pictures_per_frame *
1096 ctx->slices_per_picture + 1) *
1097 max_slice_size - pkt_size;
1098
1099 delta = FFMAX(delta, 2 * max_slice_size);
1100 ctx->frame_size_upper_bound += delta;
1101
1102 if (!ctx->warn) {
1103 avpriv_request_sample(avctx,
1104 "Packet too small: is %i,"
1105 " needs %i (slice: %i). "
1106 "Correct allocation",
1107 pkt_size, delta, max_slice_size);
1108 ctx->warn = 1;
1109 }
1110
1111 ret = av_grow_packet(pkt, delta);
1112 if (ret < 0)
1113 return ret;
1114
1115 pkt_size += delta;
1116 // restore pointers
1117 orig_buf = pkt->data + (orig_buf - start);
1118 buf = pkt->data + (buf - start);
1119 picture_size_pos = pkt->data + (picture_size_pos - start);
1120 slice_sizes = pkt->data + (slice_sizes - start);
1121 slice_hdr = pkt->data + (slice_hdr - start);
1122 tmp = pkt->data + (tmp - start);
1123 }
1124 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1125 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1126 mbs_per_slice);
1127 if (ret < 0)
1128 return ret;
1129
1130 bytestream_put_byte(&slice_hdr, q);
1131 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1132 for (i = 0; i < ctx->num_planes - 1; i++) {
1133 bytestream_put_be16(&slice_hdr, sizes[i]);
1134 slice_size += sizes[i];
1135 }
1136 bytestream_put_be16(&slice_sizes, slice_size);
1137 buf += slice_size - slice_hdr_size;
1138 if (max_slice_size < slice_size)
1139 max_slice_size = slice_size;
1140 }
1141 }
1142
1143 picture_size = buf - (picture_size_pos - 1);
1144 bytestream_put_be32(&picture_size_pos, picture_size);
1145 }
1146
1147 orig_buf -= 8;
1148 frame_size = buf - orig_buf;
1149 bytestream_put_be32(&orig_buf, frame_size);
1150
1151 pkt->size = frame_size;
1152 pkt->flags |= AV_PKT_FLAG_KEY;
1153 *got_packet = 1;
1154
1155 return 0;
1156 }
1157
encode_close(AVCodecContext * avctx)1158 static av_cold int encode_close(AVCodecContext *avctx)
1159 {
1160 ProresContext *ctx = avctx->priv_data;
1161 int i;
1162
1163 if (ctx->tdata) {
1164 for (i = 0; i < avctx->thread_count; i++)
1165 av_freep(&ctx->tdata[i].nodes);
1166 }
1167 av_freep(&ctx->tdata);
1168 av_freep(&ctx->slice_q);
1169
1170 return 0;
1171 }
1172
/**
 * Load one 8x8 block of 16-bit samples and run the forward DCT on it.
 *
 * @param fdsp     DSP context providing the fdct() implementation
 * @param src      top-left sample of the block
 * @param linesize distance between two source rows, in bytes
 *                 (halved below to step in 16-bit samples)
 * @param block    64-entry output buffer; receives the samples in row-major
 *                 order and is then transformed in place by fdsp->fdct()
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const uint16_t *row = src;
    int16_t *dst = block;
    int r, c;

    for (r = 0; r < 8; r++, dst += 8) {
        for (c = 0; c < 8; c++)
            dst[c] = row[c];
        row += linesize >> 1;
    }

    fdsp->fdct(block);
}
1186
encode_init(AVCodecContext * avctx)1187 static av_cold int encode_init(AVCodecContext *avctx)
1188 {
1189 ProresContext *ctx = avctx->priv_data;
1190 int mps;
1191 int i, j;
1192 int min_quant, max_quant;
1193 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1194
1195 avctx->bits_per_raw_sample = 10;
1196 #if FF_API_CODED_FRAME
1197 FF_DISABLE_DEPRECATION_WARNINGS
1198 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1199 avctx->coded_frame->key_frame = 1;
1200 FF_ENABLE_DEPRECATION_WARNINGS
1201 #endif
1202
1203 ctx->fdct = prores_fdct;
1204 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1205 : ff_prores_progressive_scan;
1206 ff_fdctdsp_init(&ctx->fdsp, avctx);
1207
1208 mps = ctx->mbs_per_slice;
1209 if (mps & (mps - 1)) {
1210 av_log(avctx, AV_LOG_ERROR,
1211 "there should be an integer power of two MBs per slice\n");
1212 return AVERROR(EINVAL);
1213 }
1214 if (ctx->profile == PRORES_PROFILE_AUTO) {
1215 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1216 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1217 !(desc->log2_chroma_w + desc->log2_chroma_h))
1218 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1219 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1220 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1221 ? "4:4:4:4 profile because of the used input colorspace"
1222 : "HQ profile to keep best quality");
1223 }
1224 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1225 if (ctx->profile != PRORES_PROFILE_4444 &&
1226 ctx->profile != PRORES_PROFILE_4444XQ) {
1227 // force alpha and warn
1228 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1229 "encode alpha. Override with -profile if needed.\n");
1230 ctx->alpha_bits = 0;
1231 }
1232 if (ctx->alpha_bits & 7) {
1233 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1234 return AVERROR(EINVAL);
1235 }
1236 avctx->bits_per_coded_sample = 32;
1237 } else {
1238 ctx->alpha_bits = 0;
1239 }
1240
1241 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1242 ? CFACTOR_Y422
1243 : CFACTOR_Y444;
1244 ctx->profile_info = prores_profile_info + ctx->profile;
1245 ctx->num_planes = 3 + !!ctx->alpha_bits;
1246
1247 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1248
1249 if (interlaced)
1250 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1251 else
1252 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1253
1254 ctx->slices_width = ctx->mb_width / mps;
1255 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1256 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1257 ctx->pictures_per_frame = 1 + interlaced;
1258
1259 if (ctx->quant_sel == -1) {
1260 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1261 ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1262 } else {
1263 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1264 ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1265 }
1266
1267 if (strlen(ctx->vendor) != 4) {
1268 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1269 return AVERROR_INVALIDDATA;
1270 }
1271
1272 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1273 if (!ctx->force_quant) {
1274 if (!ctx->bits_per_mb) {
1275 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1276 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1277 ctx->pictures_per_frame)
1278 break;
1279 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1280 if (ctx->alpha_bits)
1281 ctx->bits_per_mb *= 20;
1282 } else if (ctx->bits_per_mb < 128) {
1283 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1284 return AVERROR_INVALIDDATA;
1285 }
1286
1287 min_quant = ctx->profile_info->min_quant;
1288 max_quant = ctx->profile_info->max_quant;
1289 for (i = min_quant; i < MAX_STORED_Q; i++) {
1290 for (j = 0; j < 64; j++) {
1291 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1292 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1293 }
1294 }
1295
1296 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1297 if (!ctx->slice_q) {
1298 encode_close(avctx);
1299 return AVERROR(ENOMEM);
1300 }
1301
1302 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1303 if (!ctx->tdata) {
1304 encode_close(avctx);
1305 return AVERROR(ENOMEM);
1306 }
1307
1308 for (j = 0; j < avctx->thread_count; j++) {
1309 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1310 * TRELLIS_WIDTH
1311 * sizeof(*ctx->tdata->nodes));
1312 if (!ctx->tdata[j].nodes) {
1313 encode_close(avctx);
1314 return AVERROR(ENOMEM);
1315 }
1316 for (i = min_quant; i < max_quant + 2; i++) {
1317 ctx->tdata[j].nodes[i].prev_node = -1;
1318 ctx->tdata[j].nodes[i].bits = 0;
1319 ctx->tdata[j].nodes[i].score = 0;
1320 }
1321 }
1322 } else {
1323 int ls = 0;
1324 int ls_chroma = 0;
1325
1326 if (ctx->force_quant > 64) {
1327 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1328 return AVERROR_INVALIDDATA;
1329 }
1330
1331 for (j = 0; j < 64; j++) {
1332 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1333 ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1334 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1335 ls_chroma += av_log2((1 << 11) / ctx->quants_chroma[0][j]) * 2 + 1;
1336 }
1337
1338 ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1339 if (ctx->chroma_factor == CFACTOR_Y444)
1340 ctx->bits_per_mb += ls_chroma * 4;
1341 }
1342
1343 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1344 ctx->slices_per_picture + 1) *
1345 (2 + 2 * ctx->num_planes +
1346 (mps * ctx->bits_per_mb) / 8)
1347 + 200;
1348
1349 if (ctx->alpha_bits) {
1350 // The alpha plane is run-coded and might exceed the bit budget.
1351 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1352 ctx->slices_per_picture + 1) *
1353 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1354 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1355 }
1356
1357 avctx->codec_tag = ctx->profile_info->tag;
1358
1359 av_log(avctx, AV_LOG_DEBUG,
1360 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1361 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1362 interlaced ? "yes" : "no", ctx->bits_per_mb);
1363 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1364 ctx->frame_size_upper_bound);
1365
1366 return 0;
1367 }
1368
/* Byte offset of a ProresContext member, for the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags shared by every entry: video + encoding parameter.
 * Parenthesised so the expansion stays a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1371
/*
 * Private options of the prores_ks encoder.
 *
 * Entries of type AV_OPT_TYPE_CONST are named values for the option that
 * shares their last string field ("profile" or "quant_mat").
 */
static const AVOption options[] = {
    /* 1..MAX_MBS_PER_SLICE; encode_init() additionally requires a power of two */
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
    /* profile selection; "auto" lets encode_init() choose from the pixel format */
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
        { .i64 = PRORES_PROFILE_AUTO },
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
        0, 0, VE, "profile" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
        0, 0, VE, "profile" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
        0, 0, VE, "profile" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
        0, 0, VE, "profile" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
        0, 0, VE, "profile" },
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
        0, 0, VE, "profile" },
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
        0, 0, VE, "profile" },
    /* written into the frame header; encode_init() requires exactly 4 characters */
    { "vendor", "vendor ID", OFFSET(vendor),
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
    /* 0 = derive from profile and frame size; encode_init() rejects other values below 128 */
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
    /* -1 = use the matrices of the selected profile. The numeric range also
     * admits QUANT_MAT_PROXY_CHROMA and QUANT_MAT_XQ_LUMA, which have no
     * named constant below. */
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
        0, 0, VE, "quant_mat" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
        0, 0, VE, "quant_mat" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
        0, 0, VE, "quant_mat" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
        0, 0, VE, "quant_mat" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
        0, 0, VE, "quant_mat" },
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
        0, 0, VE, "quant_mat" },
    /* encode_init() accepts only multiples of 8 (0, 8 or 16) and forces 0
     * when the input has no alpha or the profile is not a 4444 one */
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
        { .i64 = 16 }, 0, 16, VE },
    /* table terminator */
    { NULL }
};
1414
1415 static const AVClass proresenc_class = {
1416 .class_name = "ProRes encoder",
1417 .item_name = av_default_item_name,
1418 .option = options,
1419 .version = LIBAVUTIL_VERSION_INT,
1420 };
1421
1422 AVCodec ff_prores_ks_encoder = {
1423 .name = "prores_ks",
1424 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1425 .type = AVMEDIA_TYPE_VIDEO,
1426 .id = AV_CODEC_ID_PRORES,
1427 .priv_data_size = sizeof(ProresContext),
1428 .init = encode_init,
1429 .close = encode_close,
1430 .encode2 = encode_frame,
1431 .capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
1432 .pix_fmts = (const enum AVPixelFormat[]) {
1433 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1434 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1435 },
1436 .priv_class = &proresenc_class,
1437 .profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1438 };
1439