1 /*
2 * GPAC - Multimedia Framework C SDK
3 *
4 * Authors: Jean Le Feuvre
5 * Copyright (c) Telecom ParisTech 2018
6 * All rights reserved
7 *
8 * This file is part of GPAC / ffmpeg encode filter
9 *
10 * GPAC is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * GPAC is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; see the file COPYING. If not, write to
22 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 */
25
26 #include <gpac/setup.h>
27 #include <gpac/bitstream.h>
28 #include <gpac/avparse.h>
29
30 #ifdef GPAC_HAS_FFMPEG
31
32 #include "ff_common.h"
33
34 #define ENC_BUF_ALLOC_SAFE 10000
35
/*! Context of the ffmpeg encoding filter - one instance per encoded PID (audio or video) */
typedef struct _gf_ffenc_ctx
{
	//opts (exposed filter arguments)
	Bool all_intra;       //encode all frames as intra
	char *c, *ffc;        //c: gpac codec name/ID, ffc: explicit ffmpeg encoder name
	Bool ls;              //log encoding stats to stderr
	u32 pfmt;             //forced pixel format
	GF_Fraction fintra;   //forced intra/IDR period in seconds (num/den)

	//internal data
	Bool initialized;

	u32 gop_size;
	u32 target_rate;

	AVCodecContext *encoder;
	//encoder options, passed to the codec open call
	AVDictionary *options;

	GF_FilterPid *in_pid, *out_pid;
	//media type (GF_STREAM_AUDIO or GF_STREAM_VISUAL)
	u32 type;
	u32 timescale;

	u32 nb_frames_out, nb_frames_in;
	//total encoding time in microseconds, used for FPS reporting
	u64 time_spent;

	Bool low_delay;

	//process callback, set at configure time to the audio or video routine
	GF_Err (*process)(GF_Filter *filter, struct _gf_ffenc_ctx *ctx);
	//gpac codec ID of the output
	u32 codecid;
	//done flushing encoder (eg sent NULL frames)
	u32 flush_done;
	//frame used by both video and audio encoder
	AVFrame *frame;

	//encoding buffer - we allocate ENC_BUF_ALLOC_SAFE+WxH for the video (some image codecs in ffmpeg require more than WxH for headers), ENC_BUF_ALLOC_SAFE+nb_ch*samplerate for the audio
	//this should be enough to hold any lossless compression formats
	char *enc_buffer;
	u32 enc_buffer_size;

	//true until the first output packet, used to compute the PTS/DTS shift
	Bool init_cts_setup;

	//video state
	u32 width, height, stride, stride_uv, nb_planes, uv_height;
	//ffmpeg pixel format
	enum AVPixelFormat pixel_fmt;
	//CTS of the first encoded frame + 1 (0 means not set)
	u64 cts_first_frame_plus_one;

	//audio state
	u32 channels, sample_rate, channel_layout, bytes_per_sample;
	//ffmpeg sample format
	u32 sample_fmt;
	//we store input audio frames in this buffer until we have enough data for one encoder frame
	//we also store the remainder of a consumed frame here, so that the input packet is released ASAP
	char *audio_buffer;
	u32 audio_buffer_size;
	u32 samples_in_audio_buffer;
	//cts of first byte in the audio buffer
	u64 first_byte_cts;
	Bool planar_audio;

	//shift of TS - ffmpeg may give pkt->PTS < frame->PTS to indicate discard samples
	//we convert back to frame PTS but signal discard samples at the PID level
	s32 ts_shift;

	//input packets referenced for property mapping onto output packets
	GF_List *src_packets;

	//reusable bitstream reader (packet side data parsing)
	GF_BitStream *sdbs;

	Bool reconfig_pending;
	Bool infmt_negociate;
	//true when PID timescale differs from encoder time base
	Bool remap_ts;
	Bool force_reconfig;

	//CRC of last sent decoder config, to only resend on change
	u32 dsi_crc;

	u32 gpac_pixel_fmt;
	u32 gpac_audio_fmt;

	//forced-intra (fintra) scheduling state
	Bool fintra_setup;
	u64 orig_ts;
	u32 nb_forced;

} GF_FFEncodeCtx;
122
123 static GF_Err ffenc_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove);
124
ffenc_initialize(GF_Filter * filter)125 static GF_Err ffenc_initialize(GF_Filter *filter)
126 {
127 GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
128 ctx->initialized = GF_TRUE;
129 ctx->src_packets = gf_list_new();
130 ctx->sdbs = gf_bs_new((u8*)ctx, 1, GF_BITSTREAM_READ);
131
132 ffmpeg_setup_logs(GF_LOG_CODEC);
133 return GF_OK;
134 }
135
/*! Filter destructor: release every resource owned by the encoder instance */
static void ffenc_finalize(GF_Filter *filter)
{
	GF_FilterPacket *pck;
	GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);

	if (ctx->options) av_dict_free(&ctx->options);
	if (ctx->frame) av_frame_free(&ctx->frame);
	if (ctx->enc_buffer) gf_free(ctx->enc_buffer);
	if (ctx->audio_buffer) gf_free(ctx->audio_buffer);

	//drop any source packets still referenced for property mapping
	while ((pck = gf_list_pop_back(ctx->src_packets)) != NULL) {
		gf_filter_pck_unref(pck);
	}
	gf_list_del(ctx->src_packets);

	if (ctx->encoder) avcodec_close(ctx->encoder);
	if (ctx->sdbs) gf_bs_del(ctx->sdbs);
}
156
157 //TODO add more feedback
/*! Logs encoding statistics for one output video packet to stderr (ls option) and/or the filter status report */
static void ffenc_log_video(GF_Filter *filter, struct _gf_ffenc_ctx *ctx, AVPacket *pkt, Bool do_reporting)
{
	Double fps=0;
	s32 q=-1;
	u8 pictype=0;
#if LIBAVCODEC_VERSION_MAJOR >= 58
	u64 errors[10];
	u32 i;
	u8 nb_errors = 0;
#endif
	const char *ptype = "U";

	//nothing to do when neither stderr logging nor reporting is active
	if (!ctx->ls && !do_reporting) return;


#if LIBAVCODEC_VERSION_MAJOR >= 58
	u32 sq_size;
	//quality stats side data layout: u32LE quality, u8 picture type, u8 error count, u16 reserved, then u64LE squared error per plane
	u8 *side_q = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &sq_size);
	if (side_q) {
		gf_bs_reassign_buffer(ctx->sdbs, side_q, sq_size);
		q = gf_bs_read_u32_le(ctx->sdbs);
		pictype = gf_bs_read_u8(ctx->sdbs);
		nb_errors = gf_bs_read_u8(ctx->sdbs);
		/*res*/gf_bs_read_u16(ctx->sdbs);
		//clamp to local storage size
		if (nb_errors>10) nb_errors = 10;
		for (i=0; i<nb_errors; i++) {
			errors[i] = gf_bs_read_u64_le(ctx->sdbs);
		}
	}
#endif
	//average output frame rate - time_spent is in microseconds
	if (ctx->time_spent) {
		fps = ctx->nb_frames_out;
		fps *= 1000000;
		fps /= ctx->time_spent;
	}
	switch (pictype) {
	case AV_PICTURE_TYPE_I: ptype = "I"; break;
	case AV_PICTURE_TYPE_P: ptype = "P"; break;
	case AV_PICTURE_TYPE_S: ptype = "S"; break;
	case AV_PICTURE_TYPE_SP: ptype = "SP"; break;
	case AV_PICTURE_TYPE_B: ptype = "B"; break;
	case AV_PICTURE_TYPE_BI: ptype = "B"; break;
	}

	if (ctx->ls) {
		fprintf(stderr, "[FFEnc] FPS %.02f F %d DTS "LLD" CTS "LLD" Q %02.02f PT %s (F_in %d)", fps, ctx->nb_frames_out, pkt->dts+ctx->ts_shift, pkt->pts+ctx->ts_shift, ((Double)q) / FF_QP2LAMBDA, ptype, ctx->nb_frames_in);
#if LIBAVCODEC_VERSION_MAJOR >= 58
		if (nb_errors) {
			fprintf(stderr, "PSNR");
			for (i=0; i<nb_errors; i++) {
				//NOTE(review): this prints the normalized mean squared error, not converted to dB despite the PSNR label - confirm intent
				Double psnr = (Double) errors[i];
				psnr /= ctx->width * ctx->height * 255.0 * 255.0;
				fprintf(stderr, " %02.02f", psnr);
			}
		}
#endif
		//carriage return only: keep rewriting the same console line
		fprintf(stderr, "\r");
	}

	if (do_reporting) {
		char szStatus[1024];
		sprintf(szStatus, "[FFEnc] FPS %.02f F %d DTS "LLD" CTS "LLD" Q %02.02f PT %s (F_in %d)", fps, ctx->nb_frames_out, pkt->dts+ctx->ts_shift, pkt->pts+ctx->ts_shift, ((Double)q) / FF_QP2LAMBDA, ptype, ctx->nb_frames_in);
		gf_filter_update_status(filter, -1, szStatus);
	}
}
223
ffenc_process_video(GF_Filter * filter,struct _gf_ffenc_ctx * ctx)224 static GF_Err ffenc_process_video(GF_Filter *filter, struct _gf_ffenc_ctx *ctx)
225 {
226 AVPacket pkt;
227 s32 gotpck;
228 const char *data = NULL;
229 u32 size=0, i, count, offset, to_copy;
230 s32 res;
231 u64 now;
232 u8 *output;
233 u32 force_intra = 0;
234 Bool insert_jp2c = GF_FALSE;
235 GF_FilterPacket *dst_pck, *src_pck;
236 GF_FilterPacket *pck;
237
238 if (!ctx->in_pid) return GF_EOS;
239
240 pck = gf_filter_pid_get_packet(ctx->in_pid);
241
242 if (!ctx->encoder) {
243 if (ctx->infmt_negociate) return GF_OK;
244
245 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] encoder reconfiguration failed, aborting stream\n"));
246 gf_filter_pid_set_eos(ctx->out_pid);
247 return GF_EOS;
248 }
249
250 if (!pck) {
251 if (! gf_filter_pid_is_eos(ctx->in_pid)) return GF_OK;
252 if (ctx->flush_done) return GF_OK;
253 }
254
255 if (ctx->reconfig_pending) pck = NULL;
256
257 if (pck) data = gf_filter_pck_get_data(pck, &size);
258
259 av_init_packet(&pkt);
260 pkt.data = (uint8_t*)ctx->enc_buffer;
261 pkt.size = ctx->enc_buffer_size;
262
263 ctx->frame->pict_type = 0;
264 ctx->frame->width = ctx->width;
265 ctx->frame->height = ctx->height;
266 ctx->frame->format = ctx->pixel_fmt;
267
268 ctx->frame->pict_type = AV_PICTURE_TYPE_NONE;
269
270 //force picture type
271 if (ctx->all_intra)
272 ctx->frame->pict_type = AV_PICTURE_TYPE_I;
273
274 //if PCK_FILENUM is set on input, this is a file boundary, force IDR sync
275 if (pck && gf_filter_pck_get_property(pck, GF_PROP_PCK_FILENUM)) {
276 force_intra = 2;
277 }
278 //check if we need to force a closed gop
279 if (pck && (ctx->fintra.den && ctx->fintra.num) && !ctx->force_reconfig) {
280 u64 cts = gf_filter_pck_get_cts(pck);
281 if (!ctx->fintra_setup) {
282 ctx->fintra_setup = GF_TRUE;
283 ctx->orig_ts = cts;
284 force_intra = 1;
285 ctx->nb_forced=1;
286 } else if (cts < ctx->orig_ts) {
287 GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] timestamps not increasing monotonuously, reseting forced intra state !\n"));
288 ctx->orig_ts = cts;
289 force_intra = 1;
290 ctx->nb_forced=1;
291 } else {
292 u64 ts_diff = cts - ctx->orig_ts;
293 if (ts_diff * ctx->fintra.den >= ctx->nb_forced * ctx->fintra.num * ctx->timescale) {
294 force_intra = 1;
295 ctx->nb_forced++;
296 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[FFEnc] Forcing IDR at frame %d (CTS %d / %d)\n", ctx->nb_frames_in, cts, ctx->timescale));
297 }
298 }
299 }
300 if (force_intra) {
301 //file switch we force a full reset to force injecting xPS in the stream
302 //we could also inject them manually but we don't have them !!
303 if (force_intra==2) {
304 if (!ctx->force_reconfig) {
305 ctx->reconfig_pending = GF_TRUE;
306 ctx->force_reconfig = GF_TRUE;
307 pck = NULL;
308 } else {
309 ctx->force_reconfig = GF_FALSE;
310 }
311 }
312 ctx->frame->pict_type = AV_PICTURE_TYPE_I;
313 }
314
315
316 now = gf_sys_clock_high_res();
317 gotpck = 0;
318 if (pck) {
319 u32 ilaced;
320 if (data) {
321 ctx->frame->data[0] = (u8 *) data;
322 ctx->frame->linesize[0] = ctx->stride;
323 if (ctx->nb_planes>1) {
324 ctx->frame->data[1] = (u8 *) data + ctx->stride * ctx->height;
325 ctx->frame->linesize[1] = ctx->stride_uv ? ctx->stride_uv : ctx->stride/2;
326 if (ctx->nb_planes>2) {
327 ctx->frame->data[2] = (u8 *) ctx->frame->data[1] + ctx->stride_uv * ctx->height/2;
328 ctx->frame->linesize[2] = ctx->frame->linesize[1];
329 } else {
330 ctx->frame->linesize[2] = 0;
331 }
332 } else {
333 ctx->frame->linesize[1] = 0;
334 }
335 } else {
336 GF_Err e=GF_NOT_SUPPORTED;
337 GF_FilterFrameInterface *frame_ifce = gf_filter_pck_get_frame_interface(pck);
338 if (frame_ifce && frame_ifce->get_plane) {
339 e = frame_ifce->get_plane(frame_ifce, 0, (const u8 **) &ctx->frame->data[0], &ctx->frame->linesize[0]);
340 if (!e && (ctx->nb_planes>1)) {
341 e = frame_ifce->get_plane(frame_ifce, 1, (const u8 **) &ctx->frame->data[1], &ctx->frame->linesize[1]);
342 if (!e && (ctx->nb_planes>2)) {
343 e = frame_ifce->get_plane(frame_ifce, 1, (const u8 **) &ctx->frame->data[2], &ctx->frame->linesize[2]);
344 }
345 }
346 }
347 if (e) {
348 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to fetch %sframe data: %s\n", frame_ifce ? "hardware " : "", gf_error_to_string(e) ));
349 gf_filter_pid_drop_packet(ctx->in_pid);
350 return e;
351 }
352 }
353
354 ilaced = gf_filter_pck_get_interlaced(pck);
355 if (!ilaced) {
356 ctx->frame->interlaced_frame = 0;
357 } else {
358 ctx->frame->interlaced_frame = 1;
359 ctx->frame->top_field_first = (ilaced==2) ? 1 : 0;
360 }
361 ctx->frame->pts = gf_filter_pck_get_cts(pck);
362 ctx->frame->pkt_duration = gf_filter_pck_get_duration(pck);
363
364 #define SCALE_TS(_ts) if (_ts != GF_FILTER_NO_TS) { _ts *= ctx->encoder->time_base.den; _ts /= ctx->encoder->time_base.num; _ts /= ctx->timescale; }
365 #define UNSCALE_TS(_ts) if (_ts != AV_NOPTS_VALUE) { _ts *= ctx->encoder->time_base.num; _ts *= ctx->timescale; _ts /= ctx->encoder->time_base.den; }
366 #define UNSCALE_DUR(_ts) { _ts *= ctx->encoder->time_base.num; _ts *= ctx->timescale; _ts /= ctx->encoder->time_base.den; }
367
368 //store first frame CTS before rescaling, we use it after rescaling the output packet timing to compute CTS-DTS
369 if (!ctx->cts_first_frame_plus_one) {
370 ctx->cts_first_frame_plus_one = 1 + ctx->frame->pts;
371 }
372
373 if (ctx->remap_ts) {
374 SCALE_TS(ctx->frame->pts);
375
376 SCALE_TS(ctx->frame->pkt_duration);
377 }
378
379 ctx->frame->pkt_dts = ctx->frame->pkt_pts = ctx->frame->pts;
380
381 res = avcodec_encode_video2(ctx->encoder, &pkt, ctx->frame, &gotpck);
382 ctx->nb_frames_in++;
383
384 //keep ref to ource properties
385 gf_filter_pck_ref_props(&pck);
386 gf_list_add(ctx->src_packets, pck);
387
388 gf_filter_pid_drop_packet(ctx->in_pid);
389
390 if (ctx->remap_ts) {
391 UNSCALE_TS(ctx->frame->pts);
392 UNSCALE_TS(ctx->frame->pkt_duration);
393
394 UNSCALE_TS(pkt.dts);
395 UNSCALE_TS(pkt.pts);
396 UNSCALE_DUR(pkt.duration);
397 }
398 } else {
399 res = avcodec_encode_video2(ctx->encoder, &pkt, NULL, &gotpck);
400 if (!gotpck) {
401 //done flushing encoder while reconfiguring
402 if (ctx->reconfig_pending) {
403 ctx->reconfig_pending = GF_FALSE;
404 avcodec_close(ctx->encoder);
405 ctx->encoder = NULL;
406 GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec flush done, triggering reconfiguration\n"));
407 return ffenc_configure_pid(filter, ctx->in_pid, GF_FALSE);
408 }
409 ctx->flush_done = 1;
410 gf_filter_pid_set_eos(ctx->out_pid);
411 return GF_EOS;
412 }
413 if (ctx->remap_ts) {
414 UNSCALE_TS(pkt.dts);
415 UNSCALE_TS(pkt.pts);
416 UNSCALE_DUR(pkt.duration);
417 }
418 }
419 now = gf_sys_clock_high_res() - now;
420 ctx->time_spent += now;
421
422 if (res<0) {
423 ctx->nb_frames_out++;
424 return GF_SERVICE_ERROR;
425 }
426
427 if (!gotpck) {
428 return GF_OK;
429 }
430
431 ctx->nb_frames_out++;
432 if (ctx->init_cts_setup) {
433 ctx->init_cts_setup = GF_FALSE;
434 if (ctx->frame->pts != pkt.pts) {
435 //check shift in PTS
436 ctx->ts_shift = (s32) ( (s64) ctx->cts_first_frame_plus_one - 1 - (s64) pkt.pts );
437
438 //check shift in DTS
439 ctx->ts_shift += (s32) ( (s64) ctx->cts_first_frame_plus_one - 1 - (s64) pkt.dts );
440 }
441 if (ctx->ts_shift) {
442 s64 shift = ctx->ts_shift;
443 gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DELAY, &PROP_SINT((s32) shift) );
444 }
445 }
446
447 src_pck = NULL;
448 count = gf_list_count(ctx->src_packets);
449 for (i=0; i<count; i++) {
450 src_pck = gf_list_get(ctx->src_packets, i);
451 if (gf_filter_pck_get_cts(src_pck) == pkt.pts) break;
452 src_pck = NULL;
453 }
454
455 offset = 0;
456 to_copy = size = pkt.size;
457
458
459 if (ctx->codecid == GF_CODECID_J2K) {
460 u32 b4cc = GF_4CC(pkt.data[4], pkt.data[5], pkt.data[6], pkt.data[7]);
461 if (b4cc == GF_4CC('j','P',' ',' ')) {
462 u32 jp2h_offset = 0;
463 offset = 12;
464 while (offset+8 < (u32) pkt.size) {
465 b4cc = GF_4CC(pkt.data[offset+4], pkt.data[offset+5], pkt.data[offset+6], pkt.data[offset+7]);
466 if (b4cc == GF_4CC('j','p','2','c')) {
467 break;
468 }
469 if (b4cc == GF_4CC('j','p','2','h')) {
470 jp2h_offset = offset;
471 }
472 offset++;
473 }
474 if (jp2h_offset) {
475 u32 len = pkt.data[jp2h_offset];
476 len <<= 8;
477 len |= pkt.data[jp2h_offset+1];
478 len <<= 8;
479 len |= pkt.data[jp2h_offset+2];
480 len <<= 8;
481 len |= pkt.data[jp2h_offset+3];
482
483 u32 dsi_crc = gf_crc_32(pkt.data + jp2h_offset + 8, len-8);
484 if (dsi_crc != ctx->dsi_crc) {
485 ctx->dsi_crc = dsi_crc;
486 gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA(pkt.data + jp2h_offset + 8, len-8) );
487 }
488 }
489 size -= offset;
490 to_copy -= offset;
491 } else {
492 size += 8;
493
494 if (!ctx->dsi_crc) {
495 u8 *dsi;
496 u32 dsi_len;
497 GF_BitStream *bs = gf_bs_new(NULL, 0, GF_BITSTREAM_WRITE);
498 gf_bs_write_u32(bs, 14+8);
499 gf_bs_write_u32(bs, GF_4CC('i','h','d','r'));
500 gf_bs_write_u32(bs, ctx->height);
501 gf_bs_write_u32(bs, ctx->width);
502 gf_bs_write_u16(bs, ctx->nb_planes);
503 gf_bs_write_u8(bs, gf_pixel_get_bytes_per_pixel(ctx->gpac_pixel_fmt));
504 gf_bs_write_u8(bs, 7); //COMP
505 gf_bs_write_u8(bs, 0);
506 gf_bs_write_u8(bs, 0);
507 gf_bs_get_content(bs, &dsi, &dsi_len);
508 gf_bs_del(bs);
509 gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
510 ctx->dsi_crc = 1;
511 }
512 }
513 }
514
515 dst_pck = gf_filter_pck_new_alloc(ctx->out_pid, size, &output);
516 if (insert_jp2c) {
517 u32 bsize = pkt.size + 8;
518 output[0] = (bsize >> 24) & 0xFF;
519 output[1] = (bsize >> 16) & 0xFF;
520 output[2] = (bsize >> 8) & 0xFF;
521 output[3] = (bsize) & 0xFF;
522 output[4] = 'j';
523 output[5] = 'p';
524 output[6] = '2';
525 output[7] = 'c';
526 output += 8;
527 }
528 memcpy(output, pkt.data + offset, to_copy);
529
530 if (src_pck) {
531 gf_filter_pck_merge_properties(src_pck, dst_pck);
532 gf_list_del_item(ctx->src_packets, src_pck);
533 gf_filter_pck_unref(src_pck);
534 } else {
535 if (pkt.duration) {
536 gf_filter_pck_set_duration(dst_pck, (u32) pkt.duration);
537 } else {
538 gf_filter_pck_set_duration(dst_pck, (u32) ctx->frame->pkt_duration);
539 }
540 }
541
542 ffenc_log_video(filter, ctx, &pkt, gf_filter_reporting_enabled(filter));
543
544 gf_filter_pck_set_cts(dst_pck, pkt.pts + ctx->ts_shift);
545 gf_filter_pck_set_dts(dst_pck, pkt.dts + ctx->ts_shift);
546
547 //this is not 100% correct since we don't have any clue if this is SAP1/2/3/4 ...
548 //since we send the output to our reframers we should be fine
549 if (pkt.flags & AV_PKT_FLAG_KEY) {
550 gf_filter_pck_set_sap(dst_pck, GF_FILTER_SAP_1);
551 GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] frame %d is SAP\n", ctx->nb_frames_out));
552 }
553 else
554 gf_filter_pck_set_sap(dst_pck, 0);
555
556 #if LIBAVCODEC_VERSION_MAJOR >= 58
557 if (pkt.flags & AV_PKT_FLAG_DISPOSABLE) {
558 gf_filter_pck_set_dependency_flags(dst_pck, 0x8);
559 }
560 #endif
561 gf_filter_pck_send(dst_pck);
562
563 //we're in final flush, request a process task until all frames flushe
564 //we could recursiveley call ourselves, same result
565 if (!pck) {
566 gf_filter_post_process_task(filter);
567 }
568 return GF_OK;
569 }
570
571
572
/*! Appends nb_samples from data (starting at sample_offset, in samples) into the internal audio buffer.
For packed audio this is a single contiguous copy; for planar audio each channel plane is copied separately,
laying the buffer out as consecutive encoder-frame-sized planar chunks so it can be fed directly to the encoder.
size is the byte size of the data buffer (used to derive the source plane stride in planar mode). */
static void ffenc_audio_append_samples(struct _gf_ffenc_ctx *ctx, const u8 *data, u32 size, u32 sample_offset, u32 nb_samples)
{
	u8 *dst;
	u32 f_idx, s_idx;
	u32 i, bytes_per_chan, src_frame_size;

	if (!ctx->audio_buffer || !data)
		return;

	if (!ctx->planar_audio) {
		//packed audio: one memcpy, offsets expressed in full (all-channel) samples
		u32 offset_src = sample_offset * ctx->bytes_per_sample;
		u32 offset_dst = ctx->samples_in_audio_buffer * ctx->bytes_per_sample;
		u32 len = nb_samples * ctx->bytes_per_sample;
		memcpy(ctx->audio_buffer + offset_dst, data + offset_src, sizeof(u8)*len);
		ctx->samples_in_audio_buffer += nb_samples;
		return;
	}

	//planar mode: bytes_per_sample covers all channels, so per-channel sample size is the quotient
	bytes_per_chan = ctx->bytes_per_sample / ctx->channels;
	//number of samples per channel in the source packet - the source plane stride
	src_frame_size = size / ctx->bytes_per_sample;
	assert(ctx->samples_in_audio_buffer + nb_samples <= (u32) ctx->audio_buffer_size);
	assert(sample_offset + nb_samples <= src_frame_size);
	assert(ctx->encoder->frame_size);

	//locate the write position: chunk index (encoder frames already buffered) and sample index within that chunk
	f_idx = ctx->samples_in_audio_buffer / ctx->encoder->frame_size;
	s_idx = ctx->samples_in_audio_buffer % ctx->encoder->frame_size;
	if (s_idx) {
		//partial chunk: the copy below must not cross the chunk boundary
		assert(s_idx + nb_samples <= (u32) ctx->encoder->frame_size);
	}
	dst = ctx->audio_buffer + (f_idx * ctx->channels * ctx->encoder->frame_size + s_idx) * bytes_per_chan;
	while (nb_samples) {
		const u8 *src;
		u32 nb_samples_to_copy = nb_samples;
		//copy at most one encoder frame per iteration
		if (nb_samples_to_copy > (u32) ctx->encoder->frame_size)
			nb_samples_to_copy = ctx->encoder->frame_size;

		assert(sample_offset<src_frame_size);

		src = data + sample_offset * bytes_per_chan;

		//copy each channel plane - dst advances by encoder frame stride, src by source packet stride
		for (i=0; i<ctx->channels; i++) {
			memcpy(dst, src, sizeof(u8) * nb_samples_to_copy * bytes_per_chan);

			dst += ctx->encoder->frame_size * bytes_per_chan;
			src += src_frame_size * bytes_per_chan;
		}
		ctx->samples_in_audio_buffer += nb_samples_to_copy;
		nb_samples -= nb_samples_to_copy;
		sample_offset += nb_samples_to_copy;
	}
}
624
/*! Audio encoding process: accumulates input samples until one full encoder frame is available,
encodes it and dispatches the resulting packet on the output PID. Handles encoder flush at EOS
and during reconfiguration, and remaps source packet properties onto output packets. */
static GF_Err ffenc_process_audio(GF_Filter *filter, struct _gf_ffenc_ctx *ctx)
{
	AVPacket pkt;
	s32 gotpck;
	const char *data = NULL;
	u32 size=0, nb_copy=0, i, count;
	Bool from_internal_buffer_only = GF_FALSE;
	s32 res;
	u32 nb_samples=0;
	u64 ts_diff;
	u8 *output;
	GF_FilterPacket *dst_pck, *src_pck;
	GF_FilterPacket *pck;

	if (!ctx->in_pid) return GF_EOS;

	pck = gf_filter_pid_get_packet(ctx->in_pid);

	if (!ctx->encoder) {
		//encoder not opened yet but a new input format is being negotiated: try again later
		if (ctx->infmt_negociate) return GF_OK;

		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] encoder reconfiguration failed, aborting stream\n"));
		gf_filter_pid_set_eos(ctx->out_pid);
		return GF_EOS;
	}

	if (!pck) {
		if (! gf_filter_pid_is_eos(ctx->in_pid)) return GF_OK;
		if (ctx->flush_done) return GF_EOS;
	}

	//while reconfiguring, feed NULL frames to drain the encoder
	if (ctx->reconfig_pending) pck = NULL;

	if (ctx->encoder->frame_size && (ctx->encoder->frame_size <= (s32) ctx->samples_in_audio_buffer)) {
		//enough buffered samples for a full encoder frame: encode from the internal buffer without consuming the input packet
		avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, ctx->audio_buffer, ctx->bytes_per_sample * ctx->encoder->frame_size, 0);

		from_internal_buffer_only = GF_TRUE;

	} else if (pck) {
		data = gf_filter_pck_get_data(pck, &size);
		if (!data) {
			GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Packet without associated data\n"));
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_OK;
		}

		//empty buffer: remember the CTS of the first buffered sample
		if (!ctx->samples_in_audio_buffer) {
			ctx->first_byte_cts = gf_filter_pck_get_cts(pck);
		}

		//keep a property-only reference to the input packet for later remapping onto output packets
		src_pck = pck;
		gf_filter_pck_ref_props(&src_pck);
		gf_list_add(ctx->src_packets, src_pck);

		nb_samples = size / ctx->bytes_per_sample;
		//not enough data for one encoder frame: buffer everything and wait for more input
		if (ctx->encoder->frame_size && (nb_samples + ctx->samples_in_audio_buffer < (u32) ctx->encoder->frame_size)) {
			ffenc_audio_append_samples(ctx, data, size, 0, nb_samples);
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_OK;
		}

		if (ctx->encoder->frame_size) {
			//complete the current encoder frame with the head of this packet; the tail is re-buffered after encoding
			nb_copy = ctx->encoder->frame_size - ctx->samples_in_audio_buffer;
			ffenc_audio_append_samples(ctx, data, size, 0, nb_copy);

			ctx->frame->nb_samples = ctx->encoder->frame_size;
			res = avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, ctx->audio_buffer, ctx->encoder->frame_size*ctx->bytes_per_sample, 0);

		} else {
			//encoder accepts arbitrary frame sizes: feed the packet data directly
			ctx->frame->nb_samples = size / ctx->bytes_per_sample;
			res = avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, data, size, 0);
			data = NULL;
			size = 0;
		}
		if (res<0) {
			GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Error filling raw audio frame: %s\n", av_err2str(res) ));
			//discard the buffered frame, but keep the unconsumed tail of this packet and fix up the buffer CTS
			ctx->samples_in_audio_buffer = 0;
			if (data && (nb_samples > nb_copy)) {
				ffenc_audio_append_samples(ctx, data, size, nb_copy, nb_samples - nb_copy);
				ts_diff = nb_copy;
				ts_diff *= ctx->timescale;
				ts_diff /= ctx->sample_rate;
				ctx->first_byte_cts = gf_filter_pck_get_cts(pck) + ts_diff;
			}
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_SERVICE_ERROR;
		}
	}

	av_init_packet(&pkt);
	pkt.data = (uint8_t*)ctx->enc_buffer;
	pkt.size = ctx->enc_buffer_size;

	ctx->frame->nb_samples = ctx->encoder->frame_size;
	ctx->frame->format = ctx->encoder->sample_fmt;
	ctx->frame->channels = ctx->encoder->channels;
	ctx->frame->channel_layout = ctx->encoder->channel_layout;
	gotpck = 0;
	if (pck) {
		//frame timestamp is the CTS of the first buffered sample
		ctx->frame->pkt_dts = ctx->frame->pkt_pts = ctx->frame->pts = ctx->first_byte_cts;
		res = avcodec_encode_audio2(ctx->encoder, &pkt, ctx->frame, &gotpck);
	} else {
		//no input packet: drain the encoder
		res = avcodec_encode_audio2(ctx->encoder, &pkt, NULL, &gotpck);
		if (!gotpck) {
			//done flushing encoder while reconfiguring
			if (ctx->reconfig_pending) {
				ctx->reconfig_pending = GF_FALSE;
				avcodec_close(ctx->encoder);
				ctx->encoder = NULL;
				GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec flush done, triggering reconfiguration\n"));
				return ffenc_configure_pid(filter, ctx->in_pid, GF_FALSE);
			}
			ctx->flush_done = 1;
			gf_filter_pid_set_eos(ctx->out_pid);
			return GF_EOS;
		}
	}

	if (from_internal_buffer_only) {
		//avcodec_fill_audio_frame does not perform copy, so make sure we discard internal buffer AFTER we encode
		u32 offset, len, nb_samples_to_drop;

		//we always drop a complete encoder frame size, so same code for planar and packed
		nb_samples_to_drop = ctx->encoder->frame_size;

		if (ctx->samples_in_audio_buffer > nb_samples_to_drop) {
			offset = nb_samples_to_drop * ctx->bytes_per_sample;
			len = (ctx->samples_in_audio_buffer - nb_samples_to_drop) * ctx->bytes_per_sample;
			memmove(ctx->audio_buffer, ctx->audio_buffer + offset, sizeof(u8)*len);
			ctx->samples_in_audio_buffer -= nb_samples_to_drop;
		} else {
			ctx->samples_in_audio_buffer = 0;
		}

	}

	//increase timestamp by the number of samples encoded, rescaled to the PID timescale if needed
	ts_diff = ctx->frame->nb_samples;
	if (ctx->timescale!=ctx->sample_rate) {
		ts_diff *= ctx->timescale;
		ts_diff /= ctx->sample_rate;
	}
	ctx->first_byte_cts += ts_diff;

	if (pck && !from_internal_buffer_only) {
		//re-buffer the unconsumed tail of the input packet, then release it
		ctx->samples_in_audio_buffer = 0;
		if (nb_samples > nb_copy) {
			ffenc_audio_append_samples(ctx, data, size, nb_copy, nb_samples - nb_copy);
		}
		gf_filter_pid_drop_packet(ctx->in_pid);
	}

	if (res<0) {
		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Error encoding frame: %s\n", av_err2str(res) ));
		return GF_SERVICE_ERROR;
	}
	if (!gotpck) {
		return GF_OK;
	}
	dst_pck = gf_filter_pck_new_alloc(ctx->out_pid, pkt.size, &output);
	memcpy(output, pkt.data, pkt.size);

	if (ctx->init_cts_setup) {
		ctx->init_cts_setup = GF_FALSE;
		//first output packet: ffmpeg may report pkt.pts < frame pts to signal priming/discarded samples
		if (ctx->frame->pts != pkt.pts) {
			ctx->ts_shift = (s32) ( (s64) ctx->frame->pts - (s64) pkt.pts );
		}
//		if (ctx->ts_shift) {
//			s64 shift = ctx->ts_shift;
//			gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DELAY, &PROP_SINT((s32) -shift) );
//		}
	}

	//try to locate first source packet with cts greater than this packet cts and use it as source for properties
	//this is not optimal because we don't produce N for N because of different window coding sizes
	src_pck = NULL;
	count = gf_list_count(ctx->src_packets);
	for (i=0; i<count; i++) {
		u64 acts;
		u32 adur;
		src_pck = gf_list_get(ctx->src_packets, i);
		acts = gf_filter_pck_get_cts(src_pck);
		adur = gf_filter_pck_get_duration(src_pck);

		if ((s64) acts >= pkt.pts) {
			break;
		}

		//source packet entirely before this output packet: no longer needed
		//NOTE(review): this comparison applies ts_shift while the break test above does not - confirm intent
		if (acts + adur <= (u64) ( pkt.pts + ctx->ts_shift) ) {
			gf_list_rem(ctx->src_packets, i);
			gf_filter_pck_unref(src_pck);
			i--;
			count--;
		}
		src_pck = NULL;
	}
	if (src_pck) {
		gf_filter_pck_merge_properties(src_pck, dst_pck);
		gf_list_del_item(ctx->src_packets, src_pck);
		gf_filter_pck_unref(src_pck);
	}
	gf_filter_pck_set_cts(dst_pck, pkt.pts + ctx->ts_shift);
	gf_filter_pck_set_dts(dst_pck, pkt.dts + ctx->ts_shift);
	//this is not 100% correct since we don't have any clue if this is SAP1/4 (roll info missing)
	if (pkt.flags & AV_PKT_FLAG_KEY)
		gf_filter_pck_set_sap(dst_pck, GF_FILTER_SAP_1);
	else
		gf_filter_pck_set_sap(dst_pck, 0);

	gf_filter_pck_set_duration(dst_pck, (u32) pkt.duration);

	gf_filter_pck_send(dst_pck);

	//we're in final flush, request a process task until all frames are flushed
	//we could recursively call ourselves, same result
	if (!pck) {
		gf_filter_post_process_task(filter);
	}
	return GF_OK;
}
846
ffenc_process(GF_Filter * filter)847 static GF_Err ffenc_process(GF_Filter *filter)
848 {
849 GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
850 if (!ctx->out_pid || gf_filter_pid_would_block(ctx->out_pid))
851 return GF_OK;
852 return ctx->process(filter, ctx);
853 }
854
/*! Copies input PID properties to the output PID and overrides the codec-specific ones.
Called at initial configuration and on reconfiguration. */
static void ffenc_copy_pid_props(GF_FFEncodeCtx *ctx)
{
	Bool unframed_out;

	//forward everything from the source, then reset decoder config and set the target codec ID
	gf_filter_pid_copy_properties(ctx->out_pid, ctx->in_pid);
	gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, NULL);
	gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_CODECID, &PROP_UINT(ctx->codecid) );

	unframed_out = ((ctx->codecid==GF_CODECID_AVC) || (ctx->codecid==GF_CODECID_HEVC) || (ctx->codecid==GF_CODECID_MPEG4_PART2)) ? GF_TRUE : GF_FALSE;

	if (unframed_out) {
		//annexB-style outputs are sent unframed (one full AU per packet), a reframer rebuilds the config
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_UNFRAMED, &PROP_BOOL(GF_TRUE) );
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_UNFRAMED_FULL_AU, &PROP_BOOL(GF_TRUE) );
	} else if (ctx->encoder && ctx->encoder->extradata_size && ctx->encoder->extradata) {
		//other codecs: expose the encoder extradata as decoder config when present
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA(ctx->encoder->extradata, ctx->encoder->extradata_size) );
	}
}
876
ffenc_configure_pid(GF_Filter * filter,GF_FilterPid * pid,Bool is_remove)877 static GF_Err ffenc_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
878 {
879 s32 res;
880 u32 type=0, fftype, ff_codectag=0;
881 u32 i=0;
882 u32 change_input_fmt = 0;
883 const GF_PropertyValue *prop;
884 const AVCodec *codec=NULL;
885 const AVCodec *desired_codec=NULL;
886 u32 codec_id, pfmt, afmt;
887 GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
888
889 //disconnect of src pid (not yet supported)
890 if (is_remove) {
891 ctx->in_pid = NULL;
892 //one in one out, this is simple
893 if (ctx->out_pid) gf_filter_pid_remove(ctx->out_pid);
894 return GF_OK;
895 }
896 //check our PID: streamtype and codecid
897 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_STREAM_TYPE);
898 if (!prop) return GF_NOT_SUPPORTED;
899
900 type = prop->value.uint;
901 switch (type) {
902 case GF_STREAM_AUDIO:
903 case GF_STREAM_VISUAL:
904 break;
905 default:
906 return GF_NOT_SUPPORTED;
907 }
908 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CODECID);
909 if (!prop || prop->value.uint!=GF_CODECID_RAW) return GF_NOT_SUPPORTED;
910
911 //figure out if output was preconfigured during filter chain setup
912 prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_CODECID);
913 if (prop) {
914 ctx->codecid = prop->value.uint;
915 } else if (!ctx->codecid && ctx->c) {
916 ctx->codecid = gf_codec_parse(ctx->c);
917 if (!ctx->codecid) {
918 codec = avcodec_find_encoder_by_name(ctx->c);
919 if (codec)
920 ctx->codecid = ffmpeg_codecid_to_gpac(codec->id);
921 }
922 }
923 //if the codec was set using ffc, get it
924 if (ctx->ffc) {
925 desired_codec = avcodec_find_encoder_by_name(ctx->ffc);
926 }
927
928 if (!ctx->codecid && !desired_codec) {
929 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] No codecid specified\n" ));
930 return GF_BAD_PARAM;
931 }
932
933 //initial config or update
934 if (!ctx->in_pid || (ctx->in_pid==pid)) {
935 ctx->in_pid = pid;
936 if (!ctx->type) ctx->type = type;
937 //no support for dynamic changes of stream types
938 else if (ctx->type != type) {
939 return GF_NOT_SUPPORTED;
940 }
941 } else {
942 //only one input pid in ctx
943 if (ctx->in_pid) return GF_REQUIRES_NEW_INSTANCE;
944 }
945
946 if (ctx->codecid) {
947 codec_id = ffmpeg_codecid_from_gpac(ctx->codecid, &ff_codectag);
948 if (codec_id) {
949 if (desired_codec && desired_codec->id==codec_id)
950 codec = desired_codec;
951 else
952 codec = avcodec_find_encoder(codec_id);
953 }
954 } else {
955 codec = desired_codec;
956 }
957 if (!codec) {
958 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Cannot find encoder for codec %s\n", gf_codecid_name(ctx->codecid) ));
959 return GF_NOT_SUPPORTED;
960 }
961 codec_id = codec->id;
962 if (!ctx->codecid)
963 ctx->codecid = ffmpeg_codecid_to_gpac(codec->id);
964
965 fftype = ffmpeg_stream_type_to_gpac(codec->type);
966 if (fftype != type) {
967 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Mismatch between stream type, codec indicates %s but source type is %s\n", gf_stream_type_name(fftype), gf_stream_type_name(type) ));
968 return GF_NOT_SUPPORTED;
969 }
970
971 //declare our output pid to make sure we connect the chain
972 ctx->in_pid = pid;
973 if (!ctx->out_pid) {
974 char szCodecName[1000];
975 ctx->out_pid = gf_filter_pid_new(filter);
976
977 //to change once we implement on-the-fly codec change
978 sprintf(szCodecName, "ffenc:%s", codec->name ? codec->name : "unknown");
979 gf_filter_set_name(filter, szCodecName);
980 gf_filter_pid_set_framing_mode(ctx->in_pid, GF_TRUE);
981 }
982 if (type==GF_STREAM_AUDIO) {
983 ctx->process = ffenc_process_audio;
984 } else {
985 ctx->process = ffenc_process_video;
986 }
987
988 ffenc_copy_pid_props(ctx);
989 if (ctx->target_rate)
990 gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_BITRATE, &PROP_UINT(ctx->target_rate));
991
992
993 #define GET_PROP(_a, _code, _name) \
994 prop = gf_filter_pid_get_property(pid, _code); \
995 if (!prop) {\
996 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[FFEnc] Input %s unknown, waiting for reconfigure\n", _name)); \
997 return GF_OK; \
998 }\
999 _a =prop->value.uint;
1000
1001 pfmt = afmt = 0;
1002 if (type==GF_STREAM_VISUAL) {
1003 GET_PROP(ctx->width, GF_PROP_PID_WIDTH, "width")
1004 GET_PROP(ctx->height, GF_PROP_PID_HEIGHT, "height")
1005 GET_PROP(pfmt, GF_PROP_PID_PIXFMT, "pixel format")
1006
1007 prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_STRIDE);
//keep stride and stride_uv to 0 if not set, and recompute from pixel format
1009 if (prop) ctx->stride = prop->value.uint;
1010 prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_STRIDE_UV);
1011 if (prop) ctx->stride_uv = prop->value.uint;
1012 } else {
1013 GET_PROP(ctx->sample_rate, GF_PROP_PID_SAMPLE_RATE, "sample rate")
1014 GET_PROP(ctx->channels, GF_PROP_PID_NUM_CHANNELS, "nb channels")
1015 GET_PROP(afmt, GF_PROP_PID_AUDIO_FORMAT, "audio format")
1016 }
1017
1018 if (ctx->encoder) {
1019 codec_id = ffmpeg_codecid_from_gpac(ctx->codecid, &ff_codectag);
1020
1021 if (type==GF_STREAM_AUDIO) {
1022 if ((ctx->encoder->codec->id==codec_id) && (ctx->encoder->sample_rate==ctx->sample_rate) && (ctx->encoder->channels==ctx->channels) && (ctx->gpac_audio_fmt == afmt ) ) {
1023 return GF_OK;
1024 }
1025 } else {
1026 if ((ctx->encoder->codec->id==codec_id) && (ctx->encoder->width==ctx->width) && (ctx->encoder->height==ctx->height) && (ctx->gpac_pixel_fmt == pfmt ) ) {
1027 return GF_OK;
1028 }
1029 }
1030
1031 GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec reconfiguration, begining flush\n"));
1032 ctx->reconfig_pending = GF_TRUE;
1033 return GF_OK;
1034 }
1035
1036 if (type==GF_STREAM_VISUAL) {
1037 u32 force_pfmt = AV_PIX_FMT_NONE;
1038 if (ctx->pfmt) {
1039 u32 ff_pfmt = ffmpeg_pixfmt_from_gpac(ctx->pfmt);
1040 i=0;
1041 while (codec->pix_fmts) {
1042 if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1043 if (codec->pix_fmts[i] == ff_pfmt) {
1044 force_pfmt = ff_pfmt;
1045 break;
1046 }
1047 //handle pixel formats aliases
1048 if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i]) == ctx->pfmt) {
1049 force_pfmt = ctx->pixel_fmt;
1050 break;
1051 }
1052 i++;
1053 }
1054 if (force_pfmt == AV_PIX_FMT_NONE) {
1055 GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Requested source format %s not supported by codec, using default one\n", gf_pixel_fmt_name(ctx->pfmt) ));
1056 } else {
1057 change_input_fmt = force_pfmt;
1058 }
1059 }
1060 ctx->pixel_fmt = ffmpeg_pixfmt_from_gpac(pfmt);
1061 //check pixel format
1062 if (force_pfmt == AV_PIX_FMT_NONE) {
1063 change_input_fmt = AV_PIX_FMT_NONE;
1064 i=0;
1065 while (codec->pix_fmts) {
1066 if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1067 if (codec->pix_fmts[i] == ctx->pixel_fmt) {
1068 change_input_fmt = ctx->pixel_fmt;
1069 break;
1070 }
1071 //handle pixel formats aliases
1072 if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i]) == pfmt) {
1073 ctx->pixel_fmt = change_input_fmt = codec->pix_fmts[i];
1074 break;
1075 }
1076 i++;
1077 }
1078 if (!ctx->ffc && (change_input_fmt == AV_PIX_FMT_NONE)) {
1079 #if (LIBAVCODEC_VERSION_MAJOR >= 58) && (LIBAVCODEC_VERSION_MINOR>=20)
1080 void *ff_opaque=NULL;
1081 #else
1082 AVCodec *codec_alt = NULL;
1083 #endif
1084 while (1) {
1085 #if (LIBAVCODEC_VERSION_MAJOR >= 58) && (LIBAVCODEC_VERSION_MINOR>=20)
1086 const AVCodec *codec_alt = av_codec_iterate(&ff_opaque);
1087 #else
1088 codec_alt = av_codec_next(codec_alt);
1089 #endif
1090 if (!codec_alt) break;
1091 if (codec_alt==codec) continue;
1092 if (codec_alt->id == codec_id) {
1093 i=0;
1094 while (codec_alt->pix_fmts) {
1095 if (codec_alt->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1096 if (codec_alt->pix_fmts[i] == ctx->pixel_fmt) {
1097 change_input_fmt = ctx->pixel_fmt;
1098 GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Reassigning codec from %s to %s to match pixel format\n", codec->name, codec_alt->name ));
1099 codec = codec_alt;
1100 break;
1101 }
1102 i++;
1103 }
1104 }
1105 }
1106 }
1107 }
1108
1109 if (ctx->pixel_fmt != change_input_fmt) {
1110 u32 ff_pmft = ctx->pixel_fmt;
1111
1112 if (force_pfmt == AV_PIX_FMT_NONE) {
1113 ff_pmft = AV_PIX_FMT_NONE;
1114 i=0;
1115 //find a mapped pixel format
1116 while (codec->pix_fmts) {
1117 if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1118 if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i])) {
1119 ff_pmft = codec->pix_fmts[i];
1120 break;
1121 }
1122 i++;
1123 }
1124 if (ff_pmft == AV_PIX_FMT_NONE) {
1125 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Could not find a matching GPAC pixel format for encoder %s\n", codec->name ));
1126 return GF_NOT_SUPPORTED;
1127 }
1128 } else if (ctx->pfmt) {
1129 ff_pmft = ffmpeg_pixfmt_from_gpac(ctx->pfmt);
1130 }
1131 pfmt = ffmpeg_pixfmt_to_gpac(ff_pmft);
1132 gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_PIXFMT, &PROP_UINT(pfmt) );
1133 ctx->infmt_negociate = GF_TRUE;
1134 } else {
1135 ctx->infmt_negociate = GF_FALSE;
1136 }
1137 } else {
1138 u32 change_input_sr = 0;
1139 //check audio format
1140 ctx->sample_fmt = ffmpeg_audio_fmt_from_gpac(afmt);
1141 change_input_fmt = 0;
1142 while (codec->sample_fmts) {
1143 if (codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) break;
1144 if (codec->sample_fmts[i] == ctx->sample_fmt) {
1145 change_input_fmt = ctx->sample_fmt;
1146 break;
1147 }
1148 i++;
1149 }
1150 i=0;
1151 if (!codec->supported_samplerates)
1152 change_input_sr = ctx->sample_rate;
1153
1154 while (codec->supported_samplerates) {
1155 if (!codec->supported_samplerates[i]) break;
1156 if (codec->supported_samplerates[i]==ctx->sample_rate) {
1157 change_input_sr = ctx->sample_rate;
1158 break;
1159 }
1160 i++;
1161 }
1162 if ((ctx->sample_fmt != change_input_fmt) || (ctx->sample_rate != change_input_sr)) {
1163 if (ctx->sample_fmt != change_input_fmt) {
1164 ctx->sample_fmt = codec->sample_fmts ? codec->sample_fmts[0] : AV_SAMPLE_FMT_S16;
1165 afmt = ffmpeg_audio_fmt_to_gpac(ctx->sample_fmt);
1166 gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_AUDIO_FORMAT, &PROP_UINT(afmt) );
1167 }
1168 if (ctx->sample_rate != change_input_sr) {
1169 gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_SAMPLE_RATE, &PROP_UINT(codec->supported_samplerates[0]) );
1170 }
1171 ctx->infmt_negociate = GF_TRUE;
1172 } else {
1173 ctx->infmt_negociate = GF_FALSE;
1174 }
1175 }
1176
1177 //renegociate input, wait for reconfig call
1178 if (ctx->infmt_negociate) return GF_OK;
1179
1180 ctx->gpac_pixel_fmt = pfmt;
1181 ctx->gpac_audio_fmt = afmt;
1182 ctx->dsi_crc = 0;
1183
1184 ctx->encoder = avcodec_alloc_context3(codec);
1185 if (! ctx->encoder) return GF_OUT_OF_MEM;
1186
1187 ctx->encoder->codec_tag = ff_codectag;
1188 if (type==GF_STREAM_VISUAL) {
1189 ctx->encoder->width = ctx->width;
1190 ctx->encoder->height = ctx->height;
1191 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_SAR);
1192 if (prop) {
1193 ctx->encoder->sample_aspect_ratio.num = prop->value.frac.num;
1194 ctx->timescale = ctx->encoder->sample_aspect_ratio.den = prop->value.frac.den;
1195 } else {
1196 ctx->encoder->sample_aspect_ratio.num = 1;
1197 ctx->encoder->sample_aspect_ratio.den = 1;
1198 }
1199 //CHECKME: do we need to use 1/FPS ?
1200 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_TIMESCALE);
1201 if (prop) {
1202 ctx->encoder->time_base.num = 1;
1203 ctx->timescale = ctx->encoder->time_base.den = prop->value.uint;
1204 }
1205 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_FPS);
1206 if (prop) {
1207 ctx->encoder->gop_size = prop->value.frac.num / prop->value.frac.den;
1208 ctx->encoder->time_base.num = prop->value.frac.den;
1209 ctx->encoder->time_base.den = prop->value.frac.num;
1210 }
1211
1212 gf_media_get_reduced_frame_rate(&ctx->encoder->time_base.den, &ctx->encoder->time_base.num);
1213
1214 if (ctx->low_delay) {
1215 av_dict_set(&ctx->options, "vprofile", "baseline", 0);
1216 av_dict_set(&ctx->options, "preset", "ultrafast", 0);
1217 av_dict_set(&ctx->options, "tune", "zerolatency", 0);
1218 if (ctx->codecid==GF_CODECID_AVC) {
1219 av_dict_set(&ctx->options, "x264opts", "no-mbtree:sliced-threads:sync-lookahead=0", 0);
1220 }
1221 #if LIBAVCODEC_VERSION_MAJOR >= 58
1222 ctx->encoder->flags |= AV_CODEC_FLAG_LOW_DELAY;
1223 #endif
1224 }
1225
1226 if (ctx->fintra.den && ctx->fintra.num) {
1227 av_dict_set(&ctx->options, "forced-idr", "1", 0);
1228 }
1229
1230 //we don't use out of band headers, since x264 in ffmpeg (and likely other) do not output in MP4 format but
1231 //in annexB (extradata only contains SPS/PPS/etc in annexB)
1232 //so we indicate unframed for these codecs and use our own filter for annexB->MP4
1233
1234 if (!ctx->frame)
1235 ctx->frame = av_frame_alloc();
1236
1237 ctx->enc_buffer_size = ctx->width*ctx->height + ENC_BUF_ALLOC_SAFE;
1238 ctx->enc_buffer = gf_realloc(ctx->enc_buffer, sizeof(char)*ctx->enc_buffer_size);
1239
1240 gf_pixel_get_size_info(pfmt, ctx->width, ctx->height, NULL, &ctx->stride, &ctx->stride_uv, &ctx->nb_planes, &ctx->uv_height);
1241
1242 ctx->encoder->pix_fmt = ctx->pixel_fmt;
1243 ctx->init_cts_setup = GF_TRUE;
1244 ctx->frame->format = ctx->encoder->pix_fmt;
1245 } else if (type==GF_STREAM_AUDIO) {
1246 ctx->process = ffenc_process_audio;
1247
1248 ctx->encoder->sample_rate = ctx->sample_rate;
1249 ctx->encoder->channels = ctx->channels;
1250
1251 //TODO
1252 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CHANNEL_LAYOUT);
1253 if (prop) {
1254 ctx->encoder->channel_layout = ffmpeg_channel_layout_from_gpac(prop->value.longuint);
1255 } else if (ctx->channels==1) {
1256 ctx->encoder->channel_layout = AV_CH_LAYOUT_MONO;
1257 } else if (ctx->channels==2) {
1258 ctx->encoder->channel_layout = AV_CH_LAYOUT_STEREO;
1259 }
1260
1261 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_TIMESCALE);
1262 if (prop) {
1263 ctx->encoder->time_base.num = 1;
1264 ctx->encoder->time_base.den = prop->value.uint;
1265 ctx->timescale = prop->value.uint;
1266 } else {
1267 ctx->encoder->time_base.num = 1;
1268 ctx->encoder->time_base.den = ctx->sample_rate;
1269 ctx->timescale = ctx->sample_rate;
1270 }
1271
1272 //for aac
1273 switch (ctx->codecid) {
1274 case GF_CODECID_AAC_MPEG4:
1275 case GF_CODECID_AAC_MPEG2_MP:
1276 case GF_CODECID_AAC_MPEG2_LCP:
1277 case GF_CODECID_AAC_MPEG2_SSRP:
1278 av_dict_set(&ctx->options, "strict", "experimental", 0);
1279 break;
1280 }
1281
1282 if (!ctx->frame)
1283 ctx->frame = av_frame_alloc();
1284
1285 ctx->enc_buffer_size = ctx->channels*ctx->sample_rate + ENC_BUF_ALLOC_SAFE;
1286 ctx->enc_buffer = gf_realloc(ctx->enc_buffer, sizeof(char) * ctx->enc_buffer_size);
1287
1288 ctx->encoder->sample_fmt = ctx->sample_fmt;
1289 ctx->planar_audio = gf_audio_fmt_is_planar(afmt);
1290 ctx->frame->format = ctx->encoder->sample_fmt;
1291
1292 ctx->audio_buffer_size = ctx->sample_rate;
1293 ctx->audio_buffer = gf_realloc(ctx->audio_buffer, sizeof(char) * ctx->enc_buffer_size);
1294 ctx->bytes_per_sample = ctx->channels * gf_audio_fmt_bit_depth(afmt) / 8;
1295 ctx->init_cts_setup = GF_TRUE;
1296
1297 switch (ctx->codecid) {
1298 case GF_CODECID_AAC_MPEG4:
1299 case GF_CODECID_AAC_MPEG2_MP:
1300 case GF_CODECID_AAC_MPEG2_LCP:
1301 case GF_CODECID_AAC_MPEG2_SSRP:
1302 {
1303 #ifndef GPAC_DISABLE_AV_PARSERS
1304 GF_M4ADecSpecInfo acfg;
1305 u8 *dsi;
1306 u32 dsi_len;
1307 memset(&acfg, 0, sizeof(GF_M4ADecSpecInfo));
1308 acfg.base_object_type = GF_M4A_AAC_LC;
1309 acfg.base_sr = ctx->sample_rate;
1310 acfg.nb_chan = ctx->channels;
1311 acfg.sbr_object_type = 0;
1312 acfg.audioPL = gf_m4a_get_profile(&acfg);
1313
1314 gf_m4a_write_config(&acfg, &dsi, &dsi_len);
1315 gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
1316 #endif
1317
1318 }
1319 break;
1320 }
1321 }
1322
1323 ffmpeg_set_enc_dec_flags(ctx->options, ctx->encoder);
1324
1325 if (ctx->all_intra) ctx->encoder->gop_size = 0;
1326 else if (ctx->gop_size) ctx->encoder->gop_size = ctx->gop_size;
1327
1328 res = avcodec_open2(ctx->encoder, codec, &ctx->options );
1329 if (res < 0) {
1330 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] PID %s failed to open codec context: %s\n", gf_filter_pid_get_name(pid), av_err2str(res) ));
1331 return GF_BAD_PARAM;
1332 }
1333 ctx->remap_ts = (ctx->encoder->time_base.den != ctx->timescale) ? GF_TRUE : GF_FALSE;
1334
1335
1336 ffmpeg_report_unused_options(filter, ctx->options);
1337
1338 ffenc_copy_pid_props(ctx);
1339 return GF_OK;
1340 }
1341
1342
ffenc_update_arg(GF_Filter * filter,const char * arg_name,const GF_PropertyValue * arg_val)1343 static GF_Err ffenc_update_arg(GF_Filter *filter, const char *arg_name, const GF_PropertyValue *arg_val)
1344 {
1345 s32 res;
1346 GF_FFEncodeCtx *ctx = gf_filter_get_udta(filter);
1347
1348 if (!strcmp(arg_name, "global_header")) return GF_OK;
1349 else if (!strcmp(arg_name, "local_header")) return GF_OK;
1350 else if (!strcmp(arg_name, "low_delay")) ctx->low_delay = GF_TRUE;
1351 //remap some options
1352 else if (!strcmp(arg_name, "bitrate") || !strcmp(arg_name, "rate")) arg_name = "b";
1353 // else if (!strcmp(arg_name, "gop")) arg_name = "g";
1354 //disable low delay if these options are set
1355 else if (!strcmp(arg_name, "x264opts")) ctx->low_delay = GF_FALSE;
1356 else if (!strcmp(arg_name, "vprofile")) ctx->low_delay = GF_FALSE;
1357 else if (!strcmp(arg_name, "preset")) ctx->low_delay = GF_FALSE;
1358 else if (!strcmp(arg_name, "tune")) ctx->low_delay = GF_FALSE;
1359
1360 if (!strcmp(arg_name, "g") || !strcmp(arg_name, "gop"))
1361 ctx->gop_size = arg_val->value.string ? atoi(arg_val->value.string) : 25;
1362
1363 if (!strcmp(arg_name, "b") && arg_val->value.string) {
1364 ctx->target_rate = atoi(arg_val->value.string);
1365 if (strchr(arg_val->value.string, 'm') || strchr(arg_val->value.string, 'M'))
1366 ctx->target_rate *= 1000000;
1367 else if (strchr(arg_val->value.string, 'k') || strchr(arg_val->value.string, 'K'))
1368 ctx->target_rate *= 1000;
1369 }
1370
1371 //initial parsing of arguments
1372 if (!ctx->initialized) {
1373 const char *arg_val_str;
1374 switch (arg_val->type) {
1375 case GF_PROP_STRING:
1376 arg_val_str = arg_val->value.string;
1377 if (!arg_val_str) arg_val_str = "1";
1378 res = av_dict_set(&ctx->options, arg_name, arg_val_str, 0);
1379 if (res<0) {
1380 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to set option %s:%s\n", arg_name, arg_val ));
1381 }
1382 break;
1383 default:
1384 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to set option %s:%s, unrecognized type %d\n", arg_name, arg_val, arg_val->type ));
1385 return GF_NOT_SUPPORTED;
1386 }
1387 return GF_OK;
1388 }
1389 //updates of arguments, not supported for ffmpeg decoders
1390 return GF_NOT_SUPPORTED;
1391 }
1392
//capabilities: two bundles (video then audio, split by the empty {0} entry);
//both accept only raw framed input and refuse to output raw
static const GF_FilterCapability FFEncodeCaps[] =
{
	CAP_UINT(GF_CAPS_INPUT_OUTPUT,GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
	CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
	CAP_UINT(GF_CAPS_OUTPUT_EXCLUDED, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	//some video encoding dumps in unframe mode, we declare the pid property at runtime
	{0},
	//audio bundle
	CAP_UINT(GF_CAPS_INPUT_OUTPUT,GF_PROP_PID_STREAM_TYPE, GF_STREAM_AUDIO),
	CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
	CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	CAP_UINT(GF_CAPS_OUTPUT_EXCLUDED, GF_PROP_PID_CODECID, GF_CODECID_RAW),

};
1407
//filter registry; GF_FS_REG_META flags it as a meta-filter so unknown options
//are forwarded to ffenc_update_arg rather than rejected
GF_FilterRegister FFEncodeRegister = {
	.name = "ffenc",
	.version=LIBAVCODEC_IDENT,
	GF_FS_SET_DESCRIPTION("FFMPEG encoder")
	GF_FS_SET_HELP("Encodes audio and video streams.\nSee FFMPEG documentation (https://ffmpeg.org/documentation.html) for more details"
		"\n"
		"Note: if no codec is explicited through [-ffc]() option and no pixel format is given, codecs will be enumerated to find a matching pixel format.\n"
		"\n"
		"The encoder will force a closed gop boundary at each packet with a `FileNumber` property set.\n"
	)
	.private_size = sizeof(GF_FFEncodeCtx),
	SETCAPS(FFEncodeCaps),
	.initialize = ffenc_initialize,
	.finalize = ffenc_finalize,
	.configure_pid = ffenc_configure_pid,
	.process = ffenc_process,
	.update_arg = ffenc_update_arg,
	.flags = GF_FS_REG_META,
};
1427
//maps an option name to its byte offset within the context structure
#define OFFS(_n) #_n, offsetof(GF_FFEncodeCtx, _n)
//statically declared options; the trailing "*" meta entry routes any other
//AVCodecContext option through ffenc_update_arg
static const GF_FilterArgs FFEncodeArgs[] =
{
	{ OFFS(c), "codec identifier. Can be any supported GPAC ID or ffmpeg ID or filter subclass name", GF_PROP_STRING, NULL, NULL, 0},
	{ OFFS(pfmt), "pixel format for input video. When not set, input format is used", GF_PROP_PIXFMT, "none", NULL, 0},
	{ OFFS(fintra), "force intra / IDR frames at the given period in sec, eg `fintra=60000/1001` will force an intra every 2 seconds on 29.97 fps video; ignored for audio", GF_PROP_FRACTION, "0", NULL, 0},

	{ OFFS(all_intra), "only produce intra frames", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_UPDATE|GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(ls), "log stats", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(ffc), "ffmpeg codec name. This allows enforcing a given codec if multiple codecs support the codec ID set (eg aac vs vo_aacenc)", GF_PROP_STRING, NULL, NULL, 0},

	{ "*", -1, "any possible options defined for AVCodecContext and sub-classes. see `gpac -hx ffenc` and `gpac -hx ffenc:*`", GF_PROP_STRING, NULL, NULL, GF_FS_ARG_META},
	{0}
};

//number of static args, excluding the {0} terminator
const int FFENC_STATIC_ARGS = (sizeof (FFEncodeArgs) / sizeof (GF_FilterArgs)) - 1;
1444
//entry point: completes the registry (presumably merging libavcodec-discovered
//options with the static ones - see ff_common.h) and returns it
const GF_FilterRegister *ffenc_register(GF_FilterSession *session)
{
	ffmpeg_build_register(session, &FFEncodeRegister, FFEncodeArgs, FFENC_STATIC_ARGS, FF_REG_TYPE_ENCODE);
	return &FFEncodeRegister;
}
1450
1451
1452 #else
1453 #include <gpac/filters.h>
//stub when GPAC is built without ffmpeg support: returning NULL skips registration
const GF_FilterRegister *ffenc_register(GF_FilterSession *session)
{
	return NULL;
}
1458
1459 #endif //GPAC_HAS_FFMPEG
1460
1461