1 /*
2  *			GPAC - Multimedia Framework C SDK
3  *
4  *			Authors: Jean Le Feuvre
5  *			Copyright (c) Telecom ParisTech 2018
6  *					All rights reserved
7  *
8  *  This file is part of GPAC / ffmpeg encode filter
9  *
10  *  GPAC is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU Lesser General Public License as published by
12  *  the Free Software Foundation; either version 2, or (at your option)
13  *  any later version.
14  *
15  *  GPAC is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU Lesser General Public License for more details.
19  *
20  *  You should have received a copy of the GNU Lesser General Public
21  *  License along with this library; see the file COPYING.  If not, write to
22  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25 
26 #include <gpac/setup.h>
27 #include <gpac/bitstream.h>
28 #include <gpac/avparse.h>
29 
30 #ifdef GPAC_HAS_FFMPEG
31 
32 #include "ff_common.h"
33 
34 #define ENC_BUF_ALLOC_SAFE	10000
35 
typedef struct _gf_ffenc_ctx
{
	//options (exposed filter arguments)
	Bool all_intra;
	char *c, *ffc;
	Bool ls;
	u32 pfmt;
	GF_Fraction fintra;

	//internal data
	Bool initialized;

	u32 gop_size;
	u32 target_rate;

	AVCodecContext *encoder;
	//decode options
	AVDictionary *options;

	GF_FilterPid *in_pid, *out_pid;
	//media type
	u32 type;
	//timescale of input PID
	u32 timescale;

	u32 nb_frames_out, nb_frames_in;
	//cumulated encoding time in microseconds, used for FPS reporting
	u64 time_spent;

	Bool low_delay;

	//per-media-type process callback (video or audio)
	GF_Err (*process)(GF_Filter *filter, struct _gf_ffenc_ctx *ctx);
	//gpac one
	u32 codecid;
	//done flushing encoder (eg sent NULL frames)
	u32 flush_done;
	//frame used by both video and audio encoder
	AVFrame *frame;

	//encoding buffer - we allocate ENC_BUF_ALLOC_SAFE+WxH for the video (some image codecs in ffmpeg require more than WxH for headers), ENC_BUF_ALLOC_SAFE+nb_ch*samplerate for the audio
	//this should be enough to hold any lossless compression formats
	char *enc_buffer;
	u32 enc_buffer_size;

	//set at (re)configure time, triggers TS shift detection on first encoded packet
	Bool init_cts_setup;

	//video state
	u32 width, height, stride, stride_uv, nb_planes, uv_height;
	//ffmpeg one
	enum AVPixelFormat pixel_fmt;
	//CTS of first frame + 1 (0 means not set), used to compute encoder TS shift
	u64 cts_first_frame_plus_one;

	//audio state
	u32 channels, sample_rate, channel_layout, bytes_per_sample;
	//ffmpeg one
	u32 sample_fmt;
	//we store input audio frame in this buffer until we have enough data for one encoder frame
	//we also store the remaining of a consumed frame here, so that input packet is released ASAP
	char *audio_buffer;
	u32 audio_buffer_size;
	u32 samples_in_audio_buffer;
	//cts of first byte in frame
	u64 first_byte_cts;
	Bool planar_audio;

	//shift of TS - ffmpeg may give pkt-> PTS < frame->PTS to indicate discard samples
	//we convert back to frame PTS but signal discard samples at the PID level
	s32 ts_shift;

	//input packets whose properties are pending re-attachment to output packets
	GF_List *src_packets;

	//scratch bitstream reader, reassigned over packet side data
	GF_BitStream *sdbs;

	//encoder is being flushed before a reconfiguration
	Bool reconfig_pending;
	//input pixel/sample format negotiation in progress, encoder not yet opened
	Bool infmt_negociate;
	//input timescale differs from encoder time base, rescale TS both ways
	Bool remap_ts;
	Bool force_reconfig;

	//CRC of last emitted decoder config, to avoid resending identical ones
	u32 dsi_crc;

	u32 gpac_pixel_fmt;
	u32 gpac_audio_fmt;

	//forced-intra (fintra option) state
	Bool fintra_setup;
	u64 orig_ts;
	u32 nb_forced;

} GF_FFEncodeCtx;
122 
123 static GF_Err ffenc_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove);
124 
ffenc_initialize(GF_Filter * filter)125 static GF_Err ffenc_initialize(GF_Filter *filter)
126 {
127 	GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
128 	ctx->initialized = GF_TRUE;
129 	ctx->src_packets = gf_list_new();
130 	ctx->sdbs = gf_bs_new((u8*)ctx, 1, GF_BITSTREAM_READ);
131 
132 	ffmpeg_setup_logs(GF_LOG_CODEC);
133 	return GF_OK;
134 }
135 
/*Filter teardown: release ffmpeg objects, internal buffers and any pending referenced input packets.*/
static void ffenc_finalize(GF_Filter *filter)
{
	GF_FilterPacket *pending;
	GF_FFEncodeCtx *enc = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);

	if (enc->options) av_dict_free(&enc->options);
	if (enc->frame) av_frame_free(&enc->frame);
	if (enc->enc_buffer) gf_free(enc->enc_buffer);
	if (enc->audio_buffer) gf_free(enc->audio_buffer);

	//drop references we still hold on input packets
	while ((pending = gf_list_pop_back(enc->src_packets)) != NULL) {
		gf_filter_pck_unref(pending);
	}
	gf_list_del(enc->src_packets);

	if (enc->encoder) avcodec_close(enc->encoder);
	if (enc->sdbs) gf_bs_del(enc->sdbs);
}
156 
157 //TODO add more feedback
//TODO add more feedback
/*Logs per-frame encoding stats (FPS, TS, quality, picture type, PSNR) to stderr and/or filter status.
\param filter destination filter for status reporting
\param ctx encoder context
\param pkt freshly encoded AVPacket
\param do_reporting if GF_TRUE, also push a status string to the filter session
*/
static void ffenc_log_video(GF_Filter *filter, struct _gf_ffenc_ctx *ctx, AVPacket *pkt, Bool do_reporting)
{
	Double fps=0;
	s32 q=-1;
	u8 pictype=0;
#if LIBAVCODEC_VERSION_MAJOR >= 58
	u64 errors[10];
	u32 i;
	u8 nb_errors = 0;
#endif
	const char *ptype = "U";

	//nothing to do if neither live logging nor reporting is active
	if (!ctx->ls && !do_reporting) return;


#if LIBAVCODEC_VERSION_MAJOR >= 58
	//parse AV_PKT_DATA_QUALITY_STATS side data: u32(LE) quality, u8 pict type, u8 error count, u16 reserved, then u64(LE) per-plane errors
	u32 sq_size;
	u8 *side_q = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &sq_size);
	if (side_q) {
		gf_bs_reassign_buffer(ctx->sdbs, side_q, sq_size);
		q = gf_bs_read_u32_le(ctx->sdbs);
		pictype = gf_bs_read_u8(ctx->sdbs);
		nb_errors = gf_bs_read_u8(ctx->sdbs);
		/*res*/gf_bs_read_u16(ctx->sdbs);
		//clamp to local storage capacity
		if (nb_errors>10) nb_errors = 10;
		for (i=0; i<nb_errors; i++) {
			errors[i] = gf_bs_read_u64_le(ctx->sdbs);
		}
	}
#endif
	//average encoding FPS = frames out / time spent encoding (time_spent is in microseconds)
	if (ctx->time_spent) {
		fps = ctx->nb_frames_out;
		fps *= 1000000;
		fps /= ctx->time_spent;
	}
	switch (pictype) {
	case AV_PICTURE_TYPE_I: ptype = "I"; break;
	case AV_PICTURE_TYPE_P: ptype = "P"; break;
	case AV_PICTURE_TYPE_S: ptype = "S"; break;
	case AV_PICTURE_TYPE_SP: ptype = "SP"; break;
	case AV_PICTURE_TYPE_B: ptype = "B"; break;
	case AV_PICTURE_TYPE_BI: ptype = "B"; break;
	}

	if (ctx->ls) {
		fprintf(stderr, "[FFEnc] FPS %.02f F %d DTS "LLD" CTS "LLD" Q %02.02f PT %s (F_in %d)", fps, ctx->nb_frames_out, pkt->dts+ctx->ts_shift, pkt->pts+ctx->ts_shift, ((Double)q) /  FF_QP2LAMBDA, ptype, ctx->nb_frames_in);
#if LIBAVCODEC_VERSION_MAJOR >= 58
		if (nb_errors) {
			fprintf(stderr, "PSNR");
			for (i=0; i<nb_errors; i++) {
				//NOTE(review): this prints the normalized error value, not a dB PSNR - kept as is
				Double psnr = (Double) errors[i];
				psnr /= ctx->width * ctx->height * 255.0 * 255.0;
				fprintf(stderr, " %02.02f", psnr);
			}
		}
#endif
		fprintf(stderr, "\r");
	}

	if (do_reporting) {
		char szStatus[1024];
		sprintf(szStatus, "[FFEnc] FPS %.02f F %d DTS "LLD" CTS "LLD" Q %02.02f PT %s (F_in %d)", fps, ctx->nb_frames_out, pkt->dts+ctx->ts_shift, pkt->pts+ctx->ts_shift, ((Double)q) /  FF_QP2LAMBDA, ptype, ctx->nb_frames_in);
		gf_filter_update_status(filter, -1, szStatus);
	}
}
223 
ffenc_process_video(GF_Filter * filter,struct _gf_ffenc_ctx * ctx)224 static GF_Err ffenc_process_video(GF_Filter *filter, struct _gf_ffenc_ctx *ctx)
225 {
226 	AVPacket pkt;
227 	s32 gotpck;
228 	const char *data = NULL;
229 	u32 size=0, i, count, offset, to_copy;
230 	s32 res;
231 	u64 now;
232 	u8 *output;
233 	u32 force_intra = 0;
234 	Bool insert_jp2c = GF_FALSE;
235 	GF_FilterPacket *dst_pck, *src_pck;
236 	GF_FilterPacket *pck;
237 
238 	if (!ctx->in_pid) return GF_EOS;
239 
240 	pck = gf_filter_pid_get_packet(ctx->in_pid);
241 
242 	if (!ctx->encoder) {
243 		if (ctx->infmt_negociate) return GF_OK;
244 
245 		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] encoder reconfiguration failed, aborting stream\n"));
246 		gf_filter_pid_set_eos(ctx->out_pid);
247 		return GF_EOS;
248 	}
249 
250 	if (!pck) {
251 		if (! gf_filter_pid_is_eos(ctx->in_pid)) return GF_OK;
252 		if (ctx->flush_done) return GF_OK;
253 	}
254 
255 	if (ctx->reconfig_pending) pck = NULL;
256 
257 	if (pck) data = gf_filter_pck_get_data(pck, &size);
258 
259 	av_init_packet(&pkt);
260 	pkt.data = (uint8_t*)ctx->enc_buffer;
261 	pkt.size = ctx->enc_buffer_size;
262 
263 	ctx->frame->pict_type = 0;
264 	ctx->frame->width = ctx->width;
265 	ctx->frame->height = ctx->height;
266 	ctx->frame->format = ctx->pixel_fmt;
267 
268 	ctx->frame->pict_type = AV_PICTURE_TYPE_NONE;
269 
270 	//force picture type
271 	if (ctx->all_intra)
272 		ctx->frame->pict_type = AV_PICTURE_TYPE_I;
273 
274 	//if PCK_FILENUM is set on input, this is a file boundary, force IDR sync
275 	if (pck && gf_filter_pck_get_property(pck, GF_PROP_PCK_FILENUM)) {
276 		force_intra = 2;
277 	}
278 	//check if we need to force a closed gop
279 	if (pck && (ctx->fintra.den && ctx->fintra.num) && !ctx->force_reconfig) {
280 		u64 cts = gf_filter_pck_get_cts(pck);
281 		if (!ctx->fintra_setup) {
282 			ctx->fintra_setup = GF_TRUE;
283 			ctx->orig_ts = cts;
284 			force_intra = 1;
285 			ctx->nb_forced=1;
286 		} else if (cts < ctx->orig_ts) {
287 			GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] timestamps not increasing monotonuously, reseting forced intra state !\n"));
288 			ctx->orig_ts = cts;
289 			force_intra = 1;
290 			ctx->nb_forced=1;
291 		} else {
292 			u64 ts_diff = cts - ctx->orig_ts;
293 			if (ts_diff * ctx->fintra.den >= ctx->nb_forced * ctx->fintra.num * ctx->timescale) {
294 				force_intra = 1;
295 				ctx->nb_forced++;
296 				GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[FFEnc] Forcing IDR at frame %d (CTS %d / %d)\n", ctx->nb_frames_in, cts, ctx->timescale));
297 			}
298 		}
299 	}
300 	if (force_intra) {
301 		//file switch we force a full reset to force injecting xPS in the stream
302 		//we could also inject them manually but we don't have them !!
303 		if (force_intra==2) {
304 			if (!ctx->force_reconfig) {
305 				ctx->reconfig_pending = GF_TRUE;
306 				ctx->force_reconfig = GF_TRUE;
307 				pck = NULL;
308 			} else {
309 				ctx->force_reconfig = GF_FALSE;
310 			}
311 		}
312 		ctx->frame->pict_type = AV_PICTURE_TYPE_I;
313 	}
314 
315 
316 	now = gf_sys_clock_high_res();
317 	gotpck = 0;
318 	if (pck) {
319 		u32 ilaced;
320 		if (data) {
321 			ctx->frame->data[0] = (u8 *) data;
322 			ctx->frame->linesize[0] = ctx->stride;
323 			if (ctx->nb_planes>1) {
324 				ctx->frame->data[1] = (u8 *) data + ctx->stride * ctx->height;
325 				ctx->frame->linesize[1] = ctx->stride_uv ? ctx->stride_uv : ctx->stride/2;
326 				if (ctx->nb_planes>2) {
327 					ctx->frame->data[2] = (u8 *) ctx->frame->data[1] + ctx->stride_uv * ctx->height/2;
328 					ctx->frame->linesize[2] = ctx->frame->linesize[1];
329 				} else {
330 					ctx->frame->linesize[2] = 0;
331 				}
332 			} else {
333 				ctx->frame->linesize[1] = 0;
334 			}
335 		} else {
336 			GF_Err e=GF_NOT_SUPPORTED;
337 			GF_FilterFrameInterface *frame_ifce = gf_filter_pck_get_frame_interface(pck);
338 			if (frame_ifce && frame_ifce->get_plane) {
339 				e = frame_ifce->get_plane(frame_ifce, 0, (const u8 **) &ctx->frame->data[0], &ctx->frame->linesize[0]);
340 				if (!e && (ctx->nb_planes>1)) {
341 					e = frame_ifce->get_plane(frame_ifce, 1, (const u8 **) &ctx->frame->data[1], &ctx->frame->linesize[1]);
342 					if (!e && (ctx->nb_planes>2)) {
343 						e = frame_ifce->get_plane(frame_ifce, 1, (const u8 **) &ctx->frame->data[2], &ctx->frame->linesize[2]);
344 					}
345 				}
346 			}
347 			if (e) {
348 				GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to fetch %sframe data: %s\n", frame_ifce ? "hardware " : "", gf_error_to_string(e) ));
349 				gf_filter_pid_drop_packet(ctx->in_pid);
350 				return e;
351 			}
352 		}
353 
354 		ilaced = gf_filter_pck_get_interlaced(pck);
355 		if (!ilaced) {
356 			ctx->frame->interlaced_frame = 0;
357 		} else {
358 			ctx->frame->interlaced_frame = 1;
359 			ctx->frame->top_field_first = (ilaced==2) ? 1 : 0;
360 		}
361 		ctx->frame->pts = gf_filter_pck_get_cts(pck);
362 		ctx->frame->pkt_duration = gf_filter_pck_get_duration(pck);
363 
364 #define SCALE_TS(_ts) if (_ts != GF_FILTER_NO_TS) { _ts *= ctx->encoder->time_base.den; _ts /= ctx->encoder->time_base.num; _ts /= ctx->timescale; }
365 #define UNSCALE_TS(_ts) if (_ts != AV_NOPTS_VALUE)  { _ts *= ctx->encoder->time_base.num; _ts *= ctx->timescale; _ts /= ctx->encoder->time_base.den; }
366 #define UNSCALE_DUR(_ts) { _ts *= ctx->encoder->time_base.num; _ts *= ctx->timescale; _ts /= ctx->encoder->time_base.den; }
367 
368 		//store first frame CTS before rescaling, we use it after rescaling the output packet timing to compute CTS-DTS
369 		if (!ctx->cts_first_frame_plus_one) {
370 			ctx->cts_first_frame_plus_one = 1 + ctx->frame->pts;
371 		}
372 
373 		if (ctx->remap_ts) {
374 			SCALE_TS(ctx->frame->pts);
375 
376 			SCALE_TS(ctx->frame->pkt_duration);
377 		}
378 
379 		ctx->frame->pkt_dts = ctx->frame->pkt_pts = ctx->frame->pts;
380 
381 		res = avcodec_encode_video2(ctx->encoder, &pkt, ctx->frame, &gotpck);
382 		ctx->nb_frames_in++;
383 
384 		//keep ref to ource properties
385 		gf_filter_pck_ref_props(&pck);
386 		gf_list_add(ctx->src_packets, pck);
387 
388 		gf_filter_pid_drop_packet(ctx->in_pid);
389 
390 		if (ctx->remap_ts) {
391 			UNSCALE_TS(ctx->frame->pts);
392 			UNSCALE_TS(ctx->frame->pkt_duration);
393 
394 			UNSCALE_TS(pkt.dts);
395 			UNSCALE_TS(pkt.pts);
396 			UNSCALE_DUR(pkt.duration);
397 		}
398 	} else {
399 		res = avcodec_encode_video2(ctx->encoder, &pkt, NULL, &gotpck);
400 		if (!gotpck) {
401 			//done flushing encoder while reconfiguring
402 			if (ctx->reconfig_pending) {
403 				ctx->reconfig_pending = GF_FALSE;
404 				avcodec_close(ctx->encoder);
405 				ctx->encoder = NULL;
406 				GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec flush done, triggering reconfiguration\n"));
407 				return ffenc_configure_pid(filter, ctx->in_pid, GF_FALSE);
408 			}
409 			ctx->flush_done = 1;
410 			gf_filter_pid_set_eos(ctx->out_pid);
411 			return GF_EOS;
412 		}
413 		if (ctx->remap_ts) {
414 			UNSCALE_TS(pkt.dts);
415 			UNSCALE_TS(pkt.pts);
416 			UNSCALE_DUR(pkt.duration);
417 		}
418 	}
419 	now = gf_sys_clock_high_res() - now;
420 	ctx->time_spent += now;
421 
422 	if (res<0) {
423 		ctx->nb_frames_out++;
424 		return GF_SERVICE_ERROR;
425 	}
426 
427 	if (!gotpck) {
428 		return GF_OK;
429 	}
430 
431 	ctx->nb_frames_out++;
432 	if (ctx->init_cts_setup) {
433 		ctx->init_cts_setup = GF_FALSE;
434 		if (ctx->frame->pts != pkt.pts) {
435 			//check shift in PTS
436 			ctx->ts_shift = (s32) ( (s64) ctx->cts_first_frame_plus_one - 1 - (s64) pkt.pts );
437 
438 			//check shift in DTS
439 			ctx->ts_shift += (s32) ( (s64) ctx->cts_first_frame_plus_one - 1 - (s64) pkt.dts );
440 		}
441 		if (ctx->ts_shift) {
442 			s64 shift = ctx->ts_shift;
443 			gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DELAY, &PROP_SINT((s32) shift) );
444 		}
445 	}
446 
447 	src_pck = NULL;
448 	count = gf_list_count(ctx->src_packets);
449 	for (i=0; i<count; i++) {
450 		src_pck = gf_list_get(ctx->src_packets, i);
451 		if (gf_filter_pck_get_cts(src_pck) == pkt.pts) break;
452 		src_pck = NULL;
453 	}
454 
455 	offset = 0;
456 	to_copy = size = pkt.size;
457 
458 
459 	if (ctx->codecid == GF_CODECID_J2K) {
460 		u32 b4cc = GF_4CC(pkt.data[4], pkt.data[5], pkt.data[6], pkt.data[7]);
461 		if (b4cc == GF_4CC('j','P',' ',' ')) {
462 			u32 jp2h_offset = 0;
463 			offset = 12;
464 			while (offset+8 < (u32) pkt.size) {
465 				b4cc = GF_4CC(pkt.data[offset+4], pkt.data[offset+5], pkt.data[offset+6], pkt.data[offset+7]);
466 				if (b4cc == GF_4CC('j','p','2','c')) {
467 					break;
468 				}
469 				if (b4cc == GF_4CC('j','p','2','h')) {
470 					jp2h_offset = offset;
471 				}
472 				offset++;
473 			}
474 			if (jp2h_offset) {
475 				u32 len = pkt.data[jp2h_offset];
476 				len <<= 8;
477 				len |= pkt.data[jp2h_offset+1];
478 				len <<= 8;
479 				len |= pkt.data[jp2h_offset+2];
480 				len <<= 8;
481 				len |= pkt.data[jp2h_offset+3];
482 
483 				u32 dsi_crc = gf_crc_32(pkt.data + jp2h_offset + 8, len-8);
484 				if (dsi_crc != ctx->dsi_crc) {
485 					ctx->dsi_crc = dsi_crc;
486 					gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA(pkt.data + jp2h_offset + 8, len-8) );
487 				}
488 			}
489 			size -= offset;
490 			to_copy -= offset;
491 		} else {
492 			size += 8;
493 
494 			if (!ctx->dsi_crc) {
495 				u8 *dsi;
496 				u32 dsi_len;
497 				GF_BitStream *bs = gf_bs_new(NULL, 0, GF_BITSTREAM_WRITE);
498 				gf_bs_write_u32(bs, 14+8);
499 				gf_bs_write_u32(bs, GF_4CC('i','h','d','r'));
500 				gf_bs_write_u32(bs, ctx->height);
501 				gf_bs_write_u32(bs, ctx->width);
502 				gf_bs_write_u16(bs, ctx->nb_planes);
503 				gf_bs_write_u8(bs, gf_pixel_get_bytes_per_pixel(ctx->gpac_pixel_fmt));
504 				gf_bs_write_u8(bs, 7); //COMP
505 				gf_bs_write_u8(bs, 0);
506 				gf_bs_write_u8(bs, 0);
507 				gf_bs_get_content(bs, &dsi, &dsi_len);
508 				gf_bs_del(bs);
509 				gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
510 				ctx->dsi_crc = 1;
511 			}
512 		}
513 	}
514 
515 	dst_pck = gf_filter_pck_new_alloc(ctx->out_pid, size, &output);
516 	if (insert_jp2c) {
517 		u32 bsize = pkt.size + 8;
518 		output[0] = (bsize >> 24) & 0xFF;
519 		output[1] = (bsize >> 16) & 0xFF;
520 		output[2] = (bsize >> 8) & 0xFF;
521 		output[3] = (bsize) & 0xFF;
522 		output[4] = 'j';
523 		output[5] = 'p';
524 		output[6] = '2';
525 		output[7] = 'c';
526 		output += 8;
527 	}
528 	memcpy(output, pkt.data + offset, to_copy);
529 
530 	if (src_pck) {
531 		gf_filter_pck_merge_properties(src_pck, dst_pck);
532 		gf_list_del_item(ctx->src_packets, src_pck);
533 		gf_filter_pck_unref(src_pck);
534 	} else {
535 		if (pkt.duration) {
536 			gf_filter_pck_set_duration(dst_pck, (u32) pkt.duration);
537 		} else {
538 			gf_filter_pck_set_duration(dst_pck, (u32) ctx->frame->pkt_duration);
539 		}
540 	}
541 
542 	ffenc_log_video(filter, ctx, &pkt, gf_filter_reporting_enabled(filter));
543 
544 	gf_filter_pck_set_cts(dst_pck, pkt.pts + ctx->ts_shift);
545 	gf_filter_pck_set_dts(dst_pck, pkt.dts + ctx->ts_shift);
546 
547 	//this is not 100% correct since we don't have any clue if this is SAP1/2/3/4 ...
548 	//since we send the output to our reframers we should be fine
549 	if (pkt.flags & AV_PKT_FLAG_KEY) {
550 		gf_filter_pck_set_sap(dst_pck, GF_FILTER_SAP_1);
551 		GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] frame %d is SAP\n", ctx->nb_frames_out));
552 	}
553 	else
554 		gf_filter_pck_set_sap(dst_pck, 0);
555 
556 #if LIBAVCODEC_VERSION_MAJOR >= 58
557 	if (pkt.flags & AV_PKT_FLAG_DISPOSABLE) {
558 		gf_filter_pck_set_dependency_flags(dst_pck, 0x8);
559 	}
560 #endif
561 	gf_filter_pck_send(dst_pck);
562 
563 	//we're in final flush, request a process task until all frames flushe
564 	//we could recursiveley call ourselves, same result
565 	if (!pck) {
566 		gf_filter_post_process_task(filter);
567 	}
568 	return GF_OK;
569 }
570 
571 
572 
/*Appends input audio samples to the internal accumulation buffer.
For packed audio this is a straight memcpy; for planar audio the buffer is organized as
consecutive encoder-frame-sized chunks, each chunk holding its channels planes contiguously,
so samples are demuxed per channel into the proper chunk/position.
\param ctx encoder context
\param data source sample buffer (one input packet payload), may be NULL (no-op)
\param size source buffer size in bytes (used to derive source plane stride for planar audio)
\param sample_offset index of first sample to copy from the source
\param nb_samples number of samples (per channel) to copy
*/
static void ffenc_audio_append_samples(struct _gf_ffenc_ctx *ctx, const u8 *data, u32 size, u32 sample_offset, u32 nb_samples)
{
	u8 *dst;
	u32 f_idx, s_idx;
	u32 i, bytes_per_chan, src_frame_size;

	if (!ctx->audio_buffer || !data)
		return;

	//packed audio: single contiguous copy, sample positions are byte offsets
	if (!ctx->planar_audio) {
		u32 offset_src = sample_offset * ctx->bytes_per_sample;
		u32 offset_dst = ctx->samples_in_audio_buffer * ctx->bytes_per_sample;
		u32 len = nb_samples * ctx->bytes_per_sample;
		memcpy(ctx->audio_buffer + offset_dst, data + offset_src, sizeof(u8)*len);
		ctx->samples_in_audio_buffer += nb_samples;
		return;
	}

	//planar audio: bytes_per_sample covers all channels, so per-channel size is the quotient
	bytes_per_chan = ctx->bytes_per_sample / ctx->channels;
	//number of samples per channel in the source packet (= source plane stride in samples)
	src_frame_size = size / ctx->bytes_per_sample;
	assert(ctx->samples_in_audio_buffer + nb_samples <= (u32) ctx->audio_buffer_size);
	assert(sample_offset + nb_samples <= src_frame_size);
	assert(ctx->encoder->frame_size);

	//locate destination chunk (f_idx) and sample position within it (s_idx)
	f_idx = ctx->samples_in_audio_buffer / ctx->encoder->frame_size;
	s_idx = ctx->samples_in_audio_buffer % ctx->encoder->frame_size;
	if (s_idx) {
		//a partially filled chunk must be completed by this copy, never overflowed
		assert(s_idx + nb_samples <= (u32) ctx->encoder->frame_size);
	}
	dst = ctx->audio_buffer + (f_idx * ctx->channels * ctx->encoder->frame_size + s_idx) * bytes_per_chan;
	while (nb_samples) {
		const u8 *src;
		//copy at most one encoder frame per iteration
		u32 nb_samples_to_copy = nb_samples;
		if (nb_samples_to_copy > (u32) ctx->encoder->frame_size)
			nb_samples_to_copy = ctx->encoder->frame_size;

		assert(sample_offset<src_frame_size);

		src = data + sample_offset * bytes_per_chan;

		//copy each channel plane: dst advances by encoder frame stride, src by source plane stride
		for (i=0; i<ctx->channels; i++) {
			memcpy(dst, src, sizeof(u8) * nb_samples_to_copy * bytes_per_chan);

			dst += ctx->encoder->frame_size * bytes_per_chan;
			src += src_frame_size * bytes_per_chan;
		}
		ctx->samples_in_audio_buffer += nb_samples_to_copy;
		nb_samples -= nb_samples_to_copy;
		sample_offset += nb_samples_to_copy;
	}
}
624 
/*Audio encoding process: accumulates input samples until one full encoder frame is available,
encodes it (or flushes at EOS / reconfiguration), tracks the CTS of the accumulation buffer,
and emits encoded packets with properties merged from the closest source packet.*/
static GF_Err ffenc_process_audio(GF_Filter *filter, struct _gf_ffenc_ctx *ctx)
{
	AVPacket pkt;
	s32 gotpck;
	const char *data = NULL;
	u32 size=0, nb_copy=0, i, count;
	Bool from_internal_buffer_only = GF_FALSE;
	s32 res;
	u32 nb_samples=0;
	u64 ts_diff;
	u8 *output;
	GF_FilterPacket *dst_pck, *src_pck;
	GF_FilterPacket *pck;

	if (!ctx->in_pid) return GF_EOS;

	pck = gf_filter_pid_get_packet(ctx->in_pid);

	if (!ctx->encoder) {
		//input format negotiation in progress, wait for reconfigure
		if (ctx->infmt_negociate) return GF_OK;

		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] encoder reconfiguration failed, aborting stream\n"));
		gf_filter_pid_set_eos(ctx->out_pid);
		return GF_EOS;
	}

	if (!pck) {
		if (! gf_filter_pid_is_eos(ctx->in_pid)) return GF_OK;
		if (ctx->flush_done) return GF_EOS;
	}

	//pending reconfiguration: feed NULL frames to drain the encoder first
	if (ctx->reconfig_pending) pck = NULL;

	//enough samples already buffered for a full encoder frame: encode from the internal buffer
	//without consuming the pending input packet
	if (ctx->encoder->frame_size && (ctx->encoder->frame_size <= (s32) ctx->samples_in_audio_buffer)) {
		avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, ctx->audio_buffer, ctx->bytes_per_sample * ctx->encoder->frame_size, 0);

		from_internal_buffer_only = GF_TRUE;

	} else if (pck) {
		data = gf_filter_pck_get_data(pck, &size);
		if (!data) {
			GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Packet without associated data\n"));
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_OK;
		}

		//empty accumulation buffer: the CTS of this packet becomes the buffer's base CTS
		if (!ctx->samples_in_audio_buffer) {
			ctx->first_byte_cts = gf_filter_pck_get_cts(pck);
		}

		//keep ref to source properties for reattachment to output packets
		src_pck = pck;
		gf_filter_pck_ref_props(&src_pck);
		gf_list_add(ctx->src_packets, src_pck);

		nb_samples = size / ctx->bytes_per_sample;
		//not enough samples yet for one encoder frame: buffer everything and wait for more input
		if (ctx->encoder->frame_size && (nb_samples + ctx->samples_in_audio_buffer < (u32) ctx->encoder->frame_size)) {
			ffenc_audio_append_samples(ctx, data, size, 0, nb_samples);
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_OK;
		}

		if (ctx->encoder->frame_size) {
			//complete the buffered frame with the first nb_copy samples of this packet
			nb_copy = ctx->encoder->frame_size - ctx->samples_in_audio_buffer;
			ffenc_audio_append_samples(ctx, data, size, 0, nb_copy);

			ctx->frame->nb_samples = ctx->encoder->frame_size;
			res = avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, ctx->audio_buffer, ctx->encoder->frame_size*ctx->bytes_per_sample, 0);

		} else {
			//codec has no fixed frame size: encode the packet payload directly
			ctx->frame->nb_samples = size / ctx->bytes_per_sample;
			res = avcodec_fill_audio_frame(ctx->frame, ctx->channels, ctx->sample_fmt, data, size, 0);
			data = NULL;
			size = 0;
		}
		if (res<0) {
			GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Error filling raw audio frame: %s\n", av_err2str(res) ));
			//discard buffered samples, but keep the unconsumed tail of this packet and
			//advance the base CTS accordingly
			ctx->samples_in_audio_buffer = 0;
			if (data && (nb_samples > nb_copy)) {
				ffenc_audio_append_samples(ctx, data, size, nb_copy, nb_samples - nb_copy);
				ts_diff = nb_copy;
				ts_diff *= ctx->timescale;
				ts_diff /= ctx->sample_rate;
				ctx->first_byte_cts = gf_filter_pck_get_cts(pck) + ts_diff;
			}
			gf_filter_pid_drop_packet(ctx->in_pid);
			return GF_SERVICE_ERROR;
		}
	}

	av_init_packet(&pkt);
	pkt.data = (uint8_t*)ctx->enc_buffer;
	pkt.size = ctx->enc_buffer_size;

	ctx->frame->nb_samples = ctx->encoder->frame_size;
	ctx->frame->format = ctx->encoder->sample_fmt;
	ctx->frame->channels = ctx->encoder->channels;
	ctx->frame->channel_layout = ctx->encoder->channel_layout;
	gotpck = 0;
	if (pck) {
		ctx->frame->pkt_dts = ctx->frame->pkt_pts = ctx->frame->pts = ctx->first_byte_cts;
		res = avcodec_encode_audio2(ctx->encoder, &pkt, ctx->frame, &gotpck);
	} else {
		//flush mode: feed NULL frames until the encoder has no more output
		res = avcodec_encode_audio2(ctx->encoder, &pkt, NULL, &gotpck);
		if (!gotpck) {
			//done flushing encoder while reconfiguring
			if (ctx->reconfig_pending) {
				ctx->reconfig_pending = GF_FALSE;
				avcodec_close(ctx->encoder);
				ctx->encoder = NULL;
				GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec flush done, triggering reconfiguration\n"));
				return ffenc_configure_pid(filter, ctx->in_pid, GF_FALSE);
			}
			ctx->flush_done = 1;
			gf_filter_pid_set_eos(ctx->out_pid);
			return GF_EOS;
		}
	}

	if (from_internal_buffer_only) {
		//avcodec_fill_audio_frame does not perform copy, so make sure we discard internal buffer AFTER we encode
		u32 offset, len, nb_samples_to_drop;

		//we always drop a complete encoder frame size, so same code for planar and packed
		nb_samples_to_drop = ctx->encoder->frame_size;

		if (ctx->samples_in_audio_buffer > nb_samples_to_drop) {
			offset = nb_samples_to_drop * ctx->bytes_per_sample;
			len = (ctx->samples_in_audio_buffer - nb_samples_to_drop) * ctx->bytes_per_sample;
			memmove(ctx->audio_buffer, ctx->audio_buffer + offset, sizeof(u8)*len);
			ctx->samples_in_audio_buffer -= nb_samples_to_drop;
		} else {
			ctx->samples_in_audio_buffer = 0;
		}

	}

	//increase timestamp by the encoded frame duration, rescaled to the input timescale
	ts_diff = ctx->frame->nb_samples;
	if (ctx->timescale!=ctx->sample_rate) {
		ts_diff *= ctx->timescale;
		ts_diff /= ctx->sample_rate;
	}
	ctx->first_byte_cts += ts_diff;

	if (pck && !from_internal_buffer_only) {
		//buffer the unconsumed tail of the input packet, then release it
		ctx->samples_in_audio_buffer = 0;
		if (nb_samples > nb_copy) {
			ffenc_audio_append_samples(ctx, data, size, nb_copy, nb_samples - nb_copy);
		}
		gf_filter_pid_drop_packet(ctx->in_pid);
	}

	if (res<0) {
		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Error encoding frame: %s\n", av_err2str(res) ));
		return GF_SERVICE_ERROR;
	}
	if (!gotpck) {
		return GF_OK;
	}
	dst_pck = gf_filter_pck_new_alloc(ctx->out_pid, pkt.size, &output);
	memcpy(output, pkt.data, pkt.size);

	if (ctx->init_cts_setup) {
		ctx->init_cts_setup = GF_FALSE;
		if (ctx->frame->pts != pkt.pts) {
			ctx->ts_shift = (s32) ( (s64) ctx->frame->pts - (s64) pkt.pts );
		}
//		if (ctx->ts_shift) {
//			s64 shift = ctx->ts_shift;
//			gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DELAY, &PROP_SINT((s32) -shift) );
//		}
	}

	//try to locate first source packet with cts greater than this packet cts and use it as source for properties
	//this is not optimal because we dont produce N for N because of different window coding sizes
	src_pck = NULL;
	count = gf_list_count(ctx->src_packets);
	for (i=0; i<count; i++) {
		u64 acts;
		u32 adur;
		src_pck = gf_list_get(ctx->src_packets, i);
		acts = gf_filter_pck_get_cts(src_pck);
		adur = gf_filter_pck_get_duration(src_pck);

		if ((s64) acts >= pkt.pts) {
			break;
		}

		//source packet entirely before this output packet: no longer needed, release it
		if (acts + adur <= (u64) ( pkt.pts + ctx->ts_shift) ) {
			gf_list_rem(ctx->src_packets, i);
			gf_filter_pck_unref(src_pck);
			i--;
			count--;
		}
		src_pck = NULL;
	}
	if (src_pck) {
		gf_filter_pck_merge_properties(src_pck, dst_pck);
		gf_list_del_item(ctx->src_packets, src_pck);
		gf_filter_pck_unref(src_pck);
	}
	gf_filter_pck_set_cts(dst_pck, pkt.pts + ctx->ts_shift);
	gf_filter_pck_set_dts(dst_pck, pkt.dts + ctx->ts_shift);
	//this is not 100% correct since we don't have any clue if this is SAP1/4 (roll info missing)
	if (pkt.flags & AV_PKT_FLAG_KEY)
		gf_filter_pck_set_sap(dst_pck, GF_FILTER_SAP_1);
	else
		gf_filter_pck_set_sap(dst_pck, 0);

	gf_filter_pck_set_duration(dst_pck, (u32) pkt.duration);

	gf_filter_pck_send(dst_pck);

	//we're in final flush, request a process task until all frames flushed
	//we could recursively call ourselves, same result
	if (!pck) {
		gf_filter_post_process_task(filter);
	}
	return GF_OK;
}
846 
ffenc_process(GF_Filter * filter)847 static GF_Err ffenc_process(GF_Filter *filter)
848 {
849 	GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
850 	if (!ctx->out_pid || gf_filter_pid_would_block(ctx->out_pid))
851 		return GF_OK;
852 	return ctx->process(filter, ctx);
853 }
854 
/*Copies input PID properties to the output PID at init or reconfiguration time, resets the
decoder config, and declares either unframed output (NALU/M4V codecs handled by reframers)
or the encoder extradata as decoder config for other codecs.*/
static void ffenc_copy_pid_props(GF_FFEncodeCtx *ctx)
{
	Bool unframed_out;

	gf_filter_pid_copy_properties(ctx->out_pid, ctx->in_pid);
	gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, NULL);
	gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_CODECID, &PROP_UINT(ctx->codecid) );

	unframed_out = (ctx->codecid==GF_CODECID_AVC)
		|| (ctx->codecid==GF_CODECID_HEVC)
		|| (ctx->codecid==GF_CODECID_MPEG4_PART2);

	if (unframed_out) {
		//bitstream goes through a reframer which will extract xPS and framing
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_UNFRAMED, &PROP_BOOL(GF_TRUE) );
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_UNFRAMED_FULL_AU, &PROP_BOOL(GF_TRUE) );
	} else if (ctx->encoder && ctx->encoder->extradata_size && ctx->encoder->extradata) {
		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA(ctx->encoder->extradata, ctx->encoder->extradata_size) );
	}
}
876 
static GF_Err ffenc_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
878 {
879 	s32 res;
880 	u32 type=0, fftype, ff_codectag=0;
881 	u32 i=0;
882 	u32 change_input_fmt = 0;
883 	const GF_PropertyValue *prop;
884 	const AVCodec *codec=NULL;
885 	const AVCodec *desired_codec=NULL;
886 	u32 codec_id, pfmt, afmt;
887 	GF_FFEncodeCtx *ctx = (GF_FFEncodeCtx *) gf_filter_get_udta(filter);
888 
889 	//disconnect of src pid (not yet supported)
890 	if (is_remove) {
891 		ctx->in_pid = NULL;
892 		//one in one out, this is simple
893 		if (ctx->out_pid) gf_filter_pid_remove(ctx->out_pid);
894 		return GF_OK;
895 	}
896 	//check our PID: streamtype and codecid
897 	prop = gf_filter_pid_get_property(pid, GF_PROP_PID_STREAM_TYPE);
898 	if (!prop) return GF_NOT_SUPPORTED;
899 
900 	type = prop->value.uint;
901 	switch (type) {
902 	case GF_STREAM_AUDIO:
903 	case GF_STREAM_VISUAL:
904 		break;
905 	default:
906 		return GF_NOT_SUPPORTED;
907 	}
908 	prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CODECID);
909 	if (!prop || prop->value.uint!=GF_CODECID_RAW) return GF_NOT_SUPPORTED;
910 
911 	//figure out if output was preconfigured during filter chain setup
912 	prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_CODECID);
913 	if (prop) {
914 		ctx->codecid = prop->value.uint;
915 	} else if (!ctx->codecid && ctx->c) {
916 		ctx->codecid = gf_codec_parse(ctx->c);
917 		if (!ctx->codecid) {
918 			codec = avcodec_find_encoder_by_name(ctx->c);
919 			if (codec)
920 				ctx->codecid = ffmpeg_codecid_to_gpac(codec->id);
921 		}
922 	}
923 	//if the codec was set using ffc, get it
924 	if (ctx->ffc) {
925 		desired_codec = avcodec_find_encoder_by_name(ctx->ffc);
926 	}
927 
928 	if (!ctx->codecid && !desired_codec) {
929 		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] No codecid specified\n" ));
930 		return GF_BAD_PARAM;
931 	}
932 
933 	//initial config or update
934 	if (!ctx->in_pid || (ctx->in_pid==pid)) {
935 		ctx->in_pid = pid;
936 		if (!ctx->type) ctx->type = type;
937 		//no support for dynamic changes of stream types
938 		else if (ctx->type != type) {
939 			return GF_NOT_SUPPORTED;
940 		}
941 	} else {
942 		//only one input pid in ctx
943 		if (ctx->in_pid) return GF_REQUIRES_NEW_INSTANCE;
944 	}
945 
946 	if (ctx->codecid) {
947 		codec_id = ffmpeg_codecid_from_gpac(ctx->codecid, &ff_codectag);
948 		if (codec_id) {
949 			if (desired_codec && desired_codec->id==codec_id)
950 				codec = desired_codec;
951 			else
952 				codec = avcodec_find_encoder(codec_id);
953 		}
954 	} else {
955 		codec = desired_codec;
956 	}
957 	if (!codec) {
958 		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Cannot find encoder for codec %s\n", gf_codecid_name(ctx->codecid) ));
959 		return GF_NOT_SUPPORTED;
960 	}
961 	codec_id = codec->id;
962 	if (!ctx->codecid)
963 		ctx->codecid = ffmpeg_codecid_to_gpac(codec->id);
964 
965 	fftype = ffmpeg_stream_type_to_gpac(codec->type);
966 	if (fftype != type) {
967 		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Mismatch between stream type, codec indicates %s but source type is %s\n", gf_stream_type_name(fftype), gf_stream_type_name(type) ));
968 		return GF_NOT_SUPPORTED;
969 	}
970 
971 	//declare our output pid to make sure we connect the chain
972 	ctx->in_pid = pid;
973 	if (!ctx->out_pid) {
974 		char szCodecName[1000];
975 		ctx->out_pid = gf_filter_pid_new(filter);
976 
977 		//to change once we implement on-the-fly codec change
978 		sprintf(szCodecName, "ffenc:%s", codec->name ? codec->name : "unknown");
979 		gf_filter_set_name(filter, szCodecName);
980 		gf_filter_pid_set_framing_mode(ctx->in_pid, GF_TRUE);
981 	}
982 	if (type==GF_STREAM_AUDIO) {
983 		ctx->process = ffenc_process_audio;
984 	} else {
985 		ctx->process = ffenc_process_video;
986 	}
987 
988 	ffenc_copy_pid_props(ctx);
989 	if (ctx->target_rate)
990 		gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_BITRATE, &PROP_UINT(ctx->target_rate));
991 
992 
993 #define GET_PROP(_a, _code, _name) \
994 	prop = gf_filter_pid_get_property(pid, _code); \
995 	if (!prop) {\
996 		GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[FFEnc] Input %s unknown, waiting for reconfigure\n", _name)); \
997 		return GF_OK; \
998 	}\
999 	_a  =prop->value.uint;
1000 
1001 	pfmt = afmt = 0;
1002 	if (type==GF_STREAM_VISUAL) {
1003 		GET_PROP(ctx->width, GF_PROP_PID_WIDTH, "width")
1004 		GET_PROP(ctx->height, GF_PROP_PID_HEIGHT, "height")
1005 		GET_PROP(pfmt, GF_PROP_PID_PIXFMT, "pixel format")
1006 
1007 		prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_STRIDE);
1008 		//keep stride and stride_uv to 0 i fnot set, and recompute from pixel format
1009 		if (prop) ctx->stride = prop->value.uint;
1010 		prop = gf_filter_pid_caps_query(pid, GF_PROP_PID_STRIDE_UV);
1011 		if (prop) ctx->stride_uv = prop->value.uint;
1012 	} else {
1013 		GET_PROP(ctx->sample_rate, GF_PROP_PID_SAMPLE_RATE, "sample rate")
1014 		GET_PROP(ctx->channels, GF_PROP_PID_NUM_CHANNELS, "nb channels")
1015 		GET_PROP(afmt, GF_PROP_PID_AUDIO_FORMAT, "audio format")
1016 	}
1017 
1018 	if (ctx->encoder) {
1019 		codec_id = ffmpeg_codecid_from_gpac(ctx->codecid, &ff_codectag);
1020 
1021 		if (type==GF_STREAM_AUDIO) {
1022 			if ((ctx->encoder->codec->id==codec_id) && (ctx->encoder->sample_rate==ctx->sample_rate) && (ctx->encoder->channels==ctx->channels) && (ctx->gpac_audio_fmt == afmt ) ) {
1023 				return GF_OK;
1024 			}
1025 		} else {
1026 			if ((ctx->encoder->codec->id==codec_id) && (ctx->encoder->width==ctx->width) && (ctx->encoder->height==ctx->height) && (ctx->gpac_pixel_fmt == pfmt ) ) {
1027 				return GF_OK;
1028 			}
1029 		}
1030 
1031 		GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[FFEnc] codec reconfiguration, begining flush\n"));
1032 		ctx->reconfig_pending = GF_TRUE;
1033 		return GF_OK;
1034 	}
1035 
1036 	if (type==GF_STREAM_VISUAL) {
1037 		u32 force_pfmt = AV_PIX_FMT_NONE;
1038 		if (ctx->pfmt) {
1039 			u32 ff_pfmt = ffmpeg_pixfmt_from_gpac(ctx->pfmt);
1040 			i=0;
1041 			while (codec->pix_fmts) {
1042 				if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1043 				if (codec->pix_fmts[i] == ff_pfmt) {
1044 					force_pfmt = ff_pfmt;
1045 					break;
1046 				}
1047 				//handle pixel formats aliases
1048 				if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i]) == ctx->pfmt) {
1049 					force_pfmt = ctx->pixel_fmt;
1050 					break;
1051 				}
1052 				i++;
1053 			}
1054 			if (force_pfmt == AV_PIX_FMT_NONE) {
1055 				GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Requested source format %s not supported by codec, using default one\n", gf_pixel_fmt_name(ctx->pfmt) ));
1056 			} else {
1057 				change_input_fmt = force_pfmt;
1058 			}
1059 		}
1060 		ctx->pixel_fmt = ffmpeg_pixfmt_from_gpac(pfmt);
1061 		//check pixel format
1062 		if (force_pfmt == AV_PIX_FMT_NONE) {
1063 			change_input_fmt = AV_PIX_FMT_NONE;
1064 			i=0;
1065 			while (codec->pix_fmts) {
1066 				if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1067 				if (codec->pix_fmts[i] == ctx->pixel_fmt) {
1068 					change_input_fmt = ctx->pixel_fmt;
1069 					break;
1070 				}
1071 				//handle pixel formats aliases
1072 				if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i]) == pfmt) {
1073 					ctx->pixel_fmt = change_input_fmt = codec->pix_fmts[i];
1074 					break;
1075 				}
1076 				i++;
1077 			}
1078 			if (!ctx->ffc && (change_input_fmt == AV_PIX_FMT_NONE)) {
1079 #if (LIBAVCODEC_VERSION_MAJOR >= 58) && (LIBAVCODEC_VERSION_MINOR>=20)
1080 				void *ff_opaque=NULL;
1081 #else
1082 				AVCodec *codec_alt = NULL;
1083 #endif
1084 				while (1) {
1085 #if (LIBAVCODEC_VERSION_MAJOR >= 58) && (LIBAVCODEC_VERSION_MINOR>=20)
1086 					const AVCodec *codec_alt = av_codec_iterate(&ff_opaque);
1087 #else
1088 					codec_alt = av_codec_next(codec_alt);
1089 #endif
1090 					if (!codec_alt) break;
1091 					if (codec_alt==codec) continue;
1092 					if (codec_alt->id == codec_id) {
1093 						i=0;
1094 						while (codec_alt->pix_fmts) {
1095 							if (codec_alt->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1096 							if (codec_alt->pix_fmts[i] == ctx->pixel_fmt) {
1097 								change_input_fmt = ctx->pixel_fmt;
1098 								GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[FFEnc] Reassigning codec from %s to %s to match pixel format\n", codec->name, codec_alt->name ));
1099 								codec = codec_alt;
1100 								break;
1101 							}
1102 							i++;
1103 						}
1104 					}
1105 				}
1106 			}
1107 		}
1108 
1109 		if (ctx->pixel_fmt != change_input_fmt) {
1110 			u32 ff_pmft = ctx->pixel_fmt;
1111 
1112 			if (force_pfmt == AV_PIX_FMT_NONE) {
1113 				ff_pmft = AV_PIX_FMT_NONE;
1114 				i=0;
1115 				//find a mapped pixel format
1116 				while (codec->pix_fmts) {
1117 					if (codec->pix_fmts[i] == AV_PIX_FMT_NONE) break;
1118 					if (ffmpeg_pixfmt_to_gpac(codec->pix_fmts[i])) {
1119 						ff_pmft = codec->pix_fmts[i];
1120 						break;
1121 					}
1122 					i++;
1123 				}
1124 				if (ff_pmft == AV_PIX_FMT_NONE) {
1125 					GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Could not find a matching GPAC pixel format for encoder %s\n", codec->name ));
1126 					return GF_NOT_SUPPORTED;
1127 				}
1128 			} else if (ctx->pfmt) {
1129 				ff_pmft = ffmpeg_pixfmt_from_gpac(ctx->pfmt);
1130 			}
1131 			pfmt = ffmpeg_pixfmt_to_gpac(ff_pmft);
1132 			gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_PIXFMT, &PROP_UINT(pfmt) );
1133 			ctx->infmt_negociate = GF_TRUE;
1134 		} else {
1135 			ctx->infmt_negociate = GF_FALSE;
1136 		}
1137 	} else {
1138 		u32 change_input_sr = 0;
1139 		//check audio format
1140 		ctx->sample_fmt = ffmpeg_audio_fmt_from_gpac(afmt);
1141 		change_input_fmt = 0;
1142 		while (codec->sample_fmts) {
1143 			if (codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) break;
1144 			if (codec->sample_fmts[i] == ctx->sample_fmt) {
1145 				change_input_fmt = ctx->sample_fmt;
1146 				break;
1147 			}
1148 			i++;
1149 		}
1150 		i=0;
1151 		if (!codec->supported_samplerates)
1152 			change_input_sr = ctx->sample_rate;
1153 
1154 		while (codec->supported_samplerates) {
1155 			if (!codec->supported_samplerates[i]) break;
1156 			if (codec->supported_samplerates[i]==ctx->sample_rate) {
1157 				change_input_sr = ctx->sample_rate;
1158 				break;
1159 			}
1160 			i++;
1161 		}
1162 		if ((ctx->sample_fmt != change_input_fmt) || (ctx->sample_rate != change_input_sr)) {
1163 			if (ctx->sample_fmt != change_input_fmt) {
1164 				ctx->sample_fmt = codec->sample_fmts ? codec->sample_fmts[0] : AV_SAMPLE_FMT_S16;
1165 				afmt = ffmpeg_audio_fmt_to_gpac(ctx->sample_fmt);
1166 				gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_AUDIO_FORMAT, &PROP_UINT(afmt) );
1167 			}
1168 			if (ctx->sample_rate != change_input_sr) {
1169 				gf_filter_pid_negociate_property(ctx->in_pid, GF_PROP_PID_SAMPLE_RATE, &PROP_UINT(codec->supported_samplerates[0]) );
1170 			}
1171 			ctx->infmt_negociate = GF_TRUE;
1172 		} else {
1173 			ctx->infmt_negociate = GF_FALSE;
1174 		}
1175 	}
1176 
1177 	//renegociate input, wait for reconfig call
1178 	if (ctx->infmt_negociate) return GF_OK;
1179 
1180 	ctx->gpac_pixel_fmt = pfmt;
1181 	ctx->gpac_audio_fmt = afmt;
1182 	ctx->dsi_crc = 0;
1183 
1184 	ctx->encoder = avcodec_alloc_context3(codec);
1185 	if (! ctx->encoder) return GF_OUT_OF_MEM;
1186 
1187 	ctx->encoder->codec_tag = ff_codectag;
1188 	if (type==GF_STREAM_VISUAL) {
1189 		ctx->encoder->width = ctx->width;
1190 		ctx->encoder->height = ctx->height;
1191 		prop = gf_filter_pid_get_property(pid, GF_PROP_PID_SAR);
1192 		if (prop) {
1193 			ctx->encoder->sample_aspect_ratio.num = prop->value.frac.num;
1194 			ctx->timescale = ctx->encoder->sample_aspect_ratio.den = prop->value.frac.den;
1195 		} else {
1196 			ctx->encoder->sample_aspect_ratio.num = 1;
1197 			ctx->encoder->sample_aspect_ratio.den = 1;
1198 		}
1199 		//CHECKME: do we need to use 1/FPS ?
1200 		prop = gf_filter_pid_get_property(pid, GF_PROP_PID_TIMESCALE);
1201 		if (prop) {
1202 			ctx->encoder->time_base.num = 1;
1203 			ctx->timescale = ctx->encoder->time_base.den = prop->value.uint;
1204 		}
1205 		prop = gf_filter_pid_get_property(pid, GF_PROP_PID_FPS);
1206 		if (prop) {
1207 			ctx->encoder->gop_size = prop->value.frac.num / prop->value.frac.den;
1208 			ctx->encoder->time_base.num = prop->value.frac.den;
1209 			ctx->encoder->time_base.den = prop->value.frac.num;
1210 		}
1211 
1212 		gf_media_get_reduced_frame_rate(&ctx->encoder->time_base.den, &ctx->encoder->time_base.num);
1213 
1214 		if (ctx->low_delay) {
1215 			av_dict_set(&ctx->options, "vprofile", "baseline", 0);
1216 			av_dict_set(&ctx->options, "preset", "ultrafast", 0);
1217 			av_dict_set(&ctx->options, "tune", "zerolatency", 0);
1218 			if (ctx->codecid==GF_CODECID_AVC) {
1219 				av_dict_set(&ctx->options, "x264opts", "no-mbtree:sliced-threads:sync-lookahead=0", 0);
1220 			}
1221 #if LIBAVCODEC_VERSION_MAJOR >= 58
1222 			ctx->encoder->flags |= AV_CODEC_FLAG_LOW_DELAY;
1223 #endif
1224 		}
1225 
1226 		if (ctx->fintra.den && ctx->fintra.num) {
1227 			av_dict_set(&ctx->options, "forced-idr", "1", 0);
1228 		}
1229 
1230 		//we don't use out of band headers, since x264 in ffmpeg (and likely other) do not output in MP4 format but
1231 		//in annexB (extradata only contains SPS/PPS/etc in annexB)
1232 		//so we indicate unframed for these codecs and use our own filter for annexB->MP4
1233 
1234 		if (!ctx->frame)
1235 			ctx->frame = av_frame_alloc();
1236 
1237 		ctx->enc_buffer_size = ctx->width*ctx->height + ENC_BUF_ALLOC_SAFE;
1238 		ctx->enc_buffer = gf_realloc(ctx->enc_buffer, sizeof(char)*ctx->enc_buffer_size);
1239 
1240 		gf_pixel_get_size_info(pfmt, ctx->width, ctx->height, NULL, &ctx->stride, &ctx->stride_uv, &ctx->nb_planes, &ctx->uv_height);
1241 
1242 		ctx->encoder->pix_fmt = ctx->pixel_fmt;
1243 		ctx->init_cts_setup = GF_TRUE;
1244 		ctx->frame->format = ctx->encoder->pix_fmt;
1245 	} else if (type==GF_STREAM_AUDIO) {
1246 		ctx->process = ffenc_process_audio;
1247 
1248 		ctx->encoder->sample_rate = ctx->sample_rate;
1249 		ctx->encoder->channels = ctx->channels;
1250 
1251 		//TODO
1252 		prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CHANNEL_LAYOUT);
1253 		if (prop) {
1254 			ctx->encoder->channel_layout = ffmpeg_channel_layout_from_gpac(prop->value.longuint);
1255 		} else if (ctx->channels==1) {
1256 			ctx->encoder->channel_layout = AV_CH_LAYOUT_MONO;
1257 		} else if (ctx->channels==2) {
1258 			ctx->encoder->channel_layout = AV_CH_LAYOUT_STEREO;
1259 		}
1260 
1261 		prop = gf_filter_pid_get_property(pid, GF_PROP_PID_TIMESCALE);
1262 		if (prop) {
1263 			ctx->encoder->time_base.num = 1;
1264 			ctx->encoder->time_base.den = prop->value.uint;
1265 			ctx->timescale = prop->value.uint;
1266 		} else {
1267 			ctx->encoder->time_base.num = 1;
1268 			ctx->encoder->time_base.den = ctx->sample_rate;
1269 			ctx->timescale = ctx->sample_rate;
1270 		}
1271 
1272 		//for aac
1273 		switch (ctx->codecid) {
1274 		case GF_CODECID_AAC_MPEG4:
1275 		case GF_CODECID_AAC_MPEG2_MP:
1276 		case GF_CODECID_AAC_MPEG2_LCP:
1277 		case GF_CODECID_AAC_MPEG2_SSRP:
1278 			av_dict_set(&ctx->options, "strict", "experimental", 0);
1279 			break;
1280 		}
1281 
1282 		if (!ctx->frame)
1283 			ctx->frame = av_frame_alloc();
1284 
1285 		ctx->enc_buffer_size = ctx->channels*ctx->sample_rate + ENC_BUF_ALLOC_SAFE;
1286 		ctx->enc_buffer = gf_realloc(ctx->enc_buffer, sizeof(char) * ctx->enc_buffer_size);
1287 
1288 		ctx->encoder->sample_fmt = ctx->sample_fmt;
1289 		ctx->planar_audio = gf_audio_fmt_is_planar(afmt);
1290 		ctx->frame->format = ctx->encoder->sample_fmt;
1291 
1292 		ctx->audio_buffer_size = ctx->sample_rate;
1293 		ctx->audio_buffer = gf_realloc(ctx->audio_buffer, sizeof(char) * ctx->enc_buffer_size);
1294 		ctx->bytes_per_sample = ctx->channels * gf_audio_fmt_bit_depth(afmt) / 8;
1295 		ctx->init_cts_setup = GF_TRUE;
1296 
1297 		switch (ctx->codecid) {
1298 		case GF_CODECID_AAC_MPEG4:
1299 		case GF_CODECID_AAC_MPEG2_MP:
1300 		case GF_CODECID_AAC_MPEG2_LCP:
1301 		case GF_CODECID_AAC_MPEG2_SSRP:
1302 		{
1303 #ifndef GPAC_DISABLE_AV_PARSERS
1304 			GF_M4ADecSpecInfo acfg;
1305 			u8 *dsi;
1306 			u32 dsi_len;
1307 			memset(&acfg, 0, sizeof(GF_M4ADecSpecInfo));
1308 			acfg.base_object_type = GF_M4A_AAC_LC;
1309 			acfg.base_sr = ctx->sample_rate;
1310 			acfg.nb_chan = ctx->channels;
1311 			acfg.sbr_object_type = 0;
1312 			acfg.audioPL = gf_m4a_get_profile(&acfg);
1313 
1314 			gf_m4a_write_config(&acfg, &dsi, &dsi_len);
1315 			gf_filter_pid_set_property(ctx->out_pid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
1316 #endif
1317 
1318 		}
1319 			break;
1320 		}
1321 	}
1322 
1323 	ffmpeg_set_enc_dec_flags(ctx->options, ctx->encoder);
1324 
1325 	if (ctx->all_intra) ctx->encoder->gop_size = 0;
1326 	else if (ctx->gop_size) ctx->encoder->gop_size = ctx->gop_size;
1327 
1328 	res = avcodec_open2(ctx->encoder, codec, &ctx->options );
1329 	if (res < 0) {
1330 		GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] PID %s failed to open codec context: %s\n", gf_filter_pid_get_name(pid), av_err2str(res) ));
1331 		return GF_BAD_PARAM;
1332 	}
1333 	ctx->remap_ts = (ctx->encoder->time_base.den != ctx->timescale) ? GF_TRUE : GF_FALSE;
1334 
1335 
1336 	ffmpeg_report_unused_options(filter, ctx->options);
1337 
1338 	ffenc_copy_pid_props(ctx);
1339 	return GF_OK;
1340 }
1341 
1342 
ffenc_update_arg(GF_Filter * filter,const char * arg_name,const GF_PropertyValue * arg_val)1343 static GF_Err ffenc_update_arg(GF_Filter *filter, const char *arg_name, const GF_PropertyValue *arg_val)
1344 {
1345 	s32 res;
1346 	GF_FFEncodeCtx *ctx = gf_filter_get_udta(filter);
1347 
1348 	if (!strcmp(arg_name, "global_header"))	return GF_OK;
1349 	else if (!strcmp(arg_name, "local_header"))	return GF_OK;
1350 	else if (!strcmp(arg_name, "low_delay"))	ctx->low_delay = GF_TRUE;
1351 	//remap some options
1352 	else if (!strcmp(arg_name, "bitrate") || !strcmp(arg_name, "rate"))	arg_name = "b";
1353 //	else if (!strcmp(arg_name, "gop")) arg_name = "g";
1354 	//disable low delay if these options are set
1355 	else if (!strcmp(arg_name, "x264opts")) ctx->low_delay = GF_FALSE;
1356 	else if (!strcmp(arg_name, "vprofile")) ctx->low_delay = GF_FALSE;
1357 	else if (!strcmp(arg_name, "preset")) ctx->low_delay = GF_FALSE;
1358 	else if (!strcmp(arg_name, "tune")) ctx->low_delay = GF_FALSE;
1359 
1360 	if (!strcmp(arg_name, "g") || !strcmp(arg_name, "gop"))
1361 		ctx->gop_size = arg_val->value.string ? atoi(arg_val->value.string) : 25;
1362 
1363 	if (!strcmp(arg_name, "b") && arg_val->value.string) {
1364 		ctx->target_rate = atoi(arg_val->value.string);
1365 		if (strchr(arg_val->value.string, 'm') || strchr(arg_val->value.string, 'M'))
1366 			ctx->target_rate *= 1000000;
1367 		else if (strchr(arg_val->value.string, 'k') || strchr(arg_val->value.string, 'K'))
1368 			ctx->target_rate *= 1000;
1369 	}
1370 
1371 	//initial parsing of arguments
1372 	if (!ctx->initialized) {
1373 		const char *arg_val_str;
1374 		switch (arg_val->type) {
1375 		case GF_PROP_STRING:
1376 			arg_val_str = arg_val->value.string;
1377 			if (!arg_val_str) arg_val_str = "1";
1378 			res = av_dict_set(&ctx->options, arg_name, arg_val_str, 0);
1379 			if (res<0) {
1380 				GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to set option %s:%s\n", arg_name, arg_val ));
1381 			}
1382 			break;
1383 		default:
1384 			GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[FFEnc] Failed to set option %s:%s, unrecognized type %d\n", arg_name, arg_val, arg_val->type ));
1385 			return GF_NOT_SUPPORTED;
1386 		}
1387 		return GF_OK;
1388 	}
1389 	//updates of arguments, not supported for ffmpeg decoders
1390 	return GF_NOT_SUPPORTED;
1391 }
1392 
// Static capabilities, as two bundles separated by the {0} entry:
// - bundle 1: framed raw video in, any non-raw codec out
// - bundle 2: framed raw audio in, any non-raw codec out
static const GF_FilterCapability FFEncodeCaps[] =
{
	CAP_UINT(GF_CAPS_INPUT_OUTPUT,GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
	CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
	CAP_UINT(GF_CAPS_OUTPUT_EXCLUDED, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	//some video encoding dumps in unframe mode, we declare the pid property at runtime
	{0},
	CAP_UINT(GF_CAPS_INPUT_OUTPUT,GF_PROP_PID_STREAM_TYPE, GF_STREAM_AUDIO),
	CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
	CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_RAW),
	CAP_UINT(GF_CAPS_OUTPUT_EXCLUDED, GF_PROP_PID_CODECID, GF_CODECID_RAW),

};
1407 
// Filter register for the ffmpeg encoder wrapper.
// Non-const: ffmpeg_build_register() (see ffenc_register below) completes it at
// session setup (args expansion for meta options).
GF_FilterRegister FFEncodeRegister = {
	.name = "ffenc",
	.version=LIBAVCODEC_IDENT,
	GF_FS_SET_DESCRIPTION("FFMPEG encoder")
	GF_FS_SET_HELP("Encodes audio and video streams.\nSee FFMPEG documentation (https://ffmpeg.org/documentation.html) for more details"
		"\n"
		"Note: if no codec is explicited through [-ffc]() option and no pixel format is given, codecs will be enumerated to find a matching pixel format.\n"
		"\n"
		"The encoder will force a closed gop boundary at each packet with a `FileNumber` property set.\n"
	)
	.private_size = sizeof(GF_FFEncodeCtx),
	SETCAPS(FFEncodeCaps),
	.initialize = ffenc_initialize,
	.finalize = ffenc_finalize,
	.configure_pid = ffenc_configure_pid,
	.process = ffenc_process,
	.update_arg = ffenc_update_arg,
	//meta filter: unknown options are forwarded to ffmpeg via update_arg
	.flags = GF_FS_REG_META,
};
1427 
//expands to the argument name string plus its offset in the context structure
#define OFFS(_n)	#_n, offsetof(GF_FFEncodeCtx, _n)
// Built-in (non-meta) filter arguments; the trailing "*" entry declares that any
// other option is forwarded to the underlying AVCodecContext.
static const GF_FilterArgs FFEncodeArgs[] =
{
	{ OFFS(c), "codec identifier. Can be any supported GPAC ID or ffmpeg ID or filter subclass name", GF_PROP_STRING, NULL, NULL, 0},
	{ OFFS(pfmt), "pixel format for input video. When not set, input format is used", GF_PROP_PIXFMT, "none", NULL, 0},
	{ OFFS(fintra), "force intra / IDR frames at the given period in sec, eg `fintra=60000/1001` will force an intra every 2 seconds on 29.97 fps video; ignored for audio", GF_PROP_FRACTION, "0", NULL, 0},

	{ OFFS(all_intra), "only produce intra frames", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_UPDATE|GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(ls), "log stats", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(ffc), "ffmpeg codec name. This allows enforcing a given codec if multiple codecs support the codec ID set (eg aac vs vo_aacenc)", GF_PROP_STRING, NULL, NULL, 0},

	{ "*", -1, "any possible options defined for AVCodecContext and sub-classes. see `gpac -hx ffenc` and `gpac -hx ffenc:*`", GF_PROP_STRING, NULL, NULL, GF_FS_ARG_META},
	{0}
};

//number of static (non-generated) arguments, excluding the terminating {0} entry
const int FFENC_STATIC_ARGS = (sizeof (FFEncodeArgs) / sizeof (GF_FilterArgs)) - 1;
1444 
/*! Entry point: completes the register (expanding ffmpeg meta options into the
argument list) and exposes the ffenc filter to the session.
\param session the filter session the register is built for
\return the filter register (never NULL in this build configuration)
*/
const GF_FilterRegister *ffenc_register(GF_FilterSession *session)
{
	ffmpeg_build_register(session, &FFEncodeRegister, FFEncodeArgs, FFENC_STATIC_ARGS, FF_REG_TYPE_ENCODE);
	return &FFEncodeRegister;
}
1450 
1451 
1452 #else
1453 #include <gpac/filters.h>
/*! Stub for builds without ffmpeg support: the filter is not registered.
\param session unused
\return NULL (no register available)
*/
const GF_FilterRegister *ffenc_register(GF_FilterSession *session)
{
	return NULL;
}
1458 
1459 #endif //GPAC_HAS_FFMPEG
1460 
1461