1 /*
2  * iaxclient: a cross-platform IAX softphone library
3  *
4  * Copyrights:
5  * Copyright (C) 2003-2006, Horizon Wimba, Inc.
6  * Copyright (C) 2007, Wimba, Inc.
7  *
8  * Contributors:
9  * Steve Kann <stevek@stevek.com>
10  * Mihai Balea <mihai at hates dot ms>
11  *
12  * This program is free software, distributed under the terms of
13  * the GNU Lesser (Library) General Public License.
14  */
15 
16 /*
17  * Some comments about Theora streaming
18  * Theora video codec has two problems when it comes to streaming
19  * and broadcasting video:
20  *
21  * - Large headers that need to be passed from the encoder to the decoder
22  *   to initialize it. The conventional wisdom says we should transfer the
23  *   headers out of band, but that complicates things with IAX, which does
24  *   not have a separate signalling channel. Also, it makes things really
25  *   difficult in a video conference scenario, where video gets switched
26  *   between participants regularly. To solve this issue, we initialize
27  *   the encoder and the decoder at the same time, using the headers from
28  *   the local encoder to initialize the decoder. This works if the
29  *   endpoints use the exact same version of Theora and the exact same
30  *   parameters for initialization.
31  *
32  * - No support for splitting the frame into multiple slices.  Frames can
33  *   be relatively large. For a 320x240 video stream, you can see key
34  *   frames larger than 9KB, which is the maximum UDP packet size on Mac
35  *   OS X. To work around this limitation, we use the slice API to fragment
36  *   encoded frames to a reasonable size that UDP can safely transport
37  *
38  * Other miscellaneous comments:
39  *
40  * - For quality reasons, when we detect a video stream switch, we reject all
41  *   incoming frames until we receive a key frame.
42  *
 * - Theora only accepts video whose dimensions are multiples of 16. If we
 *   combine this with a 4:3 aspect ratio requirement, we get a very limited
 *   number of available resolutions. To work around this limitation, we pad
 *   the video on encoding, up to the closest multiple of 16. On the decoding
 *   side, we remove the padding. This way, video resolution can be any
 *   multiple of 2.
48  *
49  * We should probably look more into this (how to deal with missing and
50  * out of order slices)
51  */
52 
53 #include <stdlib.h>
54 #include "iaxclient_lib.h"
55 #include "video.h"
56 #include "slice.h"
57 #include "codec_theora.h"
58 #include <theora/theora.h>
59 
60 #define MAX_SLICE_SIZE		8000
61 
62 struct theora_decoder
63 {
64 	theora_state            td;
65 	theora_info             ti;
66 	theora_comment          tc;
67 	struct deslicer_context *dsc;
68 	int                     got_key_frame;
69 };
70 
71 struct theora_encoder
72 {
73 	theora_state          td;
74 	theora_info           ti;
75 	theora_comment        tc;
76 	int                   needs_padding;
77 	struct slicer_context *sc;
78 	unsigned char         *pad_buffer;
79 };
80 
destroy(struct iaxc_video_codec * c)81 static void destroy( struct iaxc_video_codec *c)
82 {
83 	struct theora_encoder *e;
84 	struct theora_decoder *d;
85 
86 	if ( !c )
87 		return;
88 
89 	if ( c->encstate )
90 	{
91 		e = (struct theora_encoder *)c->encstate;
92 		if ( e->pad_buffer )
93 			free(e->pad_buffer);
94 		if ( e->sc )
95 			free_slicer_context(e->sc);
96 		theora_comment_clear(&e->tc);
97 		theora_info_clear(&e->ti);
98 		theora_clear(&e->td);
99 		free(e);
100 	}
101 	if ( c->decstate )
102 	{
103 		d = (struct theora_decoder *)c->decstate;
104 		if ( d->dsc )
105 			free_deslicer_context(d->dsc);
106 		theora_comment_clear(&d->tc);
107 		theora_info_clear(&d->ti);
108 		theora_clear(&d->td);
109 		free(c->decstate);
110 	}
111 	free(c);
112 }
113 
decode(struct iaxc_video_codec * c,int inlen,const char * in,int * outlen,char * out)114 static int decode(struct iaxc_video_codec *c, int inlen, const char *in,
115 		int *outlen, char *out)
116 {
117 	struct theora_decoder *d;
118 	ogg_packet            op;
119 	yuv_buffer            picture;
120 	unsigned int          line;
121 	int                   my_out_len;
122 	int                   w, h, ph;
123 	int                   flen;
124 	char                  *frame;
125 
126 	// Sanity checks
127 	if ( !c || !c->decstate || !in || inlen <= 0 || !out || !outlen )
128 		return -1;
129 
130 	// Assemble slices
131 	d = (struct theora_decoder *)c->decstate;
132 	if ( !d->dsc )
133 		return -1;
134 
135 	frame = deslice(in, inlen, &flen, d->dsc);
136 	if ( frame == NULL )
137 		return 1;
138 
139 	/* decode into an OP structure */
140 	memset(&op, 0, sizeof(op));
141 	op.bytes = flen;
142 	op.packet = (unsigned char *)frame;
143 
144 	/* reject all incoming frames until we get a key frame */
145 	if ( !d->got_key_frame )
146 	{
147 		if ( theora_packet_iskeyframe(&op) )
148 			d->got_key_frame = 1;
149 		else
150 			return 1;
151 	}
152 
153 	if ( theora_decode_packetin(&d->td, &op) == OC_BADPACKET )
154 	{
155 		fprintf(stderr,
156 			"codec_theora: warning: theora_decode_packetin says bad packet\n");
157 		return -1;
158 	}
159 
160 	w = d->ti.frame_width;
161 	h = d->ti.frame_height;
162 	ph = d->ti.height;
163 
164 	my_out_len = d->ti.frame_width * d->ti.frame_height * 3 / 2;
165 
166 	/* make sure we have enough room for the goodies */
167 	if ( *outlen < my_out_len )
168 	{
169 		fprintf(stderr, "codec_theora: not enough room for decoding\n");
170 		return -1;
171 	}
172 
173 	/* finally, here's where we get our goodies */
174 	if ( theora_decode_YUVout(&d->td, &picture) )
175 	{
176 		fprintf(stderr, "codec_theora: error getting our goodies\n");
177 		return -1;
178 	}
179 
180 	//clear output
181 	memset(out, 127, my_out_len);
182 
183 	for( line = 0 ; line < d->ti.frame_height / 2 ; line++ )
184 	{
185 		// Y-even
186 		memcpy(out + picture.y_width * 2 * line,
187 		       picture.y + 2 * line * picture.y_stride,
188 		       picture.y_width);
189 		// Y-odd
190 		memcpy(out + picture.y_width * (2 * line + 1),
191 		       picture.y + (2 * line + 1) * picture.y_stride,
192 		       picture.y_width);
193 		// U + V
194 		memcpy(out + (d->ti.frame_width * d->ti.frame_height) + line * d->ti.frame_width / 2,
195 		       picture.u + line * picture.uv_stride,
196 		       picture.uv_width);
197 		memcpy(out + (d->ti.frame_width * d->ti.frame_height * 5 / 4) + line * d->ti.frame_width / 2,
198 		       picture.v + line * picture.uv_stride,
199 		       picture.uv_width);
200 	}
201 
202 	*outlen = my_out_len;
203 
204 	return 0;
205 }
206 
/*
 * Copies a w by h plane from src into dst, growing it to pw by ph.
 * Every byte added on the right of each row and below the last row is set
 * to value. dst must have room for pw * ph bytes.
 */
static void pad_channel(const char *src, int w, int h, unsigned char *dst,
		int pw, int ph, unsigned char value)
{
	const int extra_cols = pw - w;
	const int extra_rows = ph - h;

	if ( extra_cols == 0 )
	{
		// Rows are already the right width: one bulk copy is enough
		memcpy(dst, src, w * h);
	}
	else
	{
		// Copy row by row, filling the tail of each destination row
		const char    *from = src;
		unsigned char *to = dst;
		int           row;

		for ( row = 0 ; row < h ; row++, from += w, to += pw )
		{
			memcpy(to, from, w);
			memset(to + w, value, extra_cols);
		}
	}

	// Fill the rows below the source picture, if there are any
	if ( extra_rows > 0 )
		memset(dst + pw * h, value, extra_rows * pw);
}
230 
encode(struct iaxc_video_codec * c,int inlen,const char * in,struct slice_set_t * slice_set)231 static int encode(struct iaxc_video_codec * c, int inlen, const char * in,
232 		struct slice_set_t * slice_set)
233 {
234 	struct theora_encoder	*e;
235 	ogg_packet		op;
236 	yuv_buffer		picture;
237 
238 	// Sanity checks
239 	if ( !c || !c->encstate || !in || !slice_set )
240 		return -1;
241 
242 	e = (struct theora_encoder *)c->encstate;
243 
244 	// Prepare the YUV buffer
245 	if ( e->needs_padding )
246 	{
247 		// We copy a padded image into the pad buffer and set up the pointers
248 		// Use pad_channel for each of the YUV channels
249 		// Use a pad value of 0 for luma and 128 for chroma
250 		pad_channel(in,
251 				e->ti.frame_width,
252 				e->ti.frame_height,
253 				e->pad_buffer,
254 				e->ti.width,
255 				e->ti.height,
256 				0);
257 
258 		pad_channel(in + e->ti.frame_width * e->ti.frame_height,
259 				e->ti.frame_width / 2,
260 				e->ti.frame_height / 2,
261 				e->pad_buffer + e->ti.width * e->ti.height,
262 				e->ti.width / 2,
263 				e->ti.height / 2,
264 				128);
265 
266 		pad_channel(in + e->ti.frame_width * e->ti.frame_height * 5 / 4,
267 				e->ti.frame_width / 2,
268 				e->ti.frame_height / 2,
269 				e->pad_buffer + e->ti.width * e->ti.height * 5 / 4,
270 				e->ti.width / 2,
271 				e->ti.height / 2,
272 				128);
273 
274 		picture.y = e->pad_buffer;
275 	} else
276 	{
277 		// use the original buffer
278 		picture.y = (unsigned char *)in;
279 	}
280 	picture.u = picture.y + e->ti.width * e->ti.height;
281 	picture.v = picture.u + e->ti.width * e->ti.height / 4;
282 	picture.y_width = e->ti.width;
283 	picture.y_height = e->ti.height;
284 	picture.y_stride = e->ti.width;
285 	picture.uv_width = e->ti.width / 2;
286 	picture.uv_height = e->ti.height / 2;
287 	picture.uv_stride = e->ti.width / 2;
288 
289 	// Send data in for encoding
290 	if ( theora_encode_YUVin(&e->td, &picture) )
291 	{
292 		fprintf(stderr, "codec_theora: failed theora_encode_YUVin\n");
293 		return -1;
294 	}
295 
296 	// Get data from the encoder
297 	if ( theora_encode_packetout(&e->td, 0, &op) != 1 )
298 	{
299 		fprintf(stderr, "codec_theora: failed theora_encode_packetout\n");
300 		return -1;
301 	}
302 
303 	// Check to see if we have a key frame
304 	slice_set->key_frame = theora_packet_iskeyframe(&op) == 1;
305 
306 	// Slice the frame
307 	slice((char *)op.packet, op.bytes, slice_set, e->sc);
308 
309 	return 0;
310 }
311 
codec_video_theora_new(int format,int w,int h,int framerate,int bitrate,int fragsize)312 struct iaxc_video_codec *codec_video_theora_new(int format, int w, int h,
313 		int framerate, int bitrate, int fragsize)
314 {
315 	struct iaxc_video_codec *c;
316 	struct theora_encoder   *e;
317 	struct theora_decoder   *d;
318 	unsigned short          source_id;
319 	ogg_packet              headerp, commentp, tablep;
320 
321 	/* Basic sanity checks */
322 	if ( w <= 0 || h <= 0 || framerate <= 0 || bitrate <= 0 || fragsize <= 0 )
323 	{
324 		fprintf(stderr, "codec_theora: bogus codec params: %d %d %d %d %d\n",
325 				w, h, framerate, bitrate, fragsize);
326 		return NULL;
327 	}
328 
329 	if ( w % 2 || h % 2 )
330 	{
331 		fprintf(stderr, "codec_theora: video dimensions must be multiples of 2\n");
332 		return NULL;
333 	}
334 
335 	if ( fragsize > MAX_SLICE_SIZE )
336 		fragsize = MAX_SLICE_SIZE;
337 
338 	c = (struct iaxc_video_codec *)calloc(sizeof(struct iaxc_video_codec), 1);
339 
340 	if ( !c )
341 		goto bail;
342 
343 	c->decstate = calloc(sizeof(struct theora_decoder), 1);
344 
345 	if ( !c->decstate )
346 		goto bail;
347 
348 	c->encstate = calloc(sizeof(struct theora_encoder), 1);
349 
350 	if ( !c->encstate )
351 		goto bail;
352 
353 	c->format = format;
354 	c->width = w;
355 	c->height = h;
356 	c->framerate = framerate;
357 	c->bitrate = bitrate;
358 	c->fragsize = fragsize;
359 
360 	c->encode = encode;
361 	c->decode = decode;
362 	c->destroy = destroy;
363 
364 	e = (struct theora_encoder *)c->encstate;
365 	d = (struct theora_decoder *)c->decstate;
366 
367 	// Initialize slicer
368 	// Generate random source id
369 	srand((unsigned int)time(0));
370 	source_id = rand() & 0xffff;
371 	e->sc = create_slicer_context(source_id, fragsize);
372 	if ( !e->sc )
373 		goto bail;
374 
375 
376 	/* set up some parameters in the contexts */
377 
378 	theora_info_init(&e->ti);
379 
380 	/* set up common parameters */
381 	e->ti.frame_width = w;
382 	e->ti.frame_height = h;
383 	e->ti.width = ((w - 1) / 16 + 1) * 16;
384 	e->ti.height = ((h - 1) / 16 + 1) * 16;
385 	e->ti.offset_x = 0;
386 	e->ti.offset_y = 0;
387 
388 	// We set up a padded frame with dimensions that are multiple of 16
389 	// We allocate a buffer to hold this frame
390 	e->needs_padding = e->ti.width != e->ti.frame_width ||
391 		e->ti.height != e->ti.frame_height;
392 
393 	if ( e->needs_padding )
394 	{
395 		e->pad_buffer = (unsigned char *)
396 			malloc(e->ti.width * e->ti.height * 3 / 2);
397 
398 		if ( !e->pad_buffer )
399 			goto bail;
400 	}
401 	else
402 	{
403 		e->pad_buffer = 0;
404 	}
405 
406 	e->ti.fps_numerator = framerate;
407 	e->ti.fps_denominator = 1;
408 
409 	e->ti.aspect_numerator = 1;
410 	e->ti.aspect_denominator = 1;
411 
412 	e->ti.colorspace = OC_CS_UNSPECIFIED;
413 	e->ti.pixelformat = OC_PF_420;
414 
415 	e->ti.target_bitrate = bitrate;
416 
417 	e->ti.quality = 0;
418 
419 	e->ti.dropframes_p = 0;
420 	e->ti.quick_p = 1;
421 	e->ti.keyframe_auto_p = 0;
422 	e->ti.keyframe_frequency = framerate;
423 	e->ti.keyframe_frequency_force = framerate;
424 	e->ti.keyframe_data_target_bitrate = bitrate * 3;
425 	e->ti.keyframe_auto_threshold = 80;
426 	e->ti.keyframe_mindistance = 8;
427 	e->ti.noise_sensitivity = 0;
428 
429 	if ( theora_encode_init(&e->td, &e->ti) )
430 		goto bail;
431 
432 	// Obtain the encoder headers and set up the decoder headers from
433 	// data in the encoder headers
434 	memset(&headerp, 0, sizeof(headerp));
435 	memset(&commentp, 0, sizeof(commentp));
436 	memset(&tablep, 0, sizeof(tablep));
437 
438 	// Set up the decoder using the encoder headers
439 	theora_info_init(&d->ti);
440 	theora_comment_init(&d->tc);
441 	theora_comment_init(&e->tc);
442 
443 	if ( theora_encode_header(&e->td, &headerp) )
444 		goto bail;
445 
446 	headerp.b_o_s = 1;
447 
448 	if ( theora_decode_header(&d->ti, &d->tc, &headerp) )
449 		goto bail;
450 
451 	if ( theora_encode_comment(&e->tc, &commentp) )
452 		goto bail;
453 
454 	if ( theora_decode_header(&d->ti, &d->tc, &commentp) )
455 		goto bail;
456 
457 	theora_comment_clear(&e->tc);
458 
459 	if ( theora_encode_tables(&e->td, &tablep) )
460 		goto bail;
461 
462 	if ( theora_decode_header(&d->ti, &d->tc, &tablep) )
463 		goto bail;
464 
465 	if ( theora_decode_init(&d->td, &d->ti) )
466 		goto bail;
467 
468 	d->got_key_frame = 0;
469 
470 	// Initialize deslicer context
471 	d->dsc = create_deslicer_context(c->fragsize);
472 	if ( !d->dsc )
473 		goto bail;
474 
475 	strcpy(c->name, "Theora");
476 	return c;
477 
478 bail:
479 	fprintf(stderr, "codec_theora: failed to initialize encoder or decoder\n");
480 
481 	if ( c )
482 	{
483 		if ( c->encstate )
484 		{
485 			e = (struct theora_encoder *)c->encstate;
486 			if ( e->sc )
487 				free_slicer_context(e->sc);
488 			free(c->encstate);
489 		}
490 		if ( c->decstate )
491 		{
492 			d = (struct theora_decoder *)c->decstate;
493 			if ( d->dsc )
494 				free_deslicer_context(d->dsc);
495 			free(c->decstate);
496 		}
497 		free(c);
498 	}
499 
500 	return NULL;
501 }
502 
503