1 /*
2 * iaxclient: a cross-platform IAX softphone library
3 *
4 * Copyrights:
5 * Copyright (C) 2003-2006, Horizon Wimba, Inc.
6 * Copyright (C) 2007, Wimba, Inc.
7 *
8 * Contributors:
9 * Steve Kann <stevek@stevek.com>
10 * Mihai Balea <mihai at hates dot ms>
11 *
12 * This program is free software, distributed under the terms of
13 * the GNU Lesser (Library) General Public License.
14 */
15
16 /*
17 * Some comments about Theora streaming
18 * Theora video codec has two problems when it comes to streaming
19 * and broadcasting video:
20 *
21 * - Large headers that need to be passed from the encoder to the decoder
22 * to initialize it. The conventional wisdom says we should transfer the
23 * headers out of band, but that complicates things with IAX, which does
24 * not have a separate signalling channel. Also, it makes things really
25 * difficult in a video conference scenario, where video gets switched
26 * between participants regularly. To solve this issue, we initialize
27 * the encoder and the decoder at the same time, using the headers from
28 * the local encoder to initialize the decoder. This works if the
29 * endpoints use the exact same version of Theora and the exact same
30 * parameters for initialization.
31 *
32 * - No support for splitting the frame into multiple slices. Frames can
33 * be relatively large. For a 320x240 video stream, you can see key
34 * frames larger than 9KB, which is the maximum UDP packet size on Mac
35 * OS X. To work around this limitation, we use the slice API to fragment
36 * encoded frames to a reasonable size that UDP can safely transport
37 *
38 * Other miscellaneous comments:
39 *
40 * - For quality reasons, when we detect a video stream switch, we reject all
41 * incoming frames until we receive a key frame.
42 *
43 * - Theora only accepts video that has dimensions multiple of 16. If we combine
 * this with a 4:3 aspect ratio requirement, we get a very limited number
45 * of available resolutions. To work around this limitation, we pad the video
46 * on encoding, up to the closest multiple of 16. On the decoding side, we
47 * remove the padding. This way, video resolution can be any multiple of 2
48 *
49 * We should probably look more into this (how to deal with missing and
50 * out of order slices)
51 */
52
53 #include <stdlib.h>
54 #include "iaxclient_lib.h"
55 #include "video.h"
56 #include "slice.h"
57 #include "codec_theora.h"
58 #include <theora/theora.h>
59
60 #define MAX_SLICE_SIZE 8000
61
62 struct theora_decoder
63 {
64 theora_state td;
65 theora_info ti;
66 theora_comment tc;
67 struct deslicer_context *dsc;
68 int got_key_frame;
69 };
70
71 struct theora_encoder
72 {
73 theora_state td;
74 theora_info ti;
75 theora_comment tc;
76 int needs_padding;
77 struct slicer_context *sc;
78 unsigned char *pad_buffer;
79 };
80
destroy(struct iaxc_video_codec * c)81 static void destroy( struct iaxc_video_codec *c)
82 {
83 struct theora_encoder *e;
84 struct theora_decoder *d;
85
86 if ( !c )
87 return;
88
89 if ( c->encstate )
90 {
91 e = (struct theora_encoder *)c->encstate;
92 if ( e->pad_buffer )
93 free(e->pad_buffer);
94 if ( e->sc )
95 free_slicer_context(e->sc);
96 theora_comment_clear(&e->tc);
97 theora_info_clear(&e->ti);
98 theora_clear(&e->td);
99 free(e);
100 }
101 if ( c->decstate )
102 {
103 d = (struct theora_decoder *)c->decstate;
104 if ( d->dsc )
105 free_deslicer_context(d->dsc);
106 theora_comment_clear(&d->tc);
107 theora_info_clear(&d->ti);
108 theora_clear(&d->td);
109 free(c->decstate);
110 }
111 free(c);
112 }
113
decode(struct iaxc_video_codec * c,int inlen,const char * in,int * outlen,char * out)114 static int decode(struct iaxc_video_codec *c, int inlen, const char *in,
115 int *outlen, char *out)
116 {
117 struct theora_decoder *d;
118 ogg_packet op;
119 yuv_buffer picture;
120 unsigned int line;
121 int my_out_len;
122 int w, h, ph;
123 int flen;
124 char *frame;
125
126 // Sanity checks
127 if ( !c || !c->decstate || !in || inlen <= 0 || !out || !outlen )
128 return -1;
129
130 // Assemble slices
131 d = (struct theora_decoder *)c->decstate;
132 if ( !d->dsc )
133 return -1;
134
135 frame = deslice(in, inlen, &flen, d->dsc);
136 if ( frame == NULL )
137 return 1;
138
139 /* decode into an OP structure */
140 memset(&op, 0, sizeof(op));
141 op.bytes = flen;
142 op.packet = (unsigned char *)frame;
143
144 /* reject all incoming frames until we get a key frame */
145 if ( !d->got_key_frame )
146 {
147 if ( theora_packet_iskeyframe(&op) )
148 d->got_key_frame = 1;
149 else
150 return 1;
151 }
152
153 if ( theora_decode_packetin(&d->td, &op) == OC_BADPACKET )
154 {
155 fprintf(stderr,
156 "codec_theora: warning: theora_decode_packetin says bad packet\n");
157 return -1;
158 }
159
160 w = d->ti.frame_width;
161 h = d->ti.frame_height;
162 ph = d->ti.height;
163
164 my_out_len = d->ti.frame_width * d->ti.frame_height * 3 / 2;
165
166 /* make sure we have enough room for the goodies */
167 if ( *outlen < my_out_len )
168 {
169 fprintf(stderr, "codec_theora: not enough room for decoding\n");
170 return -1;
171 }
172
173 /* finally, here's where we get our goodies */
174 if ( theora_decode_YUVout(&d->td, &picture) )
175 {
176 fprintf(stderr, "codec_theora: error getting our goodies\n");
177 return -1;
178 }
179
180 //clear output
181 memset(out, 127, my_out_len);
182
183 for( line = 0 ; line < d->ti.frame_height / 2 ; line++ )
184 {
185 // Y-even
186 memcpy(out + picture.y_width * 2 * line,
187 picture.y + 2 * line * picture.y_stride,
188 picture.y_width);
189 // Y-odd
190 memcpy(out + picture.y_width * (2 * line + 1),
191 picture.y + (2 * line + 1) * picture.y_stride,
192 picture.y_width);
193 // U + V
194 memcpy(out + (d->ti.frame_width * d->ti.frame_height) + line * d->ti.frame_width / 2,
195 picture.u + line * picture.uv_stride,
196 picture.uv_width);
197 memcpy(out + (d->ti.frame_width * d->ti.frame_height * 5 / 4) + line * d->ti.frame_width / 2,
198 picture.v + line * picture.uv_stride,
199 picture.uv_width);
200 }
201
202 *outlen = my_out_len;
203
204 return 0;
205 }
206
// Copies one w by h image plane from src into the pw by ph plane at dst.
// Columns w..pw-1 of every copied row and rows h..ph-1 are filled with
// value. Both planes are tightly packed (stride equals width).
static void pad_channel(const char *src, int w, int h, unsigned char *dst,
		int pw, int ph, unsigned char value)
{
	const int extra_rows = ph - h;

	if ( w != pw )
	{
		// Rows differ in length: copy row by row and fill the right edge
		const char *in_row = src;
		unsigned char *out_row = dst;
		int rows_left;

		for ( rows_left = h ; rows_left > 0 ; rows_left-- )
		{
			memcpy(out_row, in_row, w);
			memset(out_row + w, value, pw - w);
			in_row += w;
			out_row += pw;
		}
	}
	else
	{
		// Same row length: the whole plane can be copied in one go
		memcpy(dst, src, w * h);
	}

	// Fill whatever rows remain below the source picture
	if ( extra_rows > 0 )
		memset(dst + pw * h, value, extra_rows * pw);
}
230
encode(struct iaxc_video_codec * c,int inlen,const char * in,struct slice_set_t * slice_set)231 static int encode(struct iaxc_video_codec * c, int inlen, const char * in,
232 struct slice_set_t * slice_set)
233 {
234 struct theora_encoder *e;
235 ogg_packet op;
236 yuv_buffer picture;
237
238 // Sanity checks
239 if ( !c || !c->encstate || !in || !slice_set )
240 return -1;
241
242 e = (struct theora_encoder *)c->encstate;
243
244 // Prepare the YUV buffer
245 if ( e->needs_padding )
246 {
247 // We copy a padded image into the pad buffer and set up the pointers
248 // Use pad_channel for each of the YUV channels
249 // Use a pad value of 0 for luma and 128 for chroma
250 pad_channel(in,
251 e->ti.frame_width,
252 e->ti.frame_height,
253 e->pad_buffer,
254 e->ti.width,
255 e->ti.height,
256 0);
257
258 pad_channel(in + e->ti.frame_width * e->ti.frame_height,
259 e->ti.frame_width / 2,
260 e->ti.frame_height / 2,
261 e->pad_buffer + e->ti.width * e->ti.height,
262 e->ti.width / 2,
263 e->ti.height / 2,
264 128);
265
266 pad_channel(in + e->ti.frame_width * e->ti.frame_height * 5 / 4,
267 e->ti.frame_width / 2,
268 e->ti.frame_height / 2,
269 e->pad_buffer + e->ti.width * e->ti.height * 5 / 4,
270 e->ti.width / 2,
271 e->ti.height / 2,
272 128);
273
274 picture.y = e->pad_buffer;
275 } else
276 {
277 // use the original buffer
278 picture.y = (unsigned char *)in;
279 }
280 picture.u = picture.y + e->ti.width * e->ti.height;
281 picture.v = picture.u + e->ti.width * e->ti.height / 4;
282 picture.y_width = e->ti.width;
283 picture.y_height = e->ti.height;
284 picture.y_stride = e->ti.width;
285 picture.uv_width = e->ti.width / 2;
286 picture.uv_height = e->ti.height / 2;
287 picture.uv_stride = e->ti.width / 2;
288
289 // Send data in for encoding
290 if ( theora_encode_YUVin(&e->td, &picture) )
291 {
292 fprintf(stderr, "codec_theora: failed theora_encode_YUVin\n");
293 return -1;
294 }
295
296 // Get data from the encoder
297 if ( theora_encode_packetout(&e->td, 0, &op) != 1 )
298 {
299 fprintf(stderr, "codec_theora: failed theora_encode_packetout\n");
300 return -1;
301 }
302
303 // Check to see if we have a key frame
304 slice_set->key_frame = theora_packet_iskeyframe(&op) == 1;
305
306 // Slice the frame
307 slice((char *)op.packet, op.bytes, slice_set, e->sc);
308
309 return 0;
310 }
311
codec_video_theora_new(int format,int w,int h,int framerate,int bitrate,int fragsize)312 struct iaxc_video_codec *codec_video_theora_new(int format, int w, int h,
313 int framerate, int bitrate, int fragsize)
314 {
315 struct iaxc_video_codec *c;
316 struct theora_encoder *e;
317 struct theora_decoder *d;
318 unsigned short source_id;
319 ogg_packet headerp, commentp, tablep;
320
321 /* Basic sanity checks */
322 if ( w <= 0 || h <= 0 || framerate <= 0 || bitrate <= 0 || fragsize <= 0 )
323 {
324 fprintf(stderr, "codec_theora: bogus codec params: %d %d %d %d %d\n",
325 w, h, framerate, bitrate, fragsize);
326 return NULL;
327 }
328
329 if ( w % 2 || h % 2 )
330 {
331 fprintf(stderr, "codec_theora: video dimensions must be multiples of 2\n");
332 return NULL;
333 }
334
335 if ( fragsize > MAX_SLICE_SIZE )
336 fragsize = MAX_SLICE_SIZE;
337
338 c = (struct iaxc_video_codec *)calloc(sizeof(struct iaxc_video_codec), 1);
339
340 if ( !c )
341 goto bail;
342
343 c->decstate = calloc(sizeof(struct theora_decoder), 1);
344
345 if ( !c->decstate )
346 goto bail;
347
348 c->encstate = calloc(sizeof(struct theora_encoder), 1);
349
350 if ( !c->encstate )
351 goto bail;
352
353 c->format = format;
354 c->width = w;
355 c->height = h;
356 c->framerate = framerate;
357 c->bitrate = bitrate;
358 c->fragsize = fragsize;
359
360 c->encode = encode;
361 c->decode = decode;
362 c->destroy = destroy;
363
364 e = (struct theora_encoder *)c->encstate;
365 d = (struct theora_decoder *)c->decstate;
366
367 // Initialize slicer
368 // Generate random source id
369 srand((unsigned int)time(0));
370 source_id = rand() & 0xffff;
371 e->sc = create_slicer_context(source_id, fragsize);
372 if ( !e->sc )
373 goto bail;
374
375
376 /* set up some parameters in the contexts */
377
378 theora_info_init(&e->ti);
379
380 /* set up common parameters */
381 e->ti.frame_width = w;
382 e->ti.frame_height = h;
383 e->ti.width = ((w - 1) / 16 + 1) * 16;
384 e->ti.height = ((h - 1) / 16 + 1) * 16;
385 e->ti.offset_x = 0;
386 e->ti.offset_y = 0;
387
388 // We set up a padded frame with dimensions that are multiple of 16
389 // We allocate a buffer to hold this frame
390 e->needs_padding = e->ti.width != e->ti.frame_width ||
391 e->ti.height != e->ti.frame_height;
392
393 if ( e->needs_padding )
394 {
395 e->pad_buffer = (unsigned char *)
396 malloc(e->ti.width * e->ti.height * 3 / 2);
397
398 if ( !e->pad_buffer )
399 goto bail;
400 }
401 else
402 {
403 e->pad_buffer = 0;
404 }
405
406 e->ti.fps_numerator = framerate;
407 e->ti.fps_denominator = 1;
408
409 e->ti.aspect_numerator = 1;
410 e->ti.aspect_denominator = 1;
411
412 e->ti.colorspace = OC_CS_UNSPECIFIED;
413 e->ti.pixelformat = OC_PF_420;
414
415 e->ti.target_bitrate = bitrate;
416
417 e->ti.quality = 0;
418
419 e->ti.dropframes_p = 0;
420 e->ti.quick_p = 1;
421 e->ti.keyframe_auto_p = 0;
422 e->ti.keyframe_frequency = framerate;
423 e->ti.keyframe_frequency_force = framerate;
424 e->ti.keyframe_data_target_bitrate = bitrate * 3;
425 e->ti.keyframe_auto_threshold = 80;
426 e->ti.keyframe_mindistance = 8;
427 e->ti.noise_sensitivity = 0;
428
429 if ( theora_encode_init(&e->td, &e->ti) )
430 goto bail;
431
432 // Obtain the encoder headers and set up the decoder headers from
433 // data in the encoder headers
434 memset(&headerp, 0, sizeof(headerp));
435 memset(&commentp, 0, sizeof(commentp));
436 memset(&tablep, 0, sizeof(tablep));
437
438 // Set up the decoder using the encoder headers
439 theora_info_init(&d->ti);
440 theora_comment_init(&d->tc);
441 theora_comment_init(&e->tc);
442
443 if ( theora_encode_header(&e->td, &headerp) )
444 goto bail;
445
446 headerp.b_o_s = 1;
447
448 if ( theora_decode_header(&d->ti, &d->tc, &headerp) )
449 goto bail;
450
451 if ( theora_encode_comment(&e->tc, &commentp) )
452 goto bail;
453
454 if ( theora_decode_header(&d->ti, &d->tc, &commentp) )
455 goto bail;
456
457 theora_comment_clear(&e->tc);
458
459 if ( theora_encode_tables(&e->td, &tablep) )
460 goto bail;
461
462 if ( theora_decode_header(&d->ti, &d->tc, &tablep) )
463 goto bail;
464
465 if ( theora_decode_init(&d->td, &d->ti) )
466 goto bail;
467
468 d->got_key_frame = 0;
469
470 // Initialize deslicer context
471 d->dsc = create_deslicer_context(c->fragsize);
472 if ( !d->dsc )
473 goto bail;
474
475 strcpy(c->name, "Theora");
476 return c;
477
478 bail:
479 fprintf(stderr, "codec_theora: failed to initialize encoder or decoder\n");
480
481 if ( c )
482 {
483 if ( c->encstate )
484 {
485 e = (struct theora_encoder *)c->encstate;
486 if ( e->sc )
487 free_slicer_context(e->sc);
488 free(c->encstate);
489 }
490 if ( c->decstate )
491 {
492 d = (struct theora_decoder *)c->decstate;
493 if ( d->dsc )
494 free_deslicer_context(d->dsc);
495 free(c->decstate);
496 }
497 free(c);
498 }
499
500 return NULL;
501 }
502
503