1 /* -*- c-basic-offset: 8; -*- */
2 /* webm.c: WebM data handler
3 * $Id$
4 *
5 * Copyright (C) 2002-2012 the Icecast team <team@icecast.org>
6 * Copyright (C) 2015-2019 Philipp "ph3-der-loewe" Schafft <lion@lion.leolix.org>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public
19 * License along with this library; if not, write to the Free
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #ifdef HAVE_INTTYPES_H
32 # include <inttypes.h>
33 #endif
34
35 #include <shout/shout.h>
36 #include "shout_private.h"
37
38 /* -- local datatypes -- */
39
/* Sentinel stored in place of a parsed var-int whose value bits are all
 * ones (the EBML "unknown" marker). A var-int is at most 8 bytes long and
 * loses at least one bit to its length marker, so no genuine value can
 * collide with this all-ones 64-bit pattern.
 */
#define EBML_UNKNOWN            (~(uint64_t) 0)

/* Tag IDs in the Matroska spec are written with their var-int length
 * marker bit still set. The parser strips that bit, so these masks clear
 * it to obtain the number the parser will actually report.
 */
#define EBML_LONG_MASK          (~0x10000000)   /* 4-byte IDs */
#define EBML_MID3_MASK          (~0x200000)     /* 3-byte IDs */
#define EBML_MID2_MASK          (~0x4000)       /* 2-byte IDs */
#define EBML_SHORT_MASK         (~0x80)         /* 1-byte IDs */

/* The tags this filter reacts to, grouped by ID width. */

/* 4-byte IDs */
#define WEBM_EBML_ID            (0x1A45DFA3 & EBML_LONG_MASK)
#define WEBM_SEGMENT_ID         (0x18538067 & EBML_LONG_MASK)
#define WEBM_CLUSTER_ID         (0x1F43B675 & EBML_LONG_MASK)
#define WEBM_SEGMENT_INFO_ID    (0x1549A966 & EBML_LONG_MASK)

/* 3-byte IDs */
#define WEBM_TIMESTAMPSCALE_ID  (0x2AD7B1 & EBML_MID3_MASK)

/* 1-byte IDs */
#define WEBM_TIMECODE_ID        (0xE7 & EBML_SHORT_MASK)
#define WEBM_SIMPLE_BLOCK_ID    (0xA3 & EBML_SHORT_MASK)
#define WEBM_BLOCK_GROUP_ID     (0xA0 & EBML_SHORT_MASK)
#define WEBM_BLOCK_ID           (0xA1 & EBML_SHORT_MASK)
62
/* What the processing loop does with the next bytes of buffered input. */
typedef enum webm_parsing_state {
    /* parse an EBML tag header at the read position */
    WEBM_STATE_READ_TAG  = 0,
    /* forward a previously determined number of bytes to the output */
    WEBM_STATE_COPY_THRU = 1
} webm_parsing_state;
67
68 /* state for a filter that extracts timestamp
69 * information from a WebM stream
70 */
71 /* TODO: provide for "fake chaining", where
72 * concatinated files have extra headers stripped
73 * and Cluster timestamps rewritten
74 */
75 typedef struct _webm_t {
76
77 /* processing state */
78 bool waiting_for_more_input;
79 webm_parsing_state parsing_state;
80 uint64_t copy_len;
81
82 /* buffer state */
83 size_t input_write_position;
84 size_t input_read_position;
85 size_t output_position;
86
87 /* Metadata */
88 uint64_t timestamp_scale;
89
90 /* statistics */
91 uint64_t cluster_timestamp;
92 uint64_t latest_timestamp;
93
94 /* buffer storage */
95 unsigned char input_buffer[SHOUT_BUFSIZE];
96 unsigned char output_buffer[SHOUT_BUFSIZE];
97
98 } webm_t;
99
100 /* -- static prototypes -- */
101 static int send_webm(shout_t *self, const unsigned char *data, size_t len);
102 static void close_webm(shout_t *self);
103
104 static int webm_process(shout_t *self, webm_t *webm);
105 static int webm_process_tag(shout_t *self, webm_t *webm);
106 static int webm_output(shout_t *self, webm_t *webm, const unsigned char *data, size_t len);
107
108 static size_t copy_possible(const void *src_base,
109 size_t *src_position,
110 size_t src_len,
111 void *target_base,
112 size_t *target_position,
113 size_t target_len);
114 static int flush_output(shout_t *self, webm_t *webm);
115
116 static ssize_t ebml_parse_tag(unsigned char *buffer,
117 unsigned char *buffer_end,
118 uint64_t *tag_id,
119 uint64_t *payload_length);
120 static ssize_t ebml_parse_var_int(unsigned char *buffer,
121 unsigned char *buffer_end,
122 uint64_t *out_value);
123 static ssize_t ebml_parse_sized_int(unsigned char *buffer,
124 unsigned char *buffer_end,
125 size_t len,
126 bool is_signed,
127 uint64_t *out_value);
128
129 /* -- interface functions -- */
/* Set up `self` for sending WebM data: allocate a zero-initialized
 * filter state, attach it as the format data and install the WebM
 * send/close callbacks.
 * Returns SHOUTERR_SUCCESS, or SHOUTERR_MALLOC (also stored in
 * self->error) when the allocation fails.
 */
int shout_open_webm(shout_t *self)
{
    webm_t *filter = calloc(1, sizeof *filter);

    if (filter == NULL) {
        self->error = SHOUTERR_MALLOC;
        return self->error;
    }

    /* hook the filter and its handlers into the shout state */
    self->send = send_webm;
    self->close = close_webm;
    self->format_data = filter;

    return SHOUTERR_SUCCESS;
}
147
/* Send callback: run `len` bytes of WebM data through the filter.
 * The data is staged in the filter's input buffer, in as many rounds
 * as it takes, and processed after each round. Unless an error
 * occurred, buffered output is flushed before returning.
 * self->senttime is updated from the latest timestamp seen so far.
 * Returns the resulting self->error.
 */
static int send_webm(shout_t *self, const unsigned char *data, size_t len)
{
    webm_t *webm = (webm_t *) self->format_data;
    size_t consumed;

    self->error = SHOUTERR_SUCCESS;

    for (consumed = 0; consumed < len && self->error == SHOUTERR_SUCCESS; ) {
        /* stage as much input as currently fits, then work on it */
        copy_possible(data, &consumed, len,
                      webm->input_buffer, &webm->input_write_position, SHOUT_BUFSIZE);

        self->error = webm_process(self, webm);
    }

    /* don't hold back sendable output, unless we're already failing */
    if (self->error == SHOUTERR_SUCCESS)
        self->error = flush_output(self, webm);

    /* latest known timecode, reported for rate-control */
    self->senttime = (webm->latest_timestamp * webm->timestamp_scale) / 1000;

    return self->error;
}
172
/* Close callback: release the filter state allocated by
 * shout_open_webm().
 */
static void close_webm(shout_t *self)
{
    webm_t *filter = (webm_t *) self->format_data;

    /* free() is a no-op for NULL, so no guard is needed */
    free(filter);
}
180
181 /* -- processing functions -- */
182
183 /* Process what we can of the input buffer,
184 * extracting statistics or rewriting the
185 * stream as necessary.
186 * Returns a status code to indicate socket errors.
187 */
webm_process(shout_t * self,webm_t * webm)188 static int webm_process(shout_t *self, webm_t *webm)
189 {
190 size_t to_process;
191
192 /* loop as long as buffer holds process-able data */
193 webm->waiting_for_more_input = false;
194 while (webm->input_read_position < webm->input_write_position
195 && !webm->waiting_for_more_input
196 && self->error == SHOUTERR_SUCCESS) {
197
198 /* calculate max space an operation can work on */
199 to_process = webm->input_write_position - webm->input_read_position;
200
201 /* perform appropriate operation */
202 switch (webm->parsing_state) {
203 case WEBM_STATE_READ_TAG:
204 self->error = webm_process_tag(self, webm);
205 break;
206
207 case WEBM_STATE_COPY_THRU:
208 /* copy a known quantity of bytes to the output */
209
210 /* calculate size needing to be copied this step */
211 if (webm->copy_len < to_process) {
212 to_process = webm->copy_len;
213 }
214
215 /* do copy */
216 self->error = webm_output(self, webm,
217 webm->input_buffer + webm->input_read_position,
218 to_process);
219
220 /* update state with copy progress */
221 webm->copy_len -= to_process;
222 webm->input_read_position += to_process;
223 if (webm->copy_len == 0) {
224 webm->parsing_state = WEBM_STATE_READ_TAG;
225 }
226
227 break;
228
229 }
230
231 }
232
233 if (webm->input_read_position < webm->input_write_position) {
234 /* slide unprocessed data to front of buffer */
235 to_process = webm->input_write_position - webm->input_read_position;
236 memmove(webm->input_buffer, webm->input_buffer + webm->input_read_position, to_process);
237
238 webm->input_read_position = 0;
239 webm->input_write_position = to_process;
240 } else {
241 /* subtract read position instead of zeroing;
242 * this allows skipping over large spans of data by
243 * setting the read pointer far ahead. Processing won't
244 * resume until the read pointer is actually within the buffer.
245 */
246 webm->input_read_position -= webm->input_write_position;
247 webm->input_write_position = 0;
248 }
249
250 return self->error;
251 }
252
253 /* Try to read a tag header & handle it appropriately.
254 * Returns an error code for socket errors or malformed input.
255 */
webm_process_tag(shout_t * self,webm_t * webm)256 static int webm_process_tag(shout_t *self, webm_t *webm)
257 {
258 ssize_t tag_length;
259 uint64_t tag_id;
260 uint64_t payload_length;
261
262 uint64_t timecode;
263 ssize_t track_number_length;
264 uint64_t track_number;
265 uint64_t timestamp_scale;
266
267 uint64_t to_copy;
268
269 ssize_t status;
270
271 unsigned char *start_of_buffer = webm->input_buffer + webm->input_read_position;
272 unsigned char *end_of_buffer = webm->input_buffer + webm->input_write_position;
273
274 /* parse tag header */
275 tag_length = ebml_parse_tag(start_of_buffer, end_of_buffer, &tag_id, &payload_length);
276 if (tag_length == 0) {
277 webm->waiting_for_more_input = true;
278 return self->error;
279 } else if (tag_length < 0) {
280 return self->error = SHOUTERR_INSANE;
281 }
282
283 /* most tags will be copied, header & payload, to output unaltered */
284 to_copy = tag_length + payload_length;
285
286 /* break open tags of unknown length, to process all children */
287 if (payload_length == EBML_UNKNOWN) {
288 to_copy = tag_length;
289 }
290
291 /* handle tag appropriately */
292
293 switch (tag_id) {
294 case WEBM_SEGMENT_ID:
295 case WEBM_CLUSTER_ID:
296 /* open containers to process children */
297 to_copy = tag_length;
298 break;
299
300 case WEBM_SEGMENT_INFO_ID:
301 /* open containers to process children */
302 to_copy = tag_length;
303 /* set defaults */
304 webm->timestamp_scale = 1000000;
305 break;
306
307 case WEBM_TIMESTAMPSCALE_ID:
308 /* read cluster timecode */
309 status = ebml_parse_sized_int(start_of_buffer + tag_length,
310 end_of_buffer,
311 payload_length,
312 false, ×tamp_scale);
313
314 if (status == 0) {
315 webm->waiting_for_more_input = true;
316 return self->error;
317 } else if (status < 0) {
318 return self->error = SHOUTERR_INSANE;
319 }
320
321 webm->timestamp_scale = timestamp_scale;
322 break;
323
324 case WEBM_TIMECODE_ID:
325 /* read cluster timecode */
326 status = ebml_parse_sized_int(start_of_buffer + tag_length,
327 end_of_buffer,
328 payload_length,
329 false, &timecode);
330
331 if (status == 0) {
332 webm->waiting_for_more_input = true;
333 return self->error;
334 } else if (status < 0) {
335 return self->error = SHOUTERR_INSANE;
336 }
337
338 /* report timecode */
339 webm->cluster_timestamp = timecode;
340 webm->latest_timestamp = timecode;
341
342 /* TODO: detect backwards jumps and rewrite to be monotonic */
343 break;
344
345 case WEBM_BLOCK_GROUP_ID:
346 /* open container to process children */
347 to_copy = tag_length;
348
349 break;
350
351 case WEBM_SIMPLE_BLOCK_ID:
352 case WEBM_BLOCK_ID:
353 /* extract block or simple block timecode */
354
355 /* [simple] blocks start with a varint, so read it to
356 * know the offset of the following fields
357 */
358 track_number_length = ebml_parse_var_int(start_of_buffer + tag_length,
359 end_of_buffer, &track_number);
360 if (track_number_length == 0) {
361 webm->waiting_for_more_input = true;
362 return self->error;
363 } else if (track_number_length < 0) {
364 return self->error = SHOUTERR_INSANE;
365 }
366
367 /* now read the actual (signed 16-bit) timecode;
368 * this code is relative to the Cluster's timecode.
369 *
370 * ASSUMPTION: it will not actually be negative,
371 * since WebM encoding guidelines advise all timestamps
372 * be monotonically increasing.
373 */
374 status = ebml_parse_sized_int(start_of_buffer + tag_length + track_number_length,
375 end_of_buffer, 2, true, &timecode);
376
377 if (status == 0) {
378 webm->waiting_for_more_input = true;
379 return self->error;
380 } else if (status < 0) {
381 return self->error = SHOUTERR_INSANE;
382 }
383
384 /* report timecode */
385 webm->latest_timestamp = webm->cluster_timestamp + timecode;
386
387 break;
388 }
389
390 /* queue copying */
391
392 if (to_copy > 0) {
393 webm->copy_len = to_copy;
394 webm->parsing_state = WEBM_STATE_COPY_THRU;
395 }
396
397 return self->error;
398 }
399
400 /* Queue the given data in the output buffer,
401 * flushing as needed. Returns a status code
402 * to allow detecting socket errors on a flush.
403 */
webm_output(shout_t * self,webm_t * webm,const unsigned char * data,size_t len)404 static int webm_output(shout_t *self, webm_t *webm, const unsigned char *data, size_t len)
405 {
406 size_t output_progress = 0;
407
408 while (output_progress < len && self->error == SHOUTERR_SUCCESS)
409 {
410 copy_possible(data, &output_progress, len,
411 webm->output_buffer, &webm->output_position, SHOUT_BUFSIZE);
412
413 if (webm->output_position == SHOUT_BUFSIZE) {
414 self->error = flush_output(self, webm);
415 }
416 }
417
418 return self->error;
419 }
420
421 /* -- utility functions -- */
422
/* Copies as much of the source buffer into the target
 * as will fit, and returns the actual size copied.
 * Updates position pointers to match.
 *
 * src_base/src_len and target_base/target_len describe the two
 * buffers; *src_position and *target_position are the offsets at
 * which reading and writing continue. The buffers must not overlap.
 *
 * Returns 0 and leaves both positions untouched when the source is
 * exhausted or the target is full. A position lying beyond its length
 * is handled the same way, so the remaining-space computation cannot
 * underflow.
 */
static size_t copy_possible(const void *src_base,
                            size_t     *src_position,
                            size_t      src_len,
                            void       *target_base,
                            size_t     *target_position,
                            size_t      target_len)
{
    /* byte pointers: arithmetic on void pointers is not standard C */
    const unsigned char *src = src_base;
    unsigned char *target = target_base;
    size_t src_space;
    size_t target_space;
    size_t to_copy;

    if (*src_position >= src_len || *target_position >= target_len) {
        return 0;
    }

    src_space = src_len - *src_position;
    target_space = target_len - *target_position;
    to_copy = (target_space < src_space) ? target_space : src_space;

    memcpy(target + *target_position, src + *src_position, to_copy);

    *src_position += to_copy;
    *target_position += to_copy;

    return to_copy;
}
447
448 /* Send currently buffered output to the server.
449 * Output buffering is needed because parsing
450 * and/or rewriting code may pass through small
451 * chunks at a time, and we don't want to expend a
452 * syscall on each one.
453 * However, we do not want to leave sendable data
454 * in the buffer before we return to the client and
455 * potentially sleep, so this is called before
456 * send_webm() returns.
457 */
flush_output(shout_t * self,webm_t * webm)458 static int flush_output(shout_t *self, webm_t *webm)
459 {
460 ssize_t ret;
461
462 if (webm->output_position == 0) {
463 return self->error;
464 }
465
466 ret = shout_send_raw(self, webm->output_buffer, webm->output_position);
467 if (ret != (ssize_t) webm->output_position) {
468 return self->error = SHOUTERR_SOCKET;
469 }
470
471 webm->output_position = 0;
472 return self->error;
473 }
474
475 /* -- EBML helper functions -- */
476
/* Try to parse an EBML tag header (ID followed by payload size)
 * starting at `buffer`.
 *
 * On success, returns the combined length of both header fields and
 * writes the ID to *tag_id and the payload size to *payload_length.
 *
 * Returns 0 if a complete header would extend past `buffer_end`.
 *
 * Returns -1 if the header is corrupt.
 *
 * Both outputs are zeroed before parsing starts.
 */

static ssize_t ebml_parse_tag(unsigned char *buffer,
                              unsigned char *buffer_end,
                              uint64_t      *tag_id,
                              uint64_t      *payload_length)
{
    ssize_t id_length;
    ssize_t length_length;

    *tag_id = 0;
    *payload_length = 0;

    /* the ID comes first ... */
    id_length = ebml_parse_var_int(buffer, buffer_end, tag_id);

    if (id_length > 0) {
        /* ... directly followed by the payload size */
        length_length = ebml_parse_var_int(buffer + id_length, buffer_end, payload_length);

        return (length_length > 0) ? id_length + length_length : length_length;
    }

    return id_length;
}
516
517 /* Try to parse an EBML variable-length integer.
518 * Returns 0 if there's not enough space to read the number;
519 * Returns -1 if the number is malformed.
520 * Else, returns the length of the number in bytes and writes the
521 * value to *out_value.
522 */
ebml_parse_var_int(unsigned char * buffer,unsigned char * buffer_end,uint64_t * out_value)523 static ssize_t ebml_parse_var_int(unsigned char *buffer,
524 unsigned char *buffer_end,
525 uint64_t *out_value)
526 {
527 ssize_t size = 1;
528 ssize_t i;
529 unsigned char mask = 0x80;
530 uint64_t value;
531 uint64_t unknown_marker;
532
533 if (buffer >= buffer_end) {
534 return 0;
535 }
536
537 /* find the length marker bit in the first byte */
538 value = buffer[0];
539
540 while (mask) {
541 if (value & mask) {
542 value = value & ~mask;
543 unknown_marker = mask - 1;
544 break;
545 }
546 size++;
547 mask = mask >> 1;
548 }
549
550 /* catch malformed number (no prefix) */
551 if (mask == 0) {
552 return -1;
553 }
554
555 /* catch number bigger than parsing buffer */
556 if (buffer + size - 1 >= buffer_end) {
557 return 0;
558 }
559
560 /* read remaining bytes of (big-endian) number */
561 for (i = 1; i < size; i++) {
562 value = (value << 8) + buffer[i];
563 unknown_marker = (unknown_marker << 8) + 0xFF;
564 }
565
566 /* catch special "unknown" length */
567
568 if (value == unknown_marker) {
569 *out_value = EBML_UNKNOWN;
570 } else {
571 *out_value = value;
572 }
573
574 return size;
575 }
576
/* Parse a big-endian int that may be from 1-8 bytes long.
 * Returns 0 if fewer than `len` bytes are available between `buffer`
 * and `buffer_end`;
 * Returns -1 if the number is mis-sized (`len` outside 1-8).
 * Else, returns the length of the number in bytes and writes the
 * value to *out_value.
 * If is_signed is true, then the int is assumed to be two's complement
 * signed, negative values will be correctly promoted, and the returned
 * unsigned number can be safely cast to a signed number on systems using
 * two's complement arithmetic.
 */
static ssize_t ebml_parse_sized_int(unsigned char *buffer,
                                    unsigned char *buffer_end,
                                    size_t         len,
                                    bool           is_signed,
                                    uint64_t      *out_value)
{
    uint64_t value;
    size_t i;

    if (len < 1 || len > 8) {
        return -1;
    }

    /* Compare against the remaining space instead of forming
     * buffer + len, which might point past the buffer. A number
     * that ends exactly at buffer_end is completely available.
     */
    if (buffer >= buffer_end || (size_t) (buffer_end - buffer) < len) {
        return 0;
    }

    /* start from all ones for negative numbers, so that
     * the sign is extended to the full 64 bits
     */
    if (is_signed && (buffer[0] & 0x80)) {
        value = ~(uint64_t) 0;
    } else {
        value = 0;
    }

    for (i = 0; i < len; i++) {
        value = (value << 8) + buffer[i];
    }

    *out_value = value;

    return (ssize_t) len;
}
618