1 /* -*- c-basic-offset: 8; -*- */
2 /* webm.c: WebM data handler
3  * $Id$
4  *
5  *  Copyright (C) 2002-2012 the Icecast team <team@icecast.org>
6  *  Copyright (C) 2015-2019 Philipp "ph3-der-loewe" Schafft <lion@lion.leolix.org>
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public
19  *  License along with this library; if not, write to the Free
20  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21  */
22 
23 #ifdef HAVE_CONFIG_H
24 #   include <config.h>
25 #endif
26 
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #ifdef HAVE_INTTYPES_H
32 #   include <inttypes.h>
33 #endif
34 
35 #include <shout/shout.h>
36 #include "shout_private.h"
37 
38 /* -- local datatypes -- */
39 
/* A value that no EBML var-int is allowed to take.
 * ebml_parse_var_int() stores it in place of the reserved all-ones
 * encoding; a real var-int carries at most 56 value bits, so it can
 * never decode to this 64-bit all-ones pattern. */
#define EBML_UNKNOWN            ((uint64_t) -1)

/* masks to turn the tag ID varints from the Matroska spec
 * into their parsed-as-number equivalents.
 * Each mask clears the length-marker bit for one ID width, because
 * ebml_parse_var_int() strips that bit from the value it reports. */
#define EBML_LONG_MASK          (~0x10000000)
#define EBML_MID3_MASK          (~0x200000)
#define EBML_MID2_MASK          (~0x4000)
#define EBML_SHORT_MASK         (~0x80)

/* tag IDs we're interested in */
/* four-byte IDs */
#define WEBM_EBML_ID            (0x1A45DFA3 & EBML_LONG_MASK)
#define WEBM_SEGMENT_ID         (0x18538067 & EBML_LONG_MASK)
#define WEBM_CLUSTER_ID         (0x1F43B675 & EBML_LONG_MASK)
#define WEBM_SEGMENT_INFO_ID    (0x1549A966 & EBML_LONG_MASK)

/* three-byte IDs */
#define WEBM_TIMESTAMPSCALE_ID  (0x2AD7B1 & EBML_MID3_MASK)

/* one-byte IDs */
#define WEBM_TIMECODE_ID        (0xE7 & EBML_SHORT_MASK)
#define WEBM_SIMPLE_BLOCK_ID    (0xA3 & EBML_SHORT_MASK)
#define WEBM_BLOCK_GROUP_ID     (0xA0 & EBML_SHORT_MASK)
#define WEBM_BLOCK_ID           (0xA1 & EBML_SHORT_MASK)
62 
/* What webm_process() does with the next bytes of buffered input. */
typedef enum webm_parsing_state {
    /* parse an EBML tag header at the current read position */
    WEBM_STATE_READ_TAG = 0,
    /* pass copy_len bytes through to the output unchanged */
    WEBM_STATE_COPY_THRU
} webm_parsing_state;
67 
/* state for a filter that extracts timestamp
 * information from a WebM stream
 */
/* TODO: provide for "fake chaining", where
 * concatenated files have extra headers stripped
 * and Cluster timestamps rewritten
 */
typedef struct _webm_t {

    /* processing state */
    /* set by the tag parser when it needs bytes that have not arrived yet */
    bool waiting_for_more_input;
    /* selects the operation webm_process() performs next */
    webm_parsing_state parsing_state;
    /* bytes still to pass through while in WEBM_STATE_COPY_THRU */
    uint64_t copy_len;

    /* buffer state */
    /* offset in input_buffer where newly received data is appended */
    size_t input_write_position;
    /* offset in input_buffer of the next unprocessed byte */
    size_t input_read_position;
    /* number of bytes currently queued in output_buffer */
    size_t output_position;

    /* Metadata */
    /* TimestampScale value; reset to 1000000 whenever a Segment Info
     * element is opened, zero until then (state is calloc'ed) */
    uint64_t timestamp_scale;

    /* statistics */
    /* value of the most recently parsed Timecode element */
    uint64_t cluster_timestamp;
    /* most recent timestamp seen: a Cluster timecode, or the Cluster
     * timecode plus a (Simple)Block's relative timecode */
    uint64_t latest_timestamp;

    /* buffer storage */
    unsigned char input_buffer[SHOUT_BUFSIZE];
    unsigned char output_buffer[SHOUT_BUFSIZE];

} webm_t;
99 
/* -- static prototypes -- */
/* callbacks installed on the shout_t by shout_open_webm() */
static int  send_webm(shout_t *self, const unsigned char *data, size_t len);
static void close_webm(shout_t *self);

/* stream processing: parse buffered input and queue it for output */
static int webm_process(shout_t *self, webm_t *webm);
static int webm_process_tag(shout_t *self, webm_t *webm);
static int webm_output(shout_t *self, webm_t *webm, const unsigned char *data, size_t len);

/* buffer utilities */
static size_t copy_possible(const void *src_base,
                            size_t *src_position,
                            size_t src_len,
                            void *target_base,
                            size_t *target_position,
                            size_t target_len);
static int flush_output(shout_t *self, webm_t *webm);

/* EBML parsers: each returns the number of bytes consumed,
 * 0 if more input is needed, or -1 on malformed data */
static ssize_t ebml_parse_tag(unsigned char *buffer,
                              unsigned char *buffer_end,
                              uint64_t *tag_id,
                              uint64_t *payload_length);
static ssize_t ebml_parse_var_int(unsigned char *buffer,
                                  unsigned char *buffer_end,
                                  uint64_t *out_value);
static ssize_t ebml_parse_sized_int(unsigned char       *buffer,
                                    unsigned char       *buffer_end,
                                    size_t              len,
                                    bool                 is_signed,
                                    uint64_t  *out_value);
128 
129 /* -- interface functions -- */
/* Install the WebM handler on a shout connection.
 *
 * Allocates zero-initialised filter state, stores it in
 * self->format_data and hooks up the send/close callbacks.
 *
 * Returns SHOUTERR_SUCCESS, or SHOUTERR_MALLOC (also recorded in
 * self->error) when the state cannot be allocated.
 */
int shout_open_webm(shout_t *self)
{
    webm_t *state = calloc(1, sizeof(*state));

    if (state == NULL) {
        self->error = SHOUTERR_MALLOC;
        return self->error;
    }

    /* hand the filter state and callbacks over to the connection */
    self->format_data = state;
    self->send        = send_webm;
    self->close       = close_webm;

    return SHOUTERR_SUCCESS;
}
147 
/* Send callback: feed len bytes of WebM data through the filter.
 *
 * The data is moved into the input buffer in as many pieces as needed;
 * after each piece the buffered input is processed. Any output still
 * queued afterwards is flushed, and self->senttime is refreshed from
 * the latest timestamp seen in the stream.
 *
 * Returns self->error, which is reset to SHOUTERR_SUCCESS on entry.
 */
static int send_webm(shout_t *self, const unsigned char *data, size_t len)
{
    webm_t *filter = (webm_t *) self->format_data;
    size_t consumed;
    uint64_t scaled_time;

    self->error = SHOUTERR_SUCCESS;

    for (consumed = 0; consumed < len && self->error == SHOUTERR_SUCCESS; ) {
        /* top up the input buffer, then work through what it holds */
        copy_possible(data, &consumed, len,
                      filter->input_buffer, &filter->input_write_position, SHOUT_BUFSIZE);

        self->error = webm_process(self, filter);
    }

    /* Don't leave sendable data behind, unless we're already failing */
    if (self->error == SHOUTERR_SUCCESS) {
        self->error = flush_output(self, filter);
    }

    /* Report latest known timecode for rate-control */
    scaled_time = filter->latest_timestamp * filter->timestamp_scale;
    self->senttime = scaled_time / 1000;

    return self->error;
}
172 
/* Close callback: release the filter state allocated by
 * shout_open_webm() and clear the pointer so that it cannot be
 * used or freed a second time.
 */
static void close_webm(shout_t *self)
{
    /* free(NULL) is a no-op, so no guard is required */
    free(self->format_data);
    self->format_data = NULL;
}
180 
181 /* -- processing functions -- */
182 
183 /* Process what we can of the input buffer,
184  * extracting statistics or rewriting the
185  * stream as necessary.
186  * Returns a status code to indicate socket errors.
187  */
webm_process(shout_t * self,webm_t * webm)188 static int webm_process(shout_t *self, webm_t *webm)
189 {
190     size_t to_process;
191 
192     /* loop as long as buffer holds process-able data */
193     webm->waiting_for_more_input = false;
194     while (webm->input_read_position < webm->input_write_position
195            && !webm->waiting_for_more_input
196            && self->error == SHOUTERR_SUCCESS) {
197 
198         /* calculate max space an operation can work on */
199         to_process = webm->input_write_position - webm->input_read_position;
200 
201         /* perform appropriate operation */
202         switch (webm->parsing_state) {
203             case WEBM_STATE_READ_TAG:
204                 self->error =  webm_process_tag(self, webm);
205                 break;
206 
207             case WEBM_STATE_COPY_THRU:
208                 /* copy a known quantity of bytes to the output */
209 
210                 /* calculate size needing to be copied this step */
211                 if (webm->copy_len < to_process) {
212                     to_process = webm->copy_len;
213                 }
214 
215                 /* do copy */
216                 self->error = webm_output(self, webm,
217                                           webm->input_buffer + webm->input_read_position,
218                                           to_process);
219 
220                 /* update state with copy progress */
221                 webm->copy_len -= to_process;
222                 webm->input_read_position += to_process;
223                 if (webm->copy_len == 0) {
224                     webm->parsing_state = WEBM_STATE_READ_TAG;
225                 }
226 
227                 break;
228 
229         }
230 
231     }
232 
233     if (webm->input_read_position < webm->input_write_position) {
234         /* slide unprocessed data to front of buffer */
235         to_process = webm->input_write_position - webm->input_read_position;
236         memmove(webm->input_buffer, webm->input_buffer + webm->input_read_position, to_process);
237 
238         webm->input_read_position = 0;
239         webm->input_write_position = to_process;
240     } else {
241         /* subtract read position instead of zeroing;
242          * this allows skipping over large spans of data by
243          * setting the read pointer far ahead. Processing won't
244          * resume until the read pointer is actually within the buffer.
245          */
246         webm->input_read_position -= webm->input_write_position;
247         webm->input_write_position = 0;
248     }
249 
250     return self->error;
251 }
252 
253 /* Try to read a tag header & handle it appropriately.
254  * Returns an error code for socket errors or malformed input.
255  */
webm_process_tag(shout_t * self,webm_t * webm)256 static int webm_process_tag(shout_t *self, webm_t *webm)
257 {
258     ssize_t tag_length;
259     uint64_t tag_id;
260     uint64_t payload_length;
261 
262     uint64_t timecode;
263     ssize_t track_number_length;
264     uint64_t track_number;
265     uint64_t timestamp_scale;
266 
267     uint64_t to_copy;
268 
269     ssize_t status;
270 
271     unsigned char *start_of_buffer = webm->input_buffer + webm->input_read_position;
272     unsigned char *end_of_buffer = webm->input_buffer + webm->input_write_position;
273 
274     /* parse tag header */
275     tag_length = ebml_parse_tag(start_of_buffer, end_of_buffer, &tag_id, &payload_length);
276     if (tag_length == 0) {
277         webm->waiting_for_more_input = true;
278         return self->error;
279     } else if (tag_length < 0) {
280         return self->error = SHOUTERR_INSANE;
281     }
282 
283     /* most tags will be copied, header & payload, to output unaltered */
284     to_copy = tag_length + payload_length;
285 
286     /* break open tags of unknown length, to process all children */
287     if (payload_length == EBML_UNKNOWN) {
288         to_copy = tag_length;
289     }
290 
291     /* handle tag appropriately */
292 
293     switch (tag_id) {
294         case WEBM_SEGMENT_ID:
295         case WEBM_CLUSTER_ID:
296             /* open containers to process children */
297             to_copy = tag_length;
298             break;
299 
300         case WEBM_SEGMENT_INFO_ID:
301             /* open containers to process children */
302             to_copy = tag_length;
303             /* set defaults */
304             webm->timestamp_scale = 1000000;
305             break;
306 
307         case WEBM_TIMESTAMPSCALE_ID:
308             /* read cluster timecode */
309             status = ebml_parse_sized_int(start_of_buffer + tag_length,
310                                           end_of_buffer,
311                                           payload_length,
312                                           false, &timestamp_scale);
313 
314             if (status == 0) {
315                 webm->waiting_for_more_input = true;
316                 return self->error;
317             } else if (status < 0) {
318                 return self->error = SHOUTERR_INSANE;
319             }
320 
321             webm->timestamp_scale = timestamp_scale;
322             break;
323 
324         case WEBM_TIMECODE_ID:
325             /* read cluster timecode */
326             status = ebml_parse_sized_int(start_of_buffer + tag_length,
327                                           end_of_buffer,
328                                           payload_length,
329                                           false, &timecode);
330 
331             if (status == 0) {
332                 webm->waiting_for_more_input = true;
333                 return self->error;
334             } else if (status < 0) {
335                 return self->error = SHOUTERR_INSANE;
336             }
337 
338             /* report timecode */
339             webm->cluster_timestamp = timecode;
340             webm->latest_timestamp = timecode;
341 
342             /* TODO: detect backwards jumps and rewrite to be monotonic */
343             break;
344 
345         case WEBM_BLOCK_GROUP_ID:
346             /* open container to process children */
347             to_copy = tag_length;
348 
349             break;
350 
351         case WEBM_SIMPLE_BLOCK_ID:
352         case WEBM_BLOCK_ID:
353             /* extract block or simple block timecode */
354 
355             /* [simple] blocks start with a varint, so read it to
356              * know the offset of the following fields
357              */
358             track_number_length = ebml_parse_var_int(start_of_buffer + tag_length,
359                                                      end_of_buffer, &track_number);
360             if (track_number_length == 0) {
361                 webm->waiting_for_more_input = true;
362                 return self->error;
363             } else if (track_number_length < 0) {
364                 return self->error = SHOUTERR_INSANE;
365             }
366 
367             /* now read the actual (signed 16-bit) timecode;
368              * this code is relative to the Cluster's timecode.
369              *
370              * ASSUMPTION: it will not actually be negative,
371              * since WebM encoding guidelines advise all timestamps
372              * be monotonically increasing.
373              */
374             status = ebml_parse_sized_int(start_of_buffer + tag_length + track_number_length,
375                                           end_of_buffer, 2, true, &timecode);
376 
377             if (status == 0) {
378                 webm->waiting_for_more_input = true;
379                 return self->error;
380             } else if (status < 0) {
381                 return self->error = SHOUTERR_INSANE;
382             }
383 
384             /* report timecode */
385             webm->latest_timestamp = webm->cluster_timestamp + timecode;
386 
387             break;
388     }
389 
390     /* queue copying */
391 
392     if (to_copy > 0) {
393         webm->copy_len = to_copy;
394         webm->parsing_state = WEBM_STATE_COPY_THRU;
395     }
396 
397     return self->error;
398 }
399 
400 /* Queue the given data in the output buffer,
401  * flushing as needed. Returns a status code
402  * to allow detecting socket errors on a flush.
403  */
webm_output(shout_t * self,webm_t * webm,const unsigned char * data,size_t len)404 static int webm_output(shout_t *self, webm_t *webm, const unsigned char *data, size_t len)
405 {
406     size_t output_progress = 0;
407 
408     while (output_progress < len && self->error == SHOUTERR_SUCCESS)
409     {
410         copy_possible(data, &output_progress, len,
411                       webm->output_buffer, &webm->output_position, SHOUT_BUFSIZE);
412 
413         if (webm->output_position == SHOUT_BUFSIZE) {
414             self->error = flush_output(self, webm);
415         }
416     }
417 
418     return self->error;
419 }
420 
421 /* -- utility functions -- */
422 
/* Copies as much of the source buffer into the target
 * as will fit, and returns the actual size copied.
 * Updates position pointers to match.
 *
 * src_base/src_len and target_base/target_len describe the two
 * buffers; *src_position and *target_position are the cursors within
 * them and are advanced by the number of bytes copied.
 *
 * Returns 0 without touching anything when either cursor is already
 * at (or beyond) the end of its buffer.
 */
static size_t copy_possible(const void *src_base,
                            size_t *src_position,
                            size_t src_len,
                            void *target_base,
                            size_t *target_position,
                            size_t target_len)
{
    /* byte-typed views: arithmetic on void pointers is not ISO C */
    const unsigned char *src = src_base;
    unsigned char *target = target_base;
    size_t src_space;
    size_t target_space;
    size_t to_copy;

    /* A cursor past its buffer would make the unsigned subtraction
     * below wrap around to a huge length, so treat it as "no room".
     */
    if (*src_position >= src_len || *target_position >= target_len) {
        return 0;
    }

    src_space = src_len - *src_position;
    target_space = target_len - *target_position;
    to_copy = (target_space < src_space) ? target_space : src_space;

    memcpy(target + *target_position, src + *src_position, to_copy);

    *src_position += to_copy;
    *target_position += to_copy;

    return to_copy;
}
447 
448 /* Send currently buffered output to the server.
449  * Output buffering is needed because parsing
450  * and/or rewriting code may pass through small
451  * chunks at a time, and we don't want to expend a
452  * syscall on each one.
453  * However, we do not want to leave sendable data
454  * in the buffer before we return to the client and
455  * potentially sleep, so this is called before
456  * send_webm() returns.
457  */
flush_output(shout_t * self,webm_t * webm)458 static int flush_output(shout_t *self, webm_t *webm)
459 {
460     ssize_t ret;
461 
462     if (webm->output_position == 0) {
463         return self->error;
464     }
465 
466     ret = shout_send_raw(self, webm->output_buffer, webm->output_position);
467     if (ret != (ssize_t) webm->output_position) {
468         return self->error = SHOUTERR_SOCKET;
469     }
470 
471     webm->output_position = 0;
472     return self->error;
473 }
474 
475 /* -- EBML helper functions -- */
476 
/* Try to parse an EBML tag header (ID followed by payload size)
 * located at buffer.
 *
 * On success returns the number of bytes the header occupies, writes
 * the ID (as parsed by ebml_parse_var_int) to *tag_id and the payload
 * size to *payload_length.
 *
 * Returns 0 if the header is not complete before buffer_end.
 *
 * Returns -1 if the header is corrupt.
 *
 * Both outputs are zeroed first, so they hold defined values even
 * when parsing stops early.
 */
static ssize_t ebml_parse_tag(unsigned char *buffer,
                              unsigned char *buffer_end,
                              uint64_t *tag_id,
                              uint64_t *payload_length)
{
    ssize_t id_bytes;
    ssize_t size_bytes;

    *tag_id = 0;
    *payload_length = 0;

    /* the tag ID comes first ... */
    id_bytes = ebml_parse_var_int(buffer, buffer_end, tag_id);
    if (id_bytes < 1) {
        return id_bytes;
    }

    /* ... directly followed by the payload size */
    size_bytes = ebml_parse_var_int(buffer + id_bytes, buffer_end, payload_length);
    if (size_bytes < 1) {
        return size_bytes;
    }

    return id_bytes + size_bytes;
}
516 
517 /* Try to parse an EBML variable-length integer.
518  * Returns 0 if there's not enough space to read the number;
519  * Returns -1 if the number is malformed.
520  * Else, returns the length of the number in bytes and writes the
521  * value to *out_value.
522  */
ebml_parse_var_int(unsigned char * buffer,unsigned char * buffer_end,uint64_t * out_value)523 static ssize_t ebml_parse_var_int(unsigned char *buffer,
524                                   unsigned char *buffer_end,
525                                   uint64_t *out_value)
526 {
527     ssize_t size = 1;
528     ssize_t i;
529     unsigned char mask = 0x80;
530     uint64_t value;
531     uint64_t unknown_marker;
532 
533     if (buffer >= buffer_end) {
534         return 0;
535     }
536 
537     /* find the length marker bit in the first byte */
538     value = buffer[0];
539 
540     while (mask) {
541         if (value & mask) {
542             value = value & ~mask;
543             unknown_marker = mask - 1;
544             break;
545         }
546         size++;
547         mask = mask >> 1;
548     }
549 
550     /* catch malformed number (no prefix) */
551     if (mask == 0) {
552         return -1;
553     }
554 
555     /* catch number bigger than parsing buffer */
556     if (buffer + size - 1 >= buffer_end) {
557         return 0;
558     }
559 
560     /* read remaining bytes of (big-endian) number */
561     for (i = 1; i < size; i++) {
562         value = (value << 8) + buffer[i];
563         unknown_marker = (unknown_marker << 8) + 0xFF;
564     }
565 
566     /* catch special "unknown" length */
567 
568     if (value == unknown_marker) {
569         *out_value = EBML_UNKNOWN;
570     } else {
571         *out_value = value;
572     }
573 
574     return size;
575 }
576 
/* Parse a big-endian int that may be from 1-8 bytes long.
 *
 * buffer      first byte of the number
 * buffer_end  one past the last readable byte
 * len         width of the number in bytes
 * is_signed   treat the number as two's complement and sign-extend it
 * out_value   receives the parsed value (written on success only)
 *
 * Returns 0 if fewer than len bytes are available before buffer_end;
 * Returns -1 if the number is mis-sized (len outside 1-8).
 * Else, returns the length of the number in bytes and writes the
 * value to *out_value.
 * If is_signed is true, negative values are sign-extended to 64 bits,
 * so the returned unsigned number can be safely cast to a signed
 * number on systems using two's complement arithmetic.
 */
static ssize_t ebml_parse_sized_int(unsigned char       *buffer,
                                    unsigned char       *buffer_end,
                                    size_t              len,
                                    bool                 is_signed,
                                    uint64_t  *out_value)
{
    uint64_t value;
    size_t i;

    if (len < 1 || len > 8) {
        return -1;
    }

    /* A number that ends exactly at buffer_end is complete. Comparing
     * the remaining space against len also avoids forming a pointer
     * beyond the end of the buffer.
     */
    if (buffer > buffer_end || (size_t) (buffer_end - buffer) < len) {
        return 0;
    }

    /* pre-fill with ones when the sign bit is set, so the shifts
     * below leave the upper bytes sign-extended */
    if (is_signed && (buffer[0] & 0x80)) {
        value = ~(uint64_t) 0;
    } else {
        value = 0;
    }

    for (i = 0; i < len; i++) {
        value = (value << 8) | buffer[i];
    }

    *out_value = value;

    return (ssize_t) len;
}
618