1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "media/base/container_names.h"
6 
7 #include <stddef.h>
8 #include <string.h>
9 
10 #include <cctype>
11 #include <limits>
12 
13 #include "base/check_op.h"
14 #include "base/numerics/safe_conversions.h"
15 #include "base/stl_util.h"
16 #include "media/base/bit_reader.h"
17 
18 namespace media {
19 
20 namespace container_names {
21 
// Packs four 8-bit values into one 32-bit tag, with |a| in the most
// significant byte and |d| in the least significant byte. Each argument is
// first narrowed to uint8_t, so only its low 8 bits contribute.
#define TAG(a, b, c, d)                                     \
  ((static_cast<uint32_t>(static_cast<uint8_t>(a)) << 24) | \
   (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 16) | \
   (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 8) |  \
   (static_cast<uint32_t>(static_cast<uint8_t>(d))))

// Returns false from the enclosing function when |x| evaluates to false.
// The do/while(0) wrapper makes the macro usable as a single statement.
#define RCHECK(x)     \
    do {              \
      if (!(x))       \
        return false; \
    } while (0)

// The byte sequence 0xEF 0xBB 0xBF expressed as a string literal.
#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
35 
// Reads a 16-bit big-endian value from the 2 bytes starting at |p|.
static int Read16(const uint8_t* p) {
  const int high_byte = p[0];
  const int low_byte = p[1];
  return (high_byte << 8) | low_byte;
}
40 
// Reads a 24-bit big-endian value from the 3 bytes starting at |p|.
static uint32_t Read24(const uint8_t* p) {
  uint32_t value = p[0];
  value = (value << 8) | p[1];
  value = (value << 8) | p[2];
  return value;
}
45 
// Reads a 32-bit big-endian value from the 4 bytes starting at |p|.
// The value is accumulated in an unsigned 32-bit variable so that a leading
// byte of 0x80 or above never shifts a set bit into the sign position of a
// promoted int.
static uint32_t Read32(const uint8_t* p) {
  uint32_t value = p[0];
  value = (value << 8) | p[1];
  value = (value << 8) | p[2];
  value = (value << 8) | p[3];
  return value;
}
50 
// Reads a 32-bit little-endian value from the 4 bytes starting at |p|.
// The value is accumulated in an unsigned 32-bit variable so that a final
// byte of 0x80 or above never shifts a set bit into the sign position of a
// promoted int.
static uint32_t Read32LE(const uint8_t* p) {
  uint32_t value = p[3];
  value = (value << 8) | p[2];
  value = (value << 8) | p[1];
  value = (value << 8) | p[0];
  return value;
}
55 
// Returns true if the first strlen(|prefix|) bytes of |buffer| are equal to
// |prefix|. A prefix longer than |buffer_size| never matches, so no byte
// beyond the end of the buffer is compared.
static bool StartsWith(const uint8_t* buffer,
                       size_t buffer_size,
                       const char* prefix) {
  const size_t length = strlen(prefix);
  if (length > buffer_size)
    return false;
  return memcmp(buffer, prefix, length) == 0;
}
65 
// Returns true if the first |prefix_size| bytes of |buffer| are equal to the
// |prefix_size| bytes at |prefix|. Taking an explicit length allows |prefix|
// to contain embedded \0 bytes. A prefix longer than |buffer_size| never
// matches, so no byte beyond the end of the buffer is compared.
static bool StartsWith(const uint8_t* buffer,
                       size_t buffer_size,
                       const uint8_t* prefix,
                       size_t prefix_size) {
  if (prefix_size > buffer_size)
    return false;
  return memcmp(buffer, prefix, prefix_size) == 0;
}
75 
76 // Helper function to read up to 64 bits from a bit stream.
77 // TODO(chcunningham): Delete this helper and replace with direct calls to
78 // reader that handle read failure. As-is, we hide failure because returning 0
79 // is valid for both a successful and failed read.
ReadBits(BitReader * reader,int num_bits)80 static uint64_t ReadBits(BitReader* reader, int num_bits) {
81   DCHECK_GE(reader->bits_available(), num_bits);
82   DCHECK((num_bits > 0) && (num_bits <= 64));
83   uint64_t value = 0;
84 
85   if (!reader->ReadBits(num_bits, &value))
86     return 0;
87 
88   return value;
89 }
90 
// AC3 frame sizes in bytes, indexed as [frmsizecod][fscod]. CheckAc3() adds
// the selected entry to its byte offset to step from one syncframe to the
// next. Per ATSC A/52 Table 5.18 (which lists the sizes in 16-bit words) the
// columns correspond to fscod 0 (48 kHz), 1 (44.1 kHz) and 2 (32 kHz), and
// each pair of rows shares one nominal bit rate.
const int kAc3FrameSizeTable[38][3] = {
  { 128, 138, 192 },    { 128, 140, 192 },     // 32 kbps
  { 160, 174, 240 },    { 160, 176, 240 },     // 40 kbps
  { 192, 208, 288 },    { 192, 210, 288 },     // 48 kbps
  { 224, 242, 336 },    { 224, 244, 336 },     // 56 kbps
  { 256, 278, 384 },    { 256, 280, 384 },     // 64 kbps
  { 320, 348, 480 },    { 320, 350, 480 },     // 80 kbps
  { 384, 416, 576 },    { 384, 418, 576 },     // 96 kbps
  { 448, 486, 672 },    { 448, 488, 672 },     // 112 kbps
  { 512, 556, 768 },    { 512, 558, 768 },     // 128 kbps
  { 640, 696, 960 },    { 640, 698, 960 },     // 160 kbps
  { 768, 834, 1152 },   { 768, 836, 1152 },    // 192 kbps
  { 896, 974, 1344 },   { 896, 976, 1344 },    // 224 kbps
  { 1024, 1114, 1536 }, { 1024, 1116, 1536 },  // 256 kbps
  { 1280, 1392, 1920 }, { 1280, 1394, 1920 },  // 320 kbps
  { 1536, 1670, 2304 }, { 1536, 1672, 2304 },  // 384 kbps
  { 1792, 1950, 2688 }, { 1792, 1952, 2688 },  // 448 kbps
  { 2048, 2228, 3072 }, { 2048, 2230, 3072 },  // 512 kbps
  { 2304, 2506, 3456 }, { 2304, 2508, 3456 },  // 576 kbps
  // 640 kbps at 44.1 kHz is 1393/1394 words, i.e. 2786/2788 bytes.
  { 2560, 2786, 3840 }, { 2560, 2788, 3840 }   // 640 kbps
};
104 
105 // Checks for an ADTS AAC container.
CheckAac(const uint8_t * buffer,int buffer_size)106 static bool CheckAac(const uint8_t* buffer, int buffer_size) {
107   // Audio Data Transport Stream (ADTS) header is 7 or 9 bytes
108   // (from http://wiki.multimedia.cx/index.php?title=ADTS)
109   RCHECK(buffer_size > 6);
110 
111   int offset = 0;
112   while (offset + 6 < buffer_size) {
113     BitReader reader(buffer + offset, 6);
114 
115     // Syncword must be 0xfff.
116     RCHECK(ReadBits(&reader, 12) == 0xfff);
117 
118     // Skip MPEG version.
119     reader.SkipBits(1);
120 
121     // Layer is always 0.
122     RCHECK(ReadBits(&reader, 2) == 0);
123 
124     // Skip protection + profile.
125     reader.SkipBits(1 + 2);
126 
127     // Check sampling frequency index.
128     RCHECK(ReadBits(&reader, 4) != 15);  // Forbidden.
129 
130     // Skip private stream, channel configuration, originality, home,
131     // copyrighted stream, and copyright_start.
132     reader.SkipBits(1 + 3 + 1 + 1 + 1 + 1);
133 
134     // Get frame length (includes header).
135     int size = ReadBits(&reader, 13);
136     RCHECK(size > 0);
137     offset += size;
138   }
139   return true;
140 }
141 
142 const uint16_t kAc3SyncWord = 0x0b77;
143 
144 // Checks for an AC3 container.
CheckAc3(const uint8_t * buffer,int buffer_size)145 static bool CheckAc3(const uint8_t* buffer, int buffer_size) {
146   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
147   //            Doc. A/52:2012
148   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
149 
150   // AC3 container looks like syncinfo | bsi | audblk * 6 | aux | check.
151   RCHECK(buffer_size > 6);
152 
153   int offset = 0;
154   while (offset + 6 < buffer_size) {
155     BitReader reader(buffer + offset, 6);
156 
157     // Check syncinfo.
158     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
159 
160     // Skip crc1.
161     reader.SkipBits(16);
162 
163     // Verify fscod.
164     int sample_rate_code = ReadBits(&reader, 2);
165     RCHECK(sample_rate_code != 3);  // Reserved.
166 
167     // Verify frmsizecod.
168     int frame_size_code = ReadBits(&reader, 6);
169     RCHECK(frame_size_code < 38);  // Undefined.
170 
171     // Verify bsid.
172     RCHECK(ReadBits(&reader, 5) < 10);  // Normally 8 or 6, 16 used by EAC3.
173 
174     offset += kAc3FrameSizeTable[frame_size_code][sample_rate_code];
175   }
176   return true;
177 }
178 
179 // Checks for an EAC3 container (very similar to AC3)
CheckEac3(const uint8_t * buffer,int buffer_size)180 static bool CheckEac3(const uint8_t* buffer, int buffer_size) {
181   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
182   //            Doc. A/52:2012
183   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
184 
185   // EAC3 container looks like syncinfo | bsi | audfrm | audblk* | aux | check.
186   RCHECK(buffer_size > 6);
187 
188   int offset = 0;
189   while (offset + 6 < buffer_size) {
190     BitReader reader(buffer + offset, 6);
191 
192     // Check syncinfo.
193     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
194 
195     // Verify strmtyp.
196     RCHECK(ReadBits(&reader, 2) != 3);
197 
198     // Skip substreamid.
199     reader.SkipBits(3);
200 
201     // Get frmsize. Include syncinfo size and convert to bytes.
202     int frame_size = (ReadBits(&reader, 11) + 1) * 2;
203     RCHECK(frame_size >= 7);
204 
205     // Skip fscod, fscod2, acmod, and lfeon.
206     reader.SkipBits(2 + 2 + 3 + 1);
207 
208     // Verify bsid.
209     int bit_stream_id = ReadBits(&reader, 5);
210     RCHECK(bit_stream_id >= 11 && bit_stream_id <= 16);
211 
212     offset += frame_size;
213   }
214   return true;
215 }
216 
217 // Additional checks for a BINK container.
CheckBink(const uint8_t * buffer,int buffer_size)218 static bool CheckBink(const uint8_t* buffer, int buffer_size) {
219   // Reference: http://wiki.multimedia.cx/index.php?title=Bink_Container
220   RCHECK(buffer_size >= 44);
221 
222   // Verify number of frames specified.
223   RCHECK(Read32LE(buffer + 8) > 0);
224 
225   // Verify width in range.
226   int width = Read32LE(buffer + 20);
227   RCHECK(width > 0 && width <= 32767);
228 
229   // Verify height in range.
230   int height = Read32LE(buffer + 24);
231   RCHECK(height > 0 && height <= 32767);
232 
233   // Verify frames per second specified.
234   RCHECK(Read32LE(buffer + 28) > 0);
235 
236   // Verify video frames per second specified.
237   RCHECK(Read32LE(buffer + 32) > 0);
238 
239   // Number of audio tracks must be 256 or less.
240   return (Read32LE(buffer + 40) <= 256);
241 }
242 
243 // Additional checks for a CAF container.
CheckCaf(const uint8_t * buffer,int buffer_size)244 static bool CheckCaf(const uint8_t* buffer, int buffer_size) {
245   // Reference: Apple Core Audio Format Specification 1.0
246   // (https://developer.apple.com/library/mac/#documentation/MusicAudio/Reference/CAFSpec/CAF_spec/CAF_spec.html)
247   RCHECK(buffer_size >= 52);
248   BitReader reader(buffer, buffer_size);
249 
250   // mFileType should be "caff".
251   RCHECK(ReadBits(&reader, 32) == TAG('c', 'a', 'f', 'f'));
252 
253   // mFileVersion should be 1.
254   RCHECK(ReadBits(&reader, 16) == 1);
255 
256   // Skip mFileFlags.
257   reader.SkipBits(16);
258 
259   // First chunk should be Audio Description chunk, size 32l.
260   RCHECK(ReadBits(&reader, 32) == TAG('d', 'e', 's', 'c'));
261   RCHECK(ReadBits(&reader, 64) == 32);
262 
263   // CAFAudioFormat.mSampleRate(float64) not 0
264   RCHECK(ReadBits(&reader, 64) != 0);
265 
266   // CAFAudioFormat.mFormatID not 0
267   RCHECK(ReadBits(&reader, 32) != 0);
268 
269   // Skip CAFAudioFormat.mBytesPerPacket and mFramesPerPacket.
270   reader.SkipBits(32 + 32);
271 
272   // CAFAudioFormat.mChannelsPerFrame not 0
273   RCHECK(ReadBits(&reader, 32) != 0);
274   return true;
275 }
276 
// Lookup table used by CheckDts(): entry i is true when i is an accepted
// value of the 4-bit core audio sampling frequency field. Read-only.
static const bool kSamplingFrequencyValid[16] = { false, true, true, true,
                                                  false, false, true, true,
                                                  true, false, false, true,
                                                  true, true, false, false };

// Lookup table used by CheckDts(): entry i is true when i is an accepted
// value of the 3-bit extension audio descriptor field. Read-only.
static const bool kExtAudioIdValid[8] = { true, false, true, false, false,
                                          false, true, false };
283 
284 // Additional checks for a DTS container.
CheckDts(const uint8_t * buffer,int buffer_size)285 static bool CheckDts(const uint8_t* buffer, int buffer_size) {
286   // Reference: ETSI TS 102 114 V1.3.1 (2011-08)
287   // (http://www.etsi.org/deliver/etsi_ts/102100_102199/102114/01.03.01_60/ts_102114v010301p.pdf)
288   RCHECK(buffer_size > 11);
289 
290   int offset = 0;
291   while (offset + 11 < buffer_size) {
292     BitReader reader(buffer + offset, 11);
293 
294     // Verify sync word.
295     RCHECK(ReadBits(&reader, 32) == 0x7ffe8001);
296 
297     // Skip frame type and deficit sample count.
298     reader.SkipBits(1 + 5);
299 
300     // Verify CRC present flag.
301     RCHECK(ReadBits(&reader, 1) == 0);  // CPF must be 0.
302 
303     // Verify number of PCM sample blocks.
304     RCHECK(ReadBits(&reader, 7) >= 5);
305 
306     // Verify primary frame byte size.
307     int frame_size = ReadBits(&reader, 14);
308     RCHECK(frame_size >= 95);
309 
310     // Skip audio channel arrangement.
311     reader.SkipBits(6);
312 
313     // Verify core audio sampling frequency is an allowed value.
314     size_t sampling_freq_index = ReadBits(&reader, 4);
315     RCHECK(sampling_freq_index < base::size(kSamplingFrequencyValid));
316     RCHECK(kSamplingFrequencyValid[sampling_freq_index]);
317 
318     // Verify transmission bit rate is valid.
319     RCHECK(ReadBits(&reader, 5) <= 25);
320 
321     // Verify reserved field is 0.
322     RCHECK(ReadBits(&reader, 1) == 0);
323 
324     // Skip dynamic range flag, time stamp flag, auxiliary data flag, and HDCD.
325     reader.SkipBits(1 + 1 + 1 + 1);
326 
327     // Verify extension audio descriptor flag is an allowed value.
328     size_t audio_id_index = ReadBits(&reader, 3);
329     RCHECK(audio_id_index < base::size(kExtAudioIdValid));
330     RCHECK(kExtAudioIdValid[audio_id_index]);
331 
332     // Skip extended coding flag and audio sync word insertion flag.
333     reader.SkipBits(1 + 1);
334 
335     // Verify low frequency effects flag is an allowed value.
336     RCHECK(ReadBits(&reader, 2) != 3);
337 
338     offset += frame_size + 1;
339   }
340   return true;
341 }
342 
343 // Checks for a DV container.
CheckDV(const uint8_t * buffer,int buffer_size)344 static bool CheckDV(const uint8_t* buffer, int buffer_size) {
345   // Reference: SMPTE 314M (Annex A has differences with IEC 61834).
346   // (http://standards.smpte.org/content/978-1-61482-454-1/st-314-2005/SEC1.body.pdf)
347   RCHECK(buffer_size > 11);
348 
349   int offset = 0;
350   int current_sequence_number = -1;
351   int last_block_number[6] = {0};
352   while (offset + 11 < buffer_size) {
353     BitReader reader(buffer + offset, 11);
354 
355     // Decode ID data. Sections 5, 6, and 7 are reserved.
356     int section = ReadBits(&reader, 3);
357     RCHECK(section < 5);
358 
359     // Next bit must be 1.
360     RCHECK(ReadBits(&reader, 1) == 1);
361 
362     // Skip arbitrary bits.
363     reader.SkipBits(4);
364 
365     int sequence_number = ReadBits(&reader, 4);
366 
367     // Skip FSC.
368     reader.SkipBits(1);
369 
370     // Next 3 bits must be 1.
371     RCHECK(ReadBits(&reader, 3) == 7);
372 
373     int block_number = ReadBits(&reader, 8);
374 
375     if (section == 0) {  // Header.
376       // Validate the reserved bits in the next 8 bytes.
377       reader.SkipBits(1);
378       RCHECK(ReadBits(&reader, 1) == 0);
379       RCHECK(ReadBits(&reader, 11) == 0x7ff);
380       reader.SkipBits(4);
381       RCHECK(ReadBits(&reader, 4) == 0xf);
382       reader.SkipBits(4);
383       RCHECK(ReadBits(&reader, 4) == 0xf);
384       reader.SkipBits(4);
385       RCHECK(ReadBits(&reader, 4) == 0xf);
386       reader.SkipBits(3);
387       RCHECK(ReadBits(&reader, 24) == 0xffffff);
388       current_sequence_number = sequence_number;
389       for (size_t i = 0; i < base::size(last_block_number); ++i)
390         last_block_number[i] = -1;
391     } else {
392       // Sequence number must match (this will also fail if no header seen).
393       RCHECK(sequence_number == current_sequence_number);
394       // Block number should be increasing.
395       RCHECK(block_number > last_block_number[section]);
396       last_block_number[section] = block_number;
397     }
398 
399     // Move to next block.
400     offset += 80;
401   }
402   return true;
403 }
404 
405 
// Checks for a GSM container by verifying that every 33rd byte of |buffer|,
// starting with the first, has 0xD in its upper 4 bits.
static bool CheckGsm(const uint8_t* buffer, int buffer_size) {
  // Reference: ETSI EN 300 961 V8.1.1
  // (http://www.etsi.org/deliver/etsi_en/300900_300999/300961/08.01.01_60/en_300961v080101p.pdf)
  // also http://tools.ietf.org/html/rfc3551#page-24
  // GSM files have a 33 byte block, only first 4 bits are fixed.
  if (buffer_size < 1024)  // Need enough data to do a decent check.
    return false;

  for (int block_start = 0; block_start < buffer_size; block_start += 33) {
    // First 4 bits of each block are xD.
    if ((buffer[block_start] & 0xf0) != 0xd0)
      return false;
  }
  return true;
}
422 
423 // Advance to the first set of |num_bits| bits that match |start_code|. |offset|
424 // is the current location in the buffer, and is updated. |bytes_needed| is the
425 // number of bytes that must remain in the buffer when |start_code| is found.
426 // Returns true if start_code found (and enough space in the buffer after it),
427 // false otherwise.
AdvanceToStartCode(const uint8_t * buffer,int buffer_size,int * offset,int bytes_needed,int num_bits,uint32_t start_code)428 static bool AdvanceToStartCode(const uint8_t* buffer,
429                                int buffer_size,
430                                int* offset,
431                                int bytes_needed,
432                                int num_bits,
433                                uint32_t start_code) {
434   DCHECK_GE(bytes_needed, 3);
435   DCHECK_LE(num_bits, 24);  // Only supports up to 24 bits.
436 
437   // Create a mask to isolate |num_bits| bits, once shifted over.
438   uint32_t bits_to_shift = 24 - num_bits;
439   uint32_t mask = (1 << num_bits) - 1;
440   while (*offset + bytes_needed < buffer_size) {
441     uint32_t next = Read24(buffer + *offset);
442     if (((next >> bits_to_shift) & mask) == start_code)
443       return true;
444     ++(*offset);
445   }
446   return false;
447 }
448 
449 // Checks for an H.261 container.
CheckH261(const uint8_t * buffer,int buffer_size)450 static bool CheckH261(const uint8_t* buffer, int buffer_size) {
451   // Reference: ITU-T Recommendation H.261 (03/1993)
452   // (http://www.itu.int/rec/T-REC-H.261-199303-I/en)
453   RCHECK(buffer_size > 16);
454 
455   int offset = 0;
456   bool seen_start_code = false;
457   while (true) {
458     // Advance to picture_start_code, if there is one.
459     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 20, 0x10)) {
460       // No start code found (or off end of buffer), so success if
461       // there was at least one valid header.
462       return seen_start_code;
463     }
464 
465     // Now verify the block. AdvanceToStartCode() made sure that there are
466     // at least 4 bytes remaining in the buffer.
467     BitReader reader(buffer + offset, buffer_size - offset);
468     RCHECK(ReadBits(&reader, 20) == 0x10);
469 
470     // Skip the temporal reference and PTYPE.
471     reader.SkipBits(5 + 6);
472 
473     // Skip any extra insertion information. Since this is open-ended, if we run
474     // out of bits assume that the buffer is correctly formatted.
475     int extra = ReadBits(&reader, 1);
476     while (extra == 1) {
477       if (!reader.SkipBits(8))
478         return seen_start_code;
479       if (!reader.ReadBits(1, &extra))
480         return seen_start_code;
481     }
482 
483     // Next should be a Group of Blocks start code. Again, if we run out of
484     // bits, then assume that the buffer up to here is correct, and the buffer
485     // just happened to end in the middle of a header.
486     int next;
487     if (!reader.ReadBits(16, &next))
488       return seen_start_code;
489     RCHECK(next == 1);
490 
491     // Move to the next block.
492     seen_start_code = true;
493     offset += 4;
494   }
495 }
496 
497 // Checks for an H.263 container.
CheckH263(const uint8_t * buffer,int buffer_size)498 static bool CheckH263(const uint8_t* buffer, int buffer_size) {
499   // Reference: ITU-T Recommendation H.263 (01/2005)
500   // (http://www.itu.int/rec/T-REC-H.263-200501-I/en)
501   // header is PSC(22b) + TR(8b) + PTYPE(8+b).
502   RCHECK(buffer_size > 16);
503 
504   int offset = 0;
505   bool seen_start_code = false;
506   while (true) {
507     // Advance to picture_start_code, if there is one.
508     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 9, 22, 0x20)) {
509       // No start code found (or off end of buffer), so success if
510       // there was at least one valid header.
511       return seen_start_code;
512     }
513 
514     // Now verify the block. AdvanceToStartCode() made sure that there are
515     // at least 9 bytes remaining in the buffer.
516     BitReader reader(buffer + offset, 9);
517     RCHECK(ReadBits(&reader, 22) == 0x20);
518 
519     // Skip the temporal reference.
520     reader.SkipBits(8);
521 
522     // Verify that the first 2 bits of PTYPE are 10b.
523     RCHECK(ReadBits(&reader, 2) == 2);
524 
525     // Skip the split screen indicator, document camera indicator, and full
526     // picture freeze release.
527     reader.SkipBits(1 + 1 + 1);
528 
529     // Verify Source Format.
530     int format = ReadBits(&reader, 3);
531     RCHECK(format != 0 && format != 6);  // Forbidden or reserved.
532 
533     if (format == 7) {
534       // Verify full extended PTYPE.
535       int ufep = ReadBits(&reader, 3);
536       if (ufep == 1) {
537         // Verify the optional part of PLUSPTYPE.
538         format = ReadBits(&reader, 3);
539         RCHECK(format != 0 && format != 7);  // Reserved.
540         reader.SkipBits(11);
541         // Next 4 bits should be b1000.
542         RCHECK(ReadBits(&reader, 4) == 8);  // Not allowed.
543       } else {
544         RCHECK(ufep == 0);  // Only 0 and 1 allowed.
545       }
546 
547       // Verify picture type code is not a reserved value.
548       int picture_type_code = ReadBits(&reader, 3);
549       RCHECK(picture_type_code != 6 && picture_type_code != 7);  // Reserved.
550 
551       // Skip picture resampling mode, reduced resolution mode,
552       // and rounding type.
553       reader.SkipBits(1 + 1 + 1);
554 
555       // Next 3 bits should be b001.
556       RCHECK(ReadBits(&reader, 3) == 1);  // Not allowed.
557     }
558 
559     // Move to the next block.
560     seen_start_code = true;
561     offset += 9;
562   }
563 }
564 
565 // Checks for an H.264 container.
CheckH264(const uint8_t * buffer,int buffer_size)566 static bool CheckH264(const uint8_t* buffer, int buffer_size) {
567   // Reference: ITU-T Recommendation H.264 (01/2012)
568   // (http://www.itu.int/rec/T-REC-H.264)
569   // Section B.1: Byte stream NAL unit syntax and semantics.
570   RCHECK(buffer_size > 4);
571 
572   int offset = 0;
573   int parameter_count = 0;
574   while (true) {
575     // Advance to picture_start_code, if there is one.
576     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 24, 1)) {
577       // No start code found (or off end of buffer), so success if
578       // there was at least one valid header.
579       return parameter_count > 0;
580     }
581 
582     // Now verify the block. AdvanceToStartCode() made sure that there are
583     // at least 4 bytes remaining in the buffer.
584     BitReader reader(buffer + offset, 4);
585     RCHECK(ReadBits(&reader, 24) == 1);
586 
587     // Verify forbidden_zero_bit.
588     RCHECK(ReadBits(&reader, 1) == 0);
589 
590     // Extract nal_ref_idc and nal_unit_type.
591     int nal_ref_idc = ReadBits(&reader, 2);
592     int nal_unit_type = ReadBits(&reader, 5);
593 
594     switch (nal_unit_type) {
595       case 5:  // Coded slice of an IDR picture.
596         RCHECK(nal_ref_idc != 0);
597         break;
598       case 6:   // Supplemental enhancement information (SEI).
599       case 9:   // Access unit delimiter.
600       case 10:  // End of sequence.
601       case 11:  // End of stream.
602       case 12:  // Filler data.
603         RCHECK(nal_ref_idc == 0);
604         break;
605       case 7:  // Sequence parameter set.
606       case 8:  // Picture parameter set.
607         ++parameter_count;
608         break;
609     }
610 
611     // Skip the current start_code_prefix and move to the next.
612     offset += 4;
613   }
614 }
615 
616 static const char kHlsSignature[] = "#EXTM3U";
617 static const char kHls1[] = "#EXT-X-STREAM-INF:";
618 static const char kHls2[] = "#EXT-X-TARGETDURATION:";
619 static const char kHls3[] = "#EXT-X-MEDIA-SEQUENCE:";
620 
621 // Additional checks for a HLS container.
CheckHls(const uint8_t * buffer,int buffer_size)622 static bool CheckHls(const uint8_t* buffer, int buffer_size) {
623   // HLS is simply a play list used for Apple HTTP Live Streaming.
624   // Reference: Apple HTTP Live Streaming Overview
625   // (http://goo.gl/MIwxj)
626 
627   if (StartsWith(buffer, buffer_size, kHlsSignature)) {
628     // Need to find "#EXT-X-STREAM-INF:", "#EXT-X-TARGETDURATION:", or
629     // "#EXT-X-MEDIA-SEQUENCE:" somewhere in the buffer. Other playlists (like
630     // WinAmp) only have additional lines with #EXTINF
631     // (http://en.wikipedia.org/wiki/M3U).
632     int offset = strlen(kHlsSignature);
633     while (offset < buffer_size) {
634       if (buffer[offset] == '#') {
635         if (StartsWith(buffer + offset, buffer_size - offset, kHls1) ||
636             StartsWith(buffer + offset, buffer_size - offset, kHls2) ||
637             StartsWith(buffer + offset, buffer_size - offset, kHls3)) {
638           return true;
639         }
640       }
641       ++offset;
642     }
643   }
644   return false;
645 }
646 
647 // Checks for a MJPEG stream.
CheckMJpeg(const uint8_t * buffer,int buffer_size)648 static bool CheckMJpeg(const uint8_t* buffer, int buffer_size) {
649   // Reference: ISO/IEC 10918-1 : 1993(E), Annex B
650   // (http://www.w3.org/Graphics/JPEG/itu-t81.pdf)
651   RCHECK(buffer_size >= 16);
652 
653   int offset = 0;
654   int last_restart = -1;
655   int num_codes = 0;
656   while (offset + 5 < buffer_size) {
657     // Marker codes are always a two byte code with the first byte xFF.
658     RCHECK(buffer[offset] == 0xff);
659     uint8_t code = buffer[offset + 1];
660     RCHECK(code >= 0xc0 || code == 1);
661 
662     // Skip sequences of xFF.
663     if (code == 0xff) {
664       ++offset;
665       continue;
666     }
667 
668     // Success if the next marker code is EOI (end of image)
669     if (code == 0xd9)
670       return true;
671 
672     // Check remaining codes.
673     if (code == 0xd8 || code == 1) {
674       // SOI (start of image) / TEM (private use). No other data with header.
675       offset += 2;
676     } else if (code >= 0xd0 && code <= 0xd7) {
677       // RST (restart) codes must be in sequence. No other data with header.
678       int restart = code & 0x07;
679       if (last_restart >= 0)
680         RCHECK(restart == (last_restart + 1) % 8);
681       last_restart = restart;
682       offset += 2;
683     } else {
684       // All remaining marker codes are followed by a length of the header.
685       int length = Read16(buffer + offset + 2) + 2;
686 
687       // Special handling of SOS (start of scan) marker since the entropy
688       // coded data follows the SOS. Any xFF byte in the data block must be
689       // followed by x00 in the data.
690       if (code == 0xda) {
691         int number_components = buffer[offset + 4];
692         RCHECK(length == 8 + 2 * number_components);
693 
694         // Advance to the next marker.
695         offset += length;
696         while (offset + 2 < buffer_size) {
697           if (buffer[offset] == 0xff && buffer[offset + 1] != 0)
698             break;
699           ++offset;
700         }
701       } else {
702         // Skip over the marker data for the other marker codes.
703         offset += length;
704       }
705     }
706     ++num_codes;
707   }
708   return (num_codes > 1);
709 }
710 
711 enum Mpeg2StartCodes {
712   PROGRAM_END_CODE = 0xb9,
713   PACK_START_CODE = 0xba
714 };
715 
716 // Checks for a MPEG2 Program Stream.
CheckMpeg2ProgramStream(const uint8_t * buffer,int buffer_size)717 static bool CheckMpeg2ProgramStream(const uint8_t* buffer, int buffer_size) {
718   // Reference: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
719   RCHECK(buffer_size > 14);
720 
721   int offset = 0;
722   while (offset + 14 < buffer_size) {
723     BitReader reader(buffer + offset, 14);
724 
725     // Must start with pack_start_code.
726     RCHECK(ReadBits(&reader, 24) == 1);
727     RCHECK(ReadBits(&reader, 8) == PACK_START_CODE);
728 
729     // Determine MPEG version (MPEG1 has b0010, while MPEG2 has b01).
730     int mpeg_version = ReadBits(&reader, 2);
731     if (mpeg_version == 0) {
732       // MPEG1, 10 byte header
733       // Validate rest of version code
734       RCHECK(ReadBits(&reader, 2) == 2);
735     } else {
736       RCHECK(mpeg_version == 1);
737     }
738 
739     // Skip system_clock_reference_base [32..30].
740     reader.SkipBits(3);
741 
742     // Verify marker bit.
743     RCHECK(ReadBits(&reader, 1) == 1);
744 
745     // Skip system_clock_reference_base [29..15].
746     reader.SkipBits(15);
747 
748     // Verify next marker bit.
749     RCHECK(ReadBits(&reader, 1) == 1);
750 
751     // Skip system_clock_reference_base [14..0].
752     reader.SkipBits(15);
753 
754     // Verify next marker bit.
755     RCHECK(ReadBits(&reader, 1) == 1);
756 
757     if (mpeg_version == 0) {
758       // Verify second marker bit.
759       RCHECK(ReadBits(&reader, 1) == 1);
760 
761       // Skip mux_rate.
762       reader.SkipBits(22);
763 
764       // Verify next marker bit.
765       RCHECK(ReadBits(&reader, 1) == 1);
766 
767       // Update offset to be after this header.
768       offset += 12;
769     } else {
770       // Must be MPEG2.
771       // Skip program_mux_rate.
772       reader.SkipBits(22);
773 
774       // Verify pair of marker bits.
775       RCHECK(ReadBits(&reader, 2) == 3);
776 
777       // Skip reserved.
778       reader.SkipBits(5);
779 
780       // Update offset to be after this header.
781       int pack_stuffing_length = ReadBits(&reader, 3);
782       offset += 14 + pack_stuffing_length;
783     }
784 
785     // Check for system headers and PES_packets.
786     while (offset + 6 < buffer_size && Read24(buffer + offset) == 1) {
787       // Next 8 bits determine stream type.
788       int stream_id = buffer[offset + 3];
789 
790       // Some stream types are reserved and shouldn't occur.
791       if (mpeg_version == 0)
792         RCHECK(stream_id != 0xbc && stream_id < 0xf0);
793       else
794         RCHECK(stream_id != 0xfc && stream_id != 0xfd && stream_id != 0xfe);
795 
796       // Some stream types are used for pack headers.
797       if (stream_id == PACK_START_CODE)  // back to outer loop.
798         break;
799       if (stream_id == PROGRAM_END_CODE)  // end of stream.
800         return true;
801 
802       int pes_length = Read16(buffer + offset + 4);
803       RCHECK(pes_length > 0);
804       offset = offset + 6 + pes_length;
805     }
806   }
807   // Success as we are off the end of the buffer and liked everything
808   // in the buffer.
809   return true;
810 }
811 
812 const uint8_t kMpeg2SyncWord = 0x47;
813 
814 // Checks for a MPEG2 Transport Stream.
CheckMpeg2TransportStream(const uint8_t * buffer,int buffer_size)815 static bool CheckMpeg2TransportStream(const uint8_t* buffer, int buffer_size) {
816   // Spec: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
817   // Normal packet size is 188 bytes. However, some systems add various error
818   // correction data at the end, resulting in packet of length 192/204/208
819   // (https://en.wikipedia.org/wiki/MPEG_transport_stream). Determine the
820   // length with the first packet.
821   RCHECK(buffer_size >= 250);  // Want more than 1 packet to check.
822 
823   int offset = 0;
824   int packet_length = -1;
825   while (buffer[offset] != kMpeg2SyncWord && offset < 20) {
826     // Skip over any header in the first 20 bytes.
827     ++offset;
828   }
829 
830   while (offset + 6 < buffer_size) {
831     BitReader reader(buffer + offset, 6);
832 
833     // Must start with sync byte.
834     RCHECK(ReadBits(&reader, 8) == kMpeg2SyncWord);
835 
836     // Skip transport_error_indicator, payload_unit_start_indicator, and
837     // transport_priority.
838     reader.SkipBits(1 + 1 + 1);
839 
840     // Verify the pid is not a reserved value.
841     int pid = ReadBits(&reader, 13);
842     RCHECK(pid < 3 || pid > 15);
843 
844     // Skip transport_scrambling_control.
845     reader.SkipBits(2);
846 
847     // Adaptation_field_control can not be 0.
848     int adaptation_field_control = ReadBits(&reader, 2);
849     RCHECK(adaptation_field_control != 0);
850 
851     // If there is an adaptation_field, verify it.
852     if (adaptation_field_control >= 2) {
853       // Skip continuity_counter.
854       reader.SkipBits(4);
855 
856       // Get adaptation_field_length and verify it.
857       int adaptation_field_length = ReadBits(&reader, 8);
858       if (adaptation_field_control == 2)
859         RCHECK(adaptation_field_length == 183);
860       else
861         RCHECK(adaptation_field_length <= 182);
862     }
863 
864     // Attempt to determine the packet length on the first packet.
865     if (packet_length < 0) {
866       if (buffer[offset + 188] == kMpeg2SyncWord)
867         packet_length = 188;
868       else if (buffer[offset + 192] == kMpeg2SyncWord)
869         packet_length = 192;
870       else if (buffer[offset + 204] == kMpeg2SyncWord)
871         packet_length = 204;
872       else
873         packet_length = 208;
874     }
875     offset += packet_length;
876   }
877   return true;
878 }
879 
880 enum Mpeg4StartCodes {
881   VISUAL_OBJECT_SEQUENCE_START_CODE = 0xb0,
882   VISUAL_OBJECT_SEQUENCE_END_CODE = 0xb1,
883   VISUAL_OBJECT_START_CODE = 0xb5,
884   VOP_START_CODE = 0xb6
885 };
886 
887 // Checks for a raw MPEG4 bitstream container.
CheckMpeg4BitStream(const uint8_t * buffer,int buffer_size)888 static bool CheckMpeg4BitStream(const uint8_t* buffer, int buffer_size) {
889   // Defined in ISO/IEC 14496-2:2001.
890   // However, no length ... simply scan for start code values.
891   // Note tags are very similar to H.264.
892   RCHECK(buffer_size > 4);
893 
894   int offset = 0;
895   int sequence_start_count = 0;
896   int sequence_end_count = 0;
897   int visual_object_count = 0;
898   int vop_count = 0;
899   while (true) {
900     // Advance to start_code, if there is one.
901     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 6, 24, 1)) {
902       // Not a complete sequence in memory, so return true if we've seen a
903       // visual_object_sequence_start_code and a visual_object_start_code.
904       return (sequence_start_count > 0 && visual_object_count > 0);
905     }
906 
907     // Now verify the block. AdvanceToStartCode() made sure that there are
908     // at least 6 bytes remaining in the buffer.
909     BitReader reader(buffer + offset, 6);
910     RCHECK(ReadBits(&reader, 24) == 1);
911 
912     int start_code = ReadBits(&reader, 8);
913     RCHECK(start_code < 0x30 || start_code > 0xaf);  // 30..AF and
914     RCHECK(start_code < 0xb7 || start_code > 0xb9);  // B7..B9 reserved
915 
916     switch (start_code) {
917       case VISUAL_OBJECT_SEQUENCE_START_CODE: {
918         ++sequence_start_count;
919         // Verify profile in not one of many reserved values.
920         int profile = ReadBits(&reader, 8);
921         RCHECK(profile > 0);
922         RCHECK(profile < 0x04 || profile > 0x10);
923         RCHECK(profile < 0x13 || profile > 0x20);
924         RCHECK(profile < 0x23 || profile > 0x31);
925         RCHECK(profile < 0x35 || profile > 0x41);
926         RCHECK(profile < 0x43 || profile > 0x60);
927         RCHECK(profile < 0x65 || profile > 0x70);
928         RCHECK(profile < 0x73 || profile > 0x80);
929         RCHECK(profile < 0x83 || profile > 0x90);
930         RCHECK(profile < 0x95 || profile > 0xa0);
931         RCHECK(profile < 0xa4 || profile > 0xb0);
932         RCHECK(profile < 0xb5 || profile > 0xc0);
933         RCHECK(profile < 0xc3 || profile > 0xd0);
934         RCHECK(profile < 0xe4);
935         break;
936       }
937 
938       case VISUAL_OBJECT_SEQUENCE_END_CODE:
939         RCHECK(++sequence_end_count == sequence_start_count);
940         break;
941 
942       case VISUAL_OBJECT_START_CODE: {
943         ++visual_object_count;
944         if (ReadBits(&reader, 1) == 1) {
945           int visual_object_verid = ReadBits(&reader, 4);
946           RCHECK(visual_object_verid > 0 && visual_object_verid < 3);
947           RCHECK(ReadBits(&reader, 3) != 0);
948         }
949         int visual_object_type = ReadBits(&reader, 4);
950         RCHECK(visual_object_type > 0 && visual_object_type < 6);
951         break;
952       }
953 
954       case VOP_START_CODE:
955         RCHECK(++vop_count <= visual_object_count);
956         break;
957     }
958     // Skip this block.
959     offset += 6;
960   }
961 }
962 
963 // Additional checks for a MOV/QuickTime/MPEG4 container.
CheckMov(const uint8_t * buffer,int buffer_size)964 static bool CheckMov(const uint8_t* buffer, int buffer_size) {
965   // Reference: ISO/IEC 14496-12:2005(E).
966   // (http://standards.iso.org/ittf/PubliclyAvailableStandards/c061988_ISO_IEC_14496-12_2012.zip)
967   RCHECK(buffer_size > 8);
968 
969   int offset = 0;
970   int valid_top_level_boxes = 0;
971   while (offset + 8 < buffer_size) {
972     uint32_t atomsize = Read32(buffer + offset);
973     uint32_t atomtype = Read32(buffer + offset + 4);
974 
975     // Only need to check for atoms that are valid at the top level. However,
976     // "Boxes with an unrecognized type shall be ignored and skipped." So
977     // simply make sure that at least two recognized top level boxes are found.
978     // This list matches BoxReader::IsValidTopLevelBox().
979     switch (atomtype) {
980       case TAG('f', 't', 'y', 'p'):
981       case TAG('p', 'd', 'i', 'n'):
982       case TAG('b', 'l', 'o', 'c'):
983       case TAG('m', 'o', 'o', 'v'):
984       case TAG('m', 'o', 'o', 'f'):
985       case TAG('m', 'f', 'r', 'a'):
986       case TAG('m', 'd', 'a', 't'):
987       case TAG('f', 'r', 'e', 'e'):
988       case TAG('s', 'k', 'i', 'p'):
989       case TAG('m', 'e', 't', 'a'):
990       case TAG('m', 'e', 'c', 'o'):
991       case TAG('s', 't', 'y', 'p'):
992       case TAG('s', 'i', 'd', 'x'):
993       case TAG('s', 's', 'i', 'x'):
994       case TAG('p', 'r', 'f', 't'):
995       case TAG('u', 'u', 'i', 'd'):
996       case TAG('e', 'm', 's', 'g'):
997         ++valid_top_level_boxes;
998         break;
999     }
1000     if (atomsize == 1) {
1001       // Indicates that the length is the next 64bits.
1002       if (offset + 16 > buffer_size)
1003         break;
1004       if (Read32(buffer + offset + 8) != 0)
1005         break;  // Offset is way past buffer size.
1006       atomsize = Read32(buffer + offset + 12);
1007     }
1008     if (atomsize == 0 || atomsize > static_cast<size_t>(buffer_size))
1009       break;  // Indicates the last atom or length too big.
1010     offset += atomsize;
1011   }
1012   return valid_top_level_boxes >= 2;
1013 }
1014 
// Values of the 2-bit MPEG audio version field read by
// ValidMpegAudioFrameHeader(). The numeric value is also used as the first
// index into kSampleRateTable; VERSION_RESERVED (1) is rejected there.
enum MPEGVersion {
  VERSION_25 = 0,
  VERSION_RESERVED,
  VERSION_2,
  VERSION_1
};
// Values of the 2-bit layer field read by ValidMpegAudioFrameHeader().
// Note the inverted order: LAYER_3 is 1 and LAYER_1 is 3. L_RESERVED (0) is
// rejected by the header validation.
enum MPEGLayer {
  L_RESERVED = 0,
  LAYER_3,
  LAYER_2,
  LAYER_1
};
1027 
// Sampling rates indexed by [MPEG version field][sampling rate index]. A zero
// entry marks a combination that is not valid. The tables below are read-only
// lookup data, hence const.
static const int kSampleRateTable[4][4] = {
    { 11025, 12000, 8000, 0 },   // v2.5
    { 0, 0, 0, 0 },              // not used
    { 22050, 24000, 16000, 0 },  // v2
    { 44100, 48000, 32000, 0 }   // v1
};

// Bit rates indexed by the 4-bit bitrate index of the frame header, one table
// per version/layer combination. The frame size computation scales these
// values by 1000. A zero entry yields no usable frame size.
static const int kBitRateTableV1L1[16] = { 0, 32, 64, 96, 128, 160, 192, 224,
                                           256, 288, 320, 352, 384, 416, 448,
                                           0 };
static const int kBitRateTableV1L2[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           160, 192, 224, 256, 320, 384, 0 };
static const int kBitRateTableV1L3[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112,
                                           128, 160, 192, 224, 256, 320, 0 };
static const int kBitRateTableV2L1[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           144, 160, 176, 192, 224, 256, 0 };
static const int kBitRateTableV2L23[16] = { 0, 8, 16, 24, 32, 40, 48, 56, 64,
                                            80, 96, 112, 128, 144, 160, 0 };
1044 
ValidMpegAudioFrameHeader(const uint8_t * header,int header_size,int * framesize)1045 static bool ValidMpegAudioFrameHeader(const uint8_t* header,
1046                                       int header_size,
1047                                       int* framesize) {
1048   // Reference: http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm.
1049   DCHECK_GE(header_size, 4);
1050   *framesize = 0;
1051   BitReader reader(header, 4);  // Header can only be 4 bytes long.
1052 
1053   // Verify frame sync (11 bits) are all set.
1054   RCHECK(ReadBits(&reader, 11) == 0x7ff);
1055 
1056   // Verify MPEG audio version id.
1057   int version = ReadBits(&reader, 2);
1058   RCHECK(version != 1);  // Reserved.
1059 
1060   // Verify layer.
1061   int layer = ReadBits(&reader, 2);
1062   RCHECK(layer != 0);
1063 
1064   // Skip protection bit.
1065   reader.SkipBits(1);
1066 
1067   // Verify bitrate index.
1068   int bitrate_index = ReadBits(&reader, 4);
1069   RCHECK(bitrate_index != 0xf);
1070 
1071   // Verify sampling rate frequency index.
1072   int sampling_index = ReadBits(&reader, 2);
1073   RCHECK(sampling_index != 3);
1074 
1075   // Get padding bit.
1076   int padding = ReadBits(&reader, 1);
1077 
1078   // Frame size:
1079   // For Layer I files = (12 * BitRate / SampleRate + Padding) * 4
1080   // For others = 144 * BitRate / SampleRate + Padding
1081   // Unfortunately, BitRate and SampleRate are coded.
1082   int sampling_rate = kSampleRateTable[version][sampling_index];
1083   int bitrate;
1084   if (version == VERSION_1) {
1085     if (layer == LAYER_1)
1086       bitrate = kBitRateTableV1L1[bitrate_index];
1087     else if (layer == LAYER_2)
1088       bitrate = kBitRateTableV1L2[bitrate_index];
1089     else
1090       bitrate = kBitRateTableV1L3[bitrate_index];
1091   } else {
1092     if (layer == LAYER_1)
1093       bitrate = kBitRateTableV2L1[bitrate_index];
1094     else
1095       bitrate = kBitRateTableV2L23[bitrate_index];
1096   }
1097   if (layer == LAYER_1)
1098     *framesize = ((12000 * bitrate) / sampling_rate + padding) * 4;
1099   else
1100     *framesize = (144000 * bitrate) / sampling_rate + padding;
1101   return (bitrate > 0 && sampling_rate > 0);
1102 }
1103 
1104 // Additional checks for a MP3 container.
CheckMp3(const uint8_t * buffer,int buffer_size)1105 static bool CheckMp3(const uint8_t* buffer, int buffer_size) {
1106   // This function assumes that the ID3 header is not present in the file and
1107   // simply checks for several valid MPEG audio buffers after skipping any
1108   // optional padding characters.
1109   int numSeen = 0;
1110   int offset = 0;
1111 
1112   // Skip over any padding (0's).
1113   while (offset < buffer_size && buffer[offset] == 0)
1114     ++offset;
1115 
1116   while (offset + 3 < buffer_size) {
1117     int framesize;
1118     RCHECK(ValidMpegAudioFrameHeader(
1119         buffer + offset, buffer_size - offset, &framesize));
1120 
1121     // Have we seen enough valid headers?
1122     if (++numSeen > 10)
1123       return true;
1124     offset += framesize;
1125   }
1126   // Off the end of the buffer, return success if a few valid headers seen.
1127   return numSeen > 2;
1128 }
1129 
// Verifies that a number starts at |buffer[*offset]|: optional whitespace
// followed by at least one digit. At most |max_digits| digits are consumed.
// |*offset| is advanced past everything that was consumed. Returns true if a
// digit was found (also when the buffer ends right after it), false if there
// is no digit or the buffer ends before one is reached.
static bool VerifyNumber(const uint8_t* buffer,
                         int buffer_size,
                         int* offset,
                         int max_digits) {
  int& pos = *offset;
  if (pos >= buffer_size)
    return false;

  // Consume leading whitespace; running out of data here is a failure.
  while (isspace(buffer[pos])) {
    ++pos;
    if (pos >= buffer_size)
      return false;
  }

  // Consume up to |max_digits| digits.
  int digits = 0;
  while (digits < max_digits && isdigit(buffer[pos])) {
    ++digits;
    ++pos;
    if (pos >= buffer_size)
      return true;  // End of data, but at least one digit was seen.
  }

  return digits > 0;
}
1158 
// Consumes one character from |buffer| at |*offset| and reports whether it
// equals |c1| or |c2|. Passing 0 for |c2| disables the second alternative.
// Returns false without advancing |*offset| when no data is left; otherwise
// |*offset| is advanced by one regardless of the result.
static inline bool VerifyCharacters(const uint8_t* buffer,
                                    int buffer_size,
                                    int* offset,
                                    char c1,
                                    char c2) {
  if (*offset >= buffer_size)
    return false;

  const char next = static_cast<char>(buffer[*offset]);
  ++(*offset);

  if (next == c1)
    return true;
  return c2 != 0 && next == c2;
}
1171 
1172 // Checks for a SRT container.
CheckSrt(const uint8_t * buffer,int buffer_size)1173 static bool CheckSrt(const uint8_t* buffer, int buffer_size) {
1174   // Reference: http://en.wikipedia.org/wiki/SubRip
1175   RCHECK(buffer_size > 20);
1176 
1177   // First line should just be the subtitle sequence number.
1178   int offset = StartsWith(buffer, buffer_size, UTF8_BYTE_ORDER_MARK) ? 3 : 0;
1179   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1180   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r'));
1181 
1182   // Skip any additional \n\r.
1183   while (VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r')) {}
1184   --offset;  // Since VerifyCharacters() gobbled up the next non-CR/LF.
1185 
1186   // Second line should look like the following:
1187   //   00:00:10,500 --> 00:00:13,000
1188   // Units separator can be , or .
1189   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1190   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1191   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1192   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1193   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1194   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1195   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1196   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1197   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1198   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1199   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '>', 0));
1200   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1201   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1202   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1203   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1204   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1205   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1206   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1207   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1208   return true;
1209 }
1210 
1211 // Read a Matroska Element Id.
GetElementId(BitReader * reader)1212 static int GetElementId(BitReader* reader) {
1213   // Element ID is coded with the leading zero bits (max 3) determining size.
1214   // If it is an invalid encoding or the end of the buffer is reached,
1215   // return -1 as a tag that won't be expected.
1216   if (reader->bits_available() >= 8) {
1217     int num_bits_to_read = 0;
1218     static int prefix[] = { 0x80, 0x4000, 0x200000, 0x10000000 };
1219     for (int i = 0; i < 4; ++i) {
1220       num_bits_to_read += 7;
1221       if (ReadBits(reader, 1) == 1) {
1222         if (reader->bits_available() < num_bits_to_read)
1223           break;
1224         // prefix[] adds back the bits read individually.
1225         return ReadBits(reader, num_bits_to_read) | prefix[i];
1226       }
1227     }
1228   }
1229   // Invalid encoding, return something not expected.
1230   return -1;
1231 }
1232 
1233 // Read a Matroska Unsigned Integer (VINT).
GetVint(BitReader * reader)1234 static uint64_t GetVint(BitReader* reader) {
1235   // Values are coded with the leading zero bits (max 7) determining size.
1236   // If it is an invalid coding or the end of the buffer is reached,
1237   // return something that will go off the end of the buffer.
1238   if (reader->bits_available() >= 8) {
1239     int num_bits_to_read = 0;
1240     for (int i = 0; i < 8; ++i) {
1241       num_bits_to_read += 7;
1242       if (ReadBits(reader, 1) == 1) {
1243         if (reader->bits_available() < num_bits_to_read)
1244           break;
1245         return ReadBits(reader, num_bits_to_read);
1246       }
1247     }
1248   }
1249   // Incorrect format (more than 7 leading 0's) or off the end of the buffer.
1250   // Since the return value is used as a byte size, return a value that will
1251   // cause a failure when used.
1252   return (reader->bits_available() / 8) + 2;
1253 }
1254 
1255 // Additional checks for a WEBM container.
CheckWebm(const uint8_t * buffer,int buffer_size)1256 static bool CheckWebm(const uint8_t* buffer, int buffer_size) {
1257   // Reference: http://www.matroska.org/technical/specs/index.html
1258   RCHECK(buffer_size > 12);
1259 
1260   BitReader reader(buffer, buffer_size);
1261 
1262   // Verify starting Element Id.
1263   RCHECK(GetElementId(&reader) == 0x1a45dfa3);
1264 
1265   // Get the header size, and ensure there are enough bits to check.
1266   // Using saturated_cast<> in case the size read is really large
1267   // (in which case the bits_available() check will fail).
1268   int header_size = base::saturated_cast<int>(GetVint(&reader));
1269   RCHECK(reader.bits_available() / 8 >= header_size);
1270 
1271   // Loop through the header.
1272   while (reader.bits_available() > 0) {
1273     int tag = GetElementId(&reader);
1274     int tagsize = base::saturated_cast<int>(GetVint(&reader));
1275     switch (tag) {
1276       case 0x4286:  // EBMLVersion
1277       case 0x42f7:  // EBMLReadVersion
1278       case 0x42f2:  // EBMLMaxIdLength
1279       case 0x42f3:  // EBMLMaxSizeLength
1280       case 0x4287:  // DocTypeVersion
1281       case 0x4285:  // DocTypeReadVersion
1282       case 0xec:    // void
1283       case 0xbf:    // CRC32
1284         RCHECK(reader.bits_available() / 8 >= tagsize);
1285         RCHECK(reader.SkipBits(tagsize * 8));
1286         break;
1287 
1288       case 0x4282:  // EBMLDocType
1289         // Need to see "webm" or "matroska" next.
1290         RCHECK(reader.bits_available() >= 32);
1291         switch (ReadBits(&reader, 32)) {
1292           case TAG('w', 'e', 'b', 'm') :
1293             return true;
1294           case TAG('m', 'a', 't', 'r') :
1295             RCHECK(reader.bits_available() >= 32);
1296             return (ReadBits(&reader, 32) == TAG('o', 's', 'k', 'a'));
1297         }
1298         return false;
1299 
1300       default:  // Unrecognized tag
1301         return false;
1302     }
1303   }
1304   return false;
1305 }
1306 
1307 enum VC1StartCodes {
1308   VC1_FRAME_START_CODE = 0x0d,
1309   VC1_ENTRY_POINT_START_CODE = 0x0e,
1310   VC1_SEQUENCE_START_CODE = 0x0f
1311 };
1312 
1313 // Checks for a VC1 bitstream container.
CheckVC1(const uint8_t * buffer,int buffer_size)1314 static bool CheckVC1(const uint8_t* buffer, int buffer_size) {
1315   // Reference: SMPTE 421M
1316   // (http://standards.smpte.org/content/978-1-61482-555-5/st-421-2006/SEC1.body.pdf)
1317   // However, no length ... simply scan for start code values.
1318   // Expect to see SEQ | [ [ ENTRY ] PIC* ]*
1319   // Note tags are very similar to H.264.
1320 
1321   RCHECK(buffer_size >= 24);
1322 
1323   // First check for Bitstream Metadata Serialization (Annex L)
1324   if (buffer[0] == 0xc5 &&
1325       Read32(buffer + 4) == 0x04 &&
1326       Read32(buffer + 20) == 0x0c) {
1327     // Verify settings in STRUCT_C and STRUCT_A
1328     BitReader reader(buffer + 8, 12);
1329 
1330     int profile = ReadBits(&reader, 4);
1331     if (profile == 0 || profile == 4) {  // simple or main
1332       // Skip FRMRTQ_POSTPROC, BITRTQ_POSTPROC, and LOOPFILTER.
1333       reader.SkipBits(3 + 5 + 1);
1334 
1335       // Next bit must be 0.
1336       RCHECK(ReadBits(&reader, 1) == 0);
1337 
1338       // Skip MULTIRES.
1339       reader.SkipBits(1);
1340 
1341       // Next bit must be 1.
1342       RCHECK(ReadBits(&reader, 1) == 1);
1343 
1344       // Skip FASTUVMC, EXTENDED_MV, DQUANT, and VSTRANSFORM.
1345       reader.SkipBits(1 + 1 + 2 + 1);
1346 
1347       // Next bit must be 0.
1348       RCHECK(ReadBits(&reader, 1) == 0);
1349 
1350       // Skip OVERLAP, SYNCMARKER, RANGERED, MAXBFRAMES, QUANTIZER, and
1351       // FINTERPFLAG.
1352       reader.SkipBits(1 + 1 + 1 + 3 + 2 + 1);
1353 
1354       // Next bit must be 1.
1355       RCHECK(ReadBits(&reader, 1) == 1);
1356 
1357     } else {
1358       RCHECK(profile == 12);  // Other profile values not allowed.
1359       RCHECK(ReadBits(&reader, 28) == 0);
1360     }
1361 
1362     // Now check HORIZ_SIZE and VERT_SIZE, which must be 8192 or less.
1363     RCHECK(ReadBits(&reader, 32) <= 8192);
1364     RCHECK(ReadBits(&reader, 32) <= 8192);
1365     return true;
1366   }
1367 
1368   // Buffer isn't Bitstream Metadata, so scan for start codes.
1369   int offset = 0;
1370   int sequence_start_code = 0;
1371   int frame_start_code = 0;
1372   while (true) {
1373     // Advance to start_code, if there is one.
1374     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 5, 24, 1)) {
1375       // Not a complete sequence in memory, so return true if we've seen a
1376       // sequence start and a frame start (not checking entry points since
1377       // they only occur in advanced profiles).
1378       return (sequence_start_code > 0 && frame_start_code > 0);
1379     }
1380 
1381     // Now verify the block. AdvanceToStartCode() made sure that there are
1382     // at least 5 bytes remaining in the buffer.
1383     BitReader reader(buffer + offset, 5);
1384     RCHECK(ReadBits(&reader, 24) == 1);
1385 
1386     // Keep track of the number of certain types received.
1387     switch (ReadBits(&reader, 8)) {
1388       case VC1_SEQUENCE_START_CODE: {
1389         ++sequence_start_code;
1390         switch (ReadBits(&reader, 2)) {
1391           case 0:  // simple
1392           case 1:  // main
1393             RCHECK(ReadBits(&reader, 2) == 0);
1394             break;
1395           case 2:  // complex
1396             return false;
1397           case 3:  // advanced
1398             RCHECK(ReadBits(&reader, 3) <= 4);  // Verify level = 0..4
1399             RCHECK(ReadBits(&reader, 2) == 1);  // Verify colordiff_format = 1
1400             break;
1401         }
1402         break;
1403       }
1404 
1405       case VC1_ENTRY_POINT_START_CODE:
1406         // No fields in entry data to check. However, it must occur after
1407         // sequence header.
1408         RCHECK(sequence_start_code > 0);
1409         break;
1410 
1411       case VC1_FRAME_START_CODE:
1412         ++frame_start_code;
1413         break;
1414     }
1415     offset += 5;
1416   }
1417 }
1418 
// For some formats the signature is a bunch of characters. They are defined
// below. Note that the first 4 characters of the string may be used as a TAG
// in LookupContainerByFirst4. For signatures that contain embedded \0, use
// uint8_t[].

// Prefix identifying CONTAINER_AMR.
static const char kAmrSignature[] = "#!AMR";
// 16-byte prefix identifying CONTAINER_ASF (binary, contains \0 bytes).
static const uint8_t kAsfSignature[] = {0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66,
                                        0xcf, 0x11, 0xa6, 0xd9, 0x00, 0xaa,
                                        0x00, 0x62, 0xce, 0x6c};
// Prefix identifying CONTAINER_ASS, without and with a leading UTF-8 byte
// order mark.
static const char kAssSignature[] = "[Script Info]";
static const char kAssBomSignature[] = UTF8_BYTE_ORDER_MARK "[Script Info]";
// 16-byte prefix identifying CONTAINER_WTV (binary, contains \0 bytes).
static const uint8_t kWtvSignature[] = {0xb7, 0xd8, 0x00, 0x20, 0x37, 0x49,
                                        0xda, 0x11, 0xa6, 0x4e, 0x00, 0x07,
                                        0xe9, 0x5e, 0xad, 0x8d};
1432 
// Attempt to determine the container type from the buffer provided. This is
// a simple pass, that uses the first 4 bytes of the buffer as an index to get
// a rough idea of the container format.
//
// The lookup proceeds in stages: the first 4 bytes (read big endian), then
// the first 3 bytes, then the first 2 bytes, and finally a check for MP3
// frames without an ID3 header. A case that ends in "break" did not confirm
// its format, and control continues with the following stages. Returns
// CONTAINER_UNKNOWN if the buffer is shorter than kMinimumContainerSize or if
// no stage matches.
static MediaContainerName LookupContainerByFirst4(const uint8_t* buffer,
                                                  int buffer_size) {
  // Minimum size that the code expects to exist without checking size.
  if (buffer_size < kMinimumContainerSize)
    return CONTAINER_UNKNOWN;

  uint32_t first4 = Read32(buffer);
  switch (first4) {
    case 0x1a45dfa3:
      if (CheckWebm(buffer, buffer_size))
        return CONTAINER_WEBM;
      break;

    case 0x3026b275:
      if (StartsWith(buffer,
                     buffer_size,
                     kAsfSignature,
                     sizeof(kAsfSignature))) {
        return CONTAINER_ASF;
      }
      break;

    case TAG('#','!','A','M'):
      if (StartsWith(buffer, buffer_size, kAmrSignature))
        return CONTAINER_AMR;
      break;

    case TAG('#','E','X','T'):
      if (CheckHls(buffer, buffer_size))
        return CONTAINER_HLS;
      break;

    case TAG('.','R','M','F'):
      if (buffer[4] == 0 && buffer[5] == 0)
        return CONTAINER_RM;
      break;

    case TAG('.','r','a','\xfd'):
      return CONTAINER_RM;

    case TAG('B','I','K','b'):
    case TAG('B','I','K','d'):
    case TAG('B','I','K','f'):
    case TAG('B','I','K','g'):
    case TAG('B','I','K','h'):
    case TAG('B','I','K','i'):
      if (CheckBink(buffer, buffer_size))
        return CONTAINER_BINK;
      break;

    case TAG('c','a','f','f'):
      if (CheckCaf(buffer, buffer_size))
        return CONTAINER_CAF;
      break;

    case TAG('D','E','X','A'):
      // Reads bytes 11..14, hence the explicit size check.
      if (buffer_size > 15 &&
          Read16(buffer + 11) <= 2048 &&
          Read16(buffer + 13) <= 2048) {
        return CONTAINER_DXA;
      }
      break;

    case TAG('D','T','S','H'):
      if (Read32(buffer + 4) == TAG('D','H','D','R'))
        return CONTAINER_DTSHD;
      break;

    case 0x64a30100:
    case 0x64a30200:
    case 0x64a30300:
    case 0x64a30400:
    case 0x0001a364:
    case 0x0002a364:
    case 0x0003a364:
      if (Read32(buffer + 4) != 0 && Read32(buffer + 8) != 0)
        return CONTAINER_IRCAM;
      break;

    case TAG('f','L','a','C'):
      return CONTAINER_FLAC;

    case TAG('F','L','V',0):
    case TAG('F','L','V',1):
    case TAG('F','L','V',2):
    case TAG('F','L','V',3):
    case TAG('F','L','V',4):
      if (buffer[5] == 0 && Read32(buffer + 5) > 8)
        return CONTAINER_FLV;
      break;

    case TAG('F','O','R','M'):
      switch (Read32(buffer + 8)) {
        case TAG('A','I','F','F'):
        case TAG('A','I','F','C'):
          return CONTAINER_AIFF;
      }
      break;

    case TAG('M','A','C',' '):
      return CONTAINER_APE;

    case TAG('O','N','2',' '):
      if (Read32(buffer + 8) == TAG('O','N','2','f'))
        return CONTAINER_AVI;
      break;

    case TAG('O','g','g','S'):
      if (buffer[5] <= 7)
        return CONTAINER_OGG;
      break;

    case TAG('R','F','6','4'):
      // Reads bytes 12..15, hence the explicit size check.
      if (buffer_size > 16 && Read32(buffer + 12) == TAG('d','s','6','4'))
        return CONTAINER_WAV;
      break;

    case TAG('R','I','F','F'):
      // The form type at byte 8 selects between AVI and WAV.
      switch (Read32(buffer + 8)) {
        case TAG('A','V','I',' '):
        case TAG('A','V','I','X'):
        case TAG('A','V','I','\x19'):
        case TAG('A','M','V',' '):
          return CONTAINER_AVI;
        case TAG('W','A','V','E'):
          return CONTAINER_WAV;
      }
      break;

    case TAG('[','S','c','r'):
      if (StartsWith(buffer, buffer_size, kAssSignature))
        return CONTAINER_ASS;
      break;

    case TAG('\xef','\xbb','\xbf','['):
      // UTF-8 byte order mark followed by the start of the ASS signature.
      if (StartsWith(buffer, buffer_size, kAssBomSignature))
        return CONTAINER_ASS;
      break;

    case 0x7ffe8001:
    case 0xfe7f0180:
    case 0x1fffe800:
    case 0xff1f00e8:
      if (CheckDts(buffer, buffer_size))
        return CONTAINER_DTS;
      break;

    case 0xb7d80020:
      if (StartsWith(buffer,
                     buffer_size,
                     kWtvSignature,
                     sizeof(kWtvSignature))) {
        return CONTAINER_WTV;
      }
      break;
  }

  // Now try a few different ones that look at something other
  // than the first 4 bytes.
  // Mask out the 4th byte so that only the first 3 bytes are compared.
  uint32_t first3 = first4 & 0xffffff00;
  switch (first3) {
    case TAG('C','W','S',0):
    case TAG('F','W','S',0):
      return CONTAINER_SWF;

    case TAG('I','D','3',0):
      return CONTAINER_MP3;
  }

  // Maybe the first 2 characters are something we can use.
  uint32_t first2 = Read16(buffer);
  switch (first2) {
    case kAc3SyncWord:
      // AC3 is tried before EAC3.
      if (CheckAc3(buffer, buffer_size))
        return CONTAINER_AC3;
      if (CheckEac3(buffer, buffer_size))
        return CONTAINER_EAC3;
      break;

    case 0xfff0:
    case 0xfff1:
    case 0xfff8:
    case 0xfff9:
      if (CheckAac(buffer, buffer_size))
        return CONTAINER_AAC;
      break;
  }

  // Check if the file is in MP3 format without the ID3 header.
  if (CheckMp3(buffer, buffer_size))
    return CONTAINER_MP3;

  return CONTAINER_UNKNOWN;
}
1630 
1631 // Attempt to determine the container name from the buffer provided.
DetermineContainer(const uint8_t * buffer,int buffer_size)1632 MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) {
1633   DCHECK(buffer);
1634 
1635   // Since MOV/QuickTime/MPEG4 streams are common, check for them first.
1636   if (CheckMov(buffer, buffer_size))
1637     return CONTAINER_MOV;
1638 
1639   // Next attempt the simple checks, that typically look at just the
1640   // first few bytes of the file.
1641   MediaContainerName result = LookupContainerByFirst4(buffer, buffer_size);
1642   if (result != CONTAINER_UNKNOWN)
1643     return result;
1644 
1645   // Additional checks that may scan a portion of the buffer.
1646   if (CheckMpeg2ProgramStream(buffer, buffer_size))
1647     return CONTAINER_MPEG2PS;
1648   if (CheckMpeg2TransportStream(buffer, buffer_size))
1649     return CONTAINER_MPEG2TS;
1650   if (CheckMJpeg(buffer, buffer_size))
1651     return CONTAINER_MJPEG;
1652   if (CheckDV(buffer, buffer_size))
1653     return CONTAINER_DV;
1654   if (CheckH261(buffer, buffer_size))
1655     return CONTAINER_H261;
1656   if (CheckH263(buffer, buffer_size))
1657     return CONTAINER_H263;
1658   if (CheckH264(buffer, buffer_size))
1659     return CONTAINER_H264;
1660   if (CheckMpeg4BitStream(buffer, buffer_size))
1661     return CONTAINER_MPEG4BS;
1662   if (CheckVC1(buffer, buffer_size))
1663     return CONTAINER_VC1;
1664   if (CheckSrt(buffer, buffer_size))
1665     return CONTAINER_SRT;
1666   if (CheckGsm(buffer, buffer_size))
1667     return CONTAINER_GSM;
1668 
1669   // AC3/EAC3 might not start at the beginning of the stream,
1670   // so scan for a start code.
1671   int offset = 1;  // No need to start at byte 0 due to First4 check.
1672   if (AdvanceToStartCode(buffer, buffer_size, &offset, 4, 16, kAc3SyncWord)) {
1673     if (CheckAc3(buffer + offset, buffer_size - offset))
1674       return CONTAINER_AC3;
1675     if (CheckEac3(buffer + offset, buffer_size - offset))
1676       return CONTAINER_EAC3;
1677   }
1678 
1679   return CONTAINER_UNKNOWN;
1680 }
1681 
1682 }  // namespace container_names
1683 
1684 }  // namespace media
1685