1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "media/base/container_names.h"
6 
7 #include <stddef.h>
8 
9 #include <cctype>
10 #include <limits>
11 
12 #include "base/logging.h"
13 #include "base/numerics/safe_conversions.h"
14 #include "base/stl_util.h"
15 #include "media/base/bit_reader.h"
16 
17 namespace media {
18 
19 namespace container_names {
20 
// Packs four byte-sized values into a single 32-bit tag. |a| ends up in the
// most significant byte and |d| in the least significant one; each argument is
// truncated to 8 bits first.
#define TAG(a, b, c, d)                                     \
  ((static_cast<uint32_t>(static_cast<uint8_t>(d))) |       \
   (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 8) |  \
   (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 16) | \
   (static_cast<uint32_t>(static_cast<uint8_t>(a)) << 24))
26 
// Makes the enclosing function return false when condition |x| does not hold.
// Wrapped in do/while so that it behaves as a single statement.
#define RCHECK(x)   \
  do {              \
    if (!(x)) {     \
      return false; \
    }               \
  } while (false)
32 
// The three bytes 0xEF 0xBB 0xBF (UTF-8 byte order mark) as a string literal.
#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
34 
// Returns the 16-bit big-endian value stored in the two bytes starting at |p|.
// The caller must guarantee that both bytes are readable.
static int Read16(const uint8_t* p) {
  const int high_byte = p[0];
  const int low_byte = p[1];
  return (high_byte << 8) | low_byte;
}
39 
// Returns the 24-bit big-endian value stored in the three bytes starting at
// |p|. The caller must guarantee that all three bytes are readable.
static uint32_t Read24(const uint8_t* p) {
  const uint32_t byte0 = p[0];
  const uint32_t byte1 = p[1];
  const uint32_t byte2 = p[2];
  return (byte0 << 16) | (byte1 << 8) | byte2;
}
44 
// Returns the 32-bit big-endian value stored in the four bytes starting at
// |p|. The caller must guarantee that all four bytes are readable.
static uint32_t Read32(const uint8_t* p) {
  // Accumulate in an unsigned 32-bit value. Shifting the promoted (signed int)
  // byte left by 24 would move a set top bit into the sign bit, which is not
  // well defined for signed operands.
  uint32_t value = p[0];
  value = (value << 8) | p[1];
  value = (value << 8) | p[2];
  value = (value << 8) | p[3];
  return value;
}
49 
// Returns the 32-bit little-endian value stored in the four bytes starting at
// |p|. The caller must guarantee that all four bytes are readable.
static uint32_t Read32LE(const uint8_t* p) {
  // Accumulate in an unsigned 32-bit value, most significant byte (the last
  // one in memory) first. This avoids left-shifting a promoted signed int into
  // its sign bit when p[3] >= 0x80.
  uint32_t value = p[3];
  value = (value << 8) | p[2];
  value = (value << 8) | p[1];
  value = (value << 8) | p[0];
  return value;
}
54 
// Returns true if the first bytes of |buffer| equal the NUL-terminated string
// |prefix| (terminator excluded). Never reads past |buffer_size| bytes: a
// buffer shorter than the prefix simply does not match.
static bool StartsWith(const uint8_t* buffer,
                       size_t buffer_size,
                       const char* prefix) {
  const size_t prefix_length = strlen(prefix);
  if (buffer_size < prefix_length)
    return false;
  return memcmp(buffer, prefix, prefix_length) == 0;
}
64 
// Returns true if the first |prefix_size| bytes of |buffer| equal those of
// |prefix|. Taking an explicit length allows the prefix to contain embedded
// \0 bytes. Never reads past |buffer_size| bytes: a buffer shorter than the
// prefix simply does not match.
static bool StartsWith(const uint8_t* buffer,
                       size_t buffer_size,
                       const uint8_t* prefix,
                       size_t prefix_size) {
  if (buffer_size < prefix_size)
    return false;
  return memcmp(buffer, prefix, prefix_size) == 0;
}
74 
75 // Helper function to read up to 64 bits from a bit stream.
76 // TODO(chcunningham): Delete this helper and replace with direct calls to
77 // reader that handle read failure. As-is, we hide failure because returning 0
78 // is valid for both a successful and failed read.
ReadBits(BitReader * reader,int num_bits)79 static uint64_t ReadBits(BitReader* reader, int num_bits) {
80   DCHECK_GE(reader->bits_available(), num_bits);
81   DCHECK((num_bits > 0) && (num_bits <= 64));
82   uint64_t value = 0;
83 
84   if (!reader->ReadBits(num_bits, &value))
85     return 0;
86 
87   return value;
88 }
89 
// AC3 frame sizes in bytes. The row is selected by frmsizecod (0..37) and the
// column by fscod (0..2), which is how CheckAc3() indexes the table. Per ATSC
// A/52 Table 5.18 the columns correspond to 48 kHz, 44.1 kHz and 32 kHz; the
// standard lists the sizes in 16-bit words, so every entry here is twice the
// published value.
const int kAc3FrameSizeTable[38][3] = {
  { 128, 138, 192 },    { 128, 140, 192 },     // 32 kbps
  { 160, 174, 240 },    { 160, 176, 240 },     // 40 kbps
  { 192, 208, 288 },    { 192, 210, 288 },     // 48 kbps
  { 224, 242, 336 },    { 224, 244, 336 },     // 56 kbps
  { 256, 278, 384 },    { 256, 280, 384 },     // 64 kbps
  { 320, 348, 480 },    { 320, 350, 480 },     // 80 kbps
  { 384, 416, 576 },    { 384, 418, 576 },     // 96 kbps
  { 448, 486, 672 },    { 448, 488, 672 },     // 112 kbps
  { 512, 556, 768 },    { 512, 558, 768 },     // 128 kbps
  { 640, 696, 960 },    { 640, 698, 960 },     // 160 kbps
  { 768, 834, 1152 },   { 768, 836, 1152 },    // 192 kbps
  { 896, 974, 1344 },   { 896, 976, 1344 },    // 224 kbps
  { 1024, 1114, 1536 }, { 1024, 1116, 1536 },  // 256 kbps
  { 1280, 1392, 1920 }, { 1280, 1394, 1920 },  // 320 kbps
  { 1536, 1670, 2304 }, { 1536, 1672, 2304 },  // 384 kbps
  { 1792, 1950, 2688 }, { 1792, 1952, 2688 },  // 448 kbps
  { 2048, 2228, 3072 }, { 2048, 2230, 3072 },  // 512 kbps
  { 2304, 2506, 3456 }, { 2304, 2508, 3456 },  // 576 kbps
  // 640 kbps at 44.1 kHz is 1393/1394 words, i.e. 2786/2788 bytes.
  { 2560, 2786, 3840 }, { 2560, 2788, 3840 }   // 640 kbps
};
103 
104 // Checks for an ADTS AAC container.
CheckAac(const uint8_t * buffer,int buffer_size)105 static bool CheckAac(const uint8_t* buffer, int buffer_size) {
106   // Audio Data Transport Stream (ADTS) header is 7 or 9 bytes
107   // (from http://wiki.multimedia.cx/index.php?title=ADTS)
108   RCHECK(buffer_size > 6);
109 
110   int offset = 0;
111   while (offset + 6 < buffer_size) {
112     BitReader reader(buffer + offset, 6);
113 
114     // Syncword must be 0xfff.
115     RCHECK(ReadBits(&reader, 12) == 0xfff);
116 
117     // Skip MPEG version.
118     reader.SkipBits(1);
119 
120     // Layer is always 0.
121     RCHECK(ReadBits(&reader, 2) == 0);
122 
123     // Skip protection + profile.
124     reader.SkipBits(1 + 2);
125 
126     // Check sampling frequency index.
127     RCHECK(ReadBits(&reader, 4) != 15);  // Forbidden.
128 
129     // Skip private stream, channel configuration, originality, home,
130     // copyrighted stream, and copyright_start.
131     reader.SkipBits(1 + 3 + 1 + 1 + 1 + 1);
132 
133     // Get frame length (includes header).
134     int size = ReadBits(&reader, 13);
135     RCHECK(size > 0);
136     offset += size;
137   }
138   return true;
139 }
140 
141 const uint16_t kAc3SyncWord = 0x0b77;
142 
143 // Checks for an AC3 container.
CheckAc3(const uint8_t * buffer,int buffer_size)144 static bool CheckAc3(const uint8_t* buffer, int buffer_size) {
145   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
146   //            Doc. A/52:2012
147   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
148 
149   // AC3 container looks like syncinfo | bsi | audblk * 6 | aux | check.
150   RCHECK(buffer_size > 6);
151 
152   int offset = 0;
153   while (offset + 6 < buffer_size) {
154     BitReader reader(buffer + offset, 6);
155 
156     // Check syncinfo.
157     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
158 
159     // Skip crc1.
160     reader.SkipBits(16);
161 
162     // Verify fscod.
163     int sample_rate_code = ReadBits(&reader, 2);
164     RCHECK(sample_rate_code != 3);  // Reserved.
165 
166     // Verify frmsizecod.
167     int frame_size_code = ReadBits(&reader, 6);
168     RCHECK(frame_size_code < 38);  // Undefined.
169 
170     // Verify bsid.
171     RCHECK(ReadBits(&reader, 5) < 10);  // Normally 8 or 6, 16 used by EAC3.
172 
173     offset += kAc3FrameSizeTable[frame_size_code][sample_rate_code];
174   }
175   return true;
176 }
177 
178 // Checks for an EAC3 container (very similar to AC3)
CheckEac3(const uint8_t * buffer,int buffer_size)179 static bool CheckEac3(const uint8_t* buffer, int buffer_size) {
180   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
181   //            Doc. A/52:2012
182   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
183 
184   // EAC3 container looks like syncinfo | bsi | audfrm | audblk* | aux | check.
185   RCHECK(buffer_size > 6);
186 
187   int offset = 0;
188   while (offset + 6 < buffer_size) {
189     BitReader reader(buffer + offset, 6);
190 
191     // Check syncinfo.
192     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
193 
194     // Verify strmtyp.
195     RCHECK(ReadBits(&reader, 2) != 3);
196 
197     // Skip substreamid.
198     reader.SkipBits(3);
199 
200     // Get frmsize. Include syncinfo size and convert to bytes.
201     int frame_size = (ReadBits(&reader, 11) + 1) * 2;
202     RCHECK(frame_size >= 7);
203 
204     // Skip fscod, fscod2, acmod, and lfeon.
205     reader.SkipBits(2 + 2 + 3 + 1);
206 
207     // Verify bsid.
208     int bit_stream_id = ReadBits(&reader, 5);
209     RCHECK(bit_stream_id >= 11 && bit_stream_id <= 16);
210 
211     offset += frame_size;
212   }
213   return true;
214 }
215 
216 // Additional checks for a BINK container.
CheckBink(const uint8_t * buffer,int buffer_size)217 static bool CheckBink(const uint8_t* buffer, int buffer_size) {
218   // Reference: http://wiki.multimedia.cx/index.php?title=Bink_Container
219   RCHECK(buffer_size >= 44);
220 
221   // Verify number of frames specified.
222   RCHECK(Read32LE(buffer + 8) > 0);
223 
224   // Verify width in range.
225   int width = Read32LE(buffer + 20);
226   RCHECK(width > 0 && width <= 32767);
227 
228   // Verify height in range.
229   int height = Read32LE(buffer + 24);
230   RCHECK(height > 0 && height <= 32767);
231 
232   // Verify frames per second specified.
233   RCHECK(Read32LE(buffer + 28) > 0);
234 
235   // Verify video frames per second specified.
236   RCHECK(Read32LE(buffer + 32) > 0);
237 
238   // Number of audio tracks must be 256 or less.
239   return (Read32LE(buffer + 40) <= 256);
240 }
241 
242 // Additional checks for a CAF container.
CheckCaf(const uint8_t * buffer,int buffer_size)243 static bool CheckCaf(const uint8_t* buffer, int buffer_size) {
244   // Reference: Apple Core Audio Format Specification 1.0
245   // (https://developer.apple.com/library/mac/#documentation/MusicAudio/Reference/CAFSpec/CAF_spec/CAF_spec.html)
246   RCHECK(buffer_size >= 52);
247   BitReader reader(buffer, buffer_size);
248 
249   // mFileType should be "caff".
250   RCHECK(ReadBits(&reader, 32) == TAG('c', 'a', 'f', 'f'));
251 
252   // mFileVersion should be 1.
253   RCHECK(ReadBits(&reader, 16) == 1);
254 
255   // Skip mFileFlags.
256   reader.SkipBits(16);
257 
258   // First chunk should be Audio Description chunk, size 32l.
259   RCHECK(ReadBits(&reader, 32) == TAG('d', 'e', 's', 'c'));
260   RCHECK(ReadBits(&reader, 64) == 32);
261 
262   // CAFAudioFormat.mSampleRate(float64) not 0
263   RCHECK(ReadBits(&reader, 64) != 0);
264 
265   // CAFAudioFormat.mFormatID not 0
266   RCHECK(ReadBits(&reader, 32) != 0);
267 
268   // Skip CAFAudioFormat.mBytesPerPacket and mFramesPerPacket.
269   reader.SkipBits(32 + 32);
270 
271   // CAFAudioFormat.mChannelsPerFrame not 0
272   RCHECK(ReadBits(&reader, 32) != 0);
273   return true;
274 }
275 
// Lookup tables used by CheckDts(). Each entry says whether the field value
// equal to its index is accepted. They are read-only, hence const.

// Indexed by the 4-bit core audio sampling frequency code.
static const bool kSamplingFrequencyValid[16] = {
    false, true, true, true, false, false, true, true,
    true, false, false, true, true, true, false, false };

// Indexed by the 3-bit extension audio descriptor flag.
static const bool kExtAudioIdValid[8] = {
    true, false, true, false, false, false, true, false };
282 
283 // Additional checks for a DTS container.
CheckDts(const uint8_t * buffer,int buffer_size)284 static bool CheckDts(const uint8_t* buffer, int buffer_size) {
285   // Reference: ETSI TS 102 114 V1.3.1 (2011-08)
286   // (http://www.etsi.org/deliver/etsi_ts/102100_102199/102114/01.03.01_60/ts_102114v010301p.pdf)
287   RCHECK(buffer_size > 11);
288 
289   int offset = 0;
290   while (offset + 11 < buffer_size) {
291     BitReader reader(buffer + offset, 11);
292 
293     // Verify sync word.
294     RCHECK(ReadBits(&reader, 32) == 0x7ffe8001);
295 
296     // Skip frame type and deficit sample count.
297     reader.SkipBits(1 + 5);
298 
299     // Verify CRC present flag.
300     RCHECK(ReadBits(&reader, 1) == 0);  // CPF must be 0.
301 
302     // Verify number of PCM sample blocks.
303     RCHECK(ReadBits(&reader, 7) >= 5);
304 
305     // Verify primary frame byte size.
306     int frame_size = ReadBits(&reader, 14);
307     RCHECK(frame_size >= 95);
308 
309     // Skip audio channel arrangement.
310     reader.SkipBits(6);
311 
312     // Verify core audio sampling frequency is an allowed value.
313     size_t sampling_freq_index = ReadBits(&reader, 4);
314     RCHECK(sampling_freq_index < base::size(kSamplingFrequencyValid));
315     RCHECK(kSamplingFrequencyValid[sampling_freq_index]);
316 
317     // Verify transmission bit rate is valid.
318     RCHECK(ReadBits(&reader, 5) <= 25);
319 
320     // Verify reserved field is 0.
321     RCHECK(ReadBits(&reader, 1) == 0);
322 
323     // Skip dynamic range flag, time stamp flag, auxiliary data flag, and HDCD.
324     reader.SkipBits(1 + 1 + 1 + 1);
325 
326     // Verify extension audio descriptor flag is an allowed value.
327     size_t audio_id_index = ReadBits(&reader, 3);
328     RCHECK(audio_id_index < base::size(kExtAudioIdValid));
329     RCHECK(kExtAudioIdValid[audio_id_index]);
330 
331     // Skip extended coding flag and audio sync word insertion flag.
332     reader.SkipBits(1 + 1);
333 
334     // Verify low frequency effects flag is an allowed value.
335     RCHECK(ReadBits(&reader, 2) != 3);
336 
337     offset += frame_size + 1;
338   }
339   return true;
340 }
341 
342 // Checks for a DV container.
CheckDV(const uint8_t * buffer,int buffer_size)343 static bool CheckDV(const uint8_t* buffer, int buffer_size) {
344   // Reference: SMPTE 314M (Annex A has differences with IEC 61834).
345   // (http://standards.smpte.org/content/978-1-61482-454-1/st-314-2005/SEC1.body.pdf)
346   RCHECK(buffer_size > 11);
347 
348   int offset = 0;
349   int current_sequence_number = -1;
350   int last_block_number[6] = {0};
351   while (offset + 11 < buffer_size) {
352     BitReader reader(buffer + offset, 11);
353 
354     // Decode ID data. Sections 5, 6, and 7 are reserved.
355     int section = ReadBits(&reader, 3);
356     RCHECK(section < 5);
357 
358     // Next bit must be 1.
359     RCHECK(ReadBits(&reader, 1) == 1);
360 
361     // Skip arbitrary bits.
362     reader.SkipBits(4);
363 
364     int sequence_number = ReadBits(&reader, 4);
365 
366     // Skip FSC.
367     reader.SkipBits(1);
368 
369     // Next 3 bits must be 1.
370     RCHECK(ReadBits(&reader, 3) == 7);
371 
372     int block_number = ReadBits(&reader, 8);
373 
374     if (section == 0) {  // Header.
375       // Validate the reserved bits in the next 8 bytes.
376       reader.SkipBits(1);
377       RCHECK(ReadBits(&reader, 1) == 0);
378       RCHECK(ReadBits(&reader, 11) == 0x7ff);
379       reader.SkipBits(4);
380       RCHECK(ReadBits(&reader, 4) == 0xf);
381       reader.SkipBits(4);
382       RCHECK(ReadBits(&reader, 4) == 0xf);
383       reader.SkipBits(4);
384       RCHECK(ReadBits(&reader, 4) == 0xf);
385       reader.SkipBits(3);
386       RCHECK(ReadBits(&reader, 24) == 0xffffff);
387       current_sequence_number = sequence_number;
388       for (size_t i = 0; i < base::size(last_block_number); ++i)
389         last_block_number[i] = -1;
390     } else {
391       // Sequence number must match (this will also fail if no header seen).
392       RCHECK(sequence_number == current_sequence_number);
393       // Block number should be increasing.
394       RCHECK(block_number > last_block_number[section]);
395       last_block_number[section] = block_number;
396     }
397 
398     // Move to next block.
399     offset += 80;
400   }
401   return true;
402 }
403 
404 
405 // Checks for a GSM container.
CheckGsm(const uint8_t * buffer,int buffer_size)406 static bool CheckGsm(const uint8_t* buffer, int buffer_size) {
407   // Reference: ETSI EN 300 961 V8.1.1
408   // (http://www.etsi.org/deliver/etsi_en/300900_300999/300961/08.01.01_60/en_300961v080101p.pdf)
409   // also http://tools.ietf.org/html/rfc3551#page-24
410   // GSM files have a 33 byte block, only first 4 bits are fixed.
411   RCHECK(buffer_size >= 1024);  // Need enough data to do a decent check.
412 
413   int offset = 0;
414   while (offset < buffer_size) {
415     // First 4 bits of each block are xD.
416     RCHECK((buffer[offset] & 0xf0) == 0xd0);
417     offset += 33;
418   }
419   return true;
420 }
421 
422 // Advance to the first set of |num_bits| bits that match |start_code|. |offset|
423 // is the current location in the buffer, and is updated. |bytes_needed| is the
424 // number of bytes that must remain in the buffer when |start_code| is found.
425 // Returns true if start_code found (and enough space in the buffer after it),
426 // false otherwise.
AdvanceToStartCode(const uint8_t * buffer,int buffer_size,int * offset,int bytes_needed,int num_bits,uint32_t start_code)427 static bool AdvanceToStartCode(const uint8_t* buffer,
428                                int buffer_size,
429                                int* offset,
430                                int bytes_needed,
431                                int num_bits,
432                                uint32_t start_code) {
433   DCHECK_GE(bytes_needed, 3);
434   DCHECK_LE(num_bits, 24);  // Only supports up to 24 bits.
435 
436   // Create a mask to isolate |num_bits| bits, once shifted over.
437   uint32_t bits_to_shift = 24 - num_bits;
438   uint32_t mask = (1 << num_bits) - 1;
439   while (*offset + bytes_needed < buffer_size) {
440     uint32_t next = Read24(buffer + *offset);
441     if (((next >> bits_to_shift) & mask) == start_code)
442       return true;
443     ++(*offset);
444   }
445   return false;
446 }
447 
448 // Checks for an H.261 container.
CheckH261(const uint8_t * buffer,int buffer_size)449 static bool CheckH261(const uint8_t* buffer, int buffer_size) {
450   // Reference: ITU-T Recommendation H.261 (03/1993)
451   // (http://www.itu.int/rec/T-REC-H.261-199303-I/en)
452   RCHECK(buffer_size > 16);
453 
454   int offset = 0;
455   bool seen_start_code = false;
456   while (true) {
457     // Advance to picture_start_code, if there is one.
458     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 20, 0x10)) {
459       // No start code found (or off end of buffer), so success if
460       // there was at least one valid header.
461       return seen_start_code;
462     }
463 
464     // Now verify the block. AdvanceToStartCode() made sure that there are
465     // at least 4 bytes remaining in the buffer.
466     BitReader reader(buffer + offset, buffer_size - offset);
467     RCHECK(ReadBits(&reader, 20) == 0x10);
468 
469     // Skip the temporal reference and PTYPE.
470     reader.SkipBits(5 + 6);
471 
472     // Skip any extra insertion information. Since this is open-ended, if we run
473     // out of bits assume that the buffer is correctly formatted.
474     int extra = ReadBits(&reader, 1);
475     while (extra == 1) {
476       if (!reader.SkipBits(8))
477         return seen_start_code;
478       if (!reader.ReadBits(1, &extra))
479         return seen_start_code;
480     }
481 
482     // Next should be a Group of Blocks start code. Again, if we run out of
483     // bits, then assume that the buffer up to here is correct, and the buffer
484     // just happened to end in the middle of a header.
485     int next;
486     if (!reader.ReadBits(16, &next))
487       return seen_start_code;
488     RCHECK(next == 1);
489 
490     // Move to the next block.
491     seen_start_code = true;
492     offset += 4;
493   }
494 }
495 
496 // Checks for an H.263 container.
CheckH263(const uint8_t * buffer,int buffer_size)497 static bool CheckH263(const uint8_t* buffer, int buffer_size) {
498   // Reference: ITU-T Recommendation H.263 (01/2005)
499   // (http://www.itu.int/rec/T-REC-H.263-200501-I/en)
500   // header is PSC(22b) + TR(8b) + PTYPE(8+b).
501   RCHECK(buffer_size > 16);
502 
503   int offset = 0;
504   bool seen_start_code = false;
505   while (true) {
506     // Advance to picture_start_code, if there is one.
507     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 9, 22, 0x20)) {
508       // No start code found (or off end of buffer), so success if
509       // there was at least one valid header.
510       return seen_start_code;
511     }
512 
513     // Now verify the block. AdvanceToStartCode() made sure that there are
514     // at least 9 bytes remaining in the buffer.
515     BitReader reader(buffer + offset, 9);
516     RCHECK(ReadBits(&reader, 22) == 0x20);
517 
518     // Skip the temporal reference.
519     reader.SkipBits(8);
520 
521     // Verify that the first 2 bits of PTYPE are 10b.
522     RCHECK(ReadBits(&reader, 2) == 2);
523 
524     // Skip the split screen indicator, document camera indicator, and full
525     // picture freeze release.
526     reader.SkipBits(1 + 1 + 1);
527 
528     // Verify Source Format.
529     int format = ReadBits(&reader, 3);
530     RCHECK(format != 0 && format != 6);  // Forbidden or reserved.
531 
532     if (format == 7) {
533       // Verify full extended PTYPE.
534       int ufep = ReadBits(&reader, 3);
535       if (ufep == 1) {
536         // Verify the optional part of PLUSPTYPE.
537         format = ReadBits(&reader, 3);
538         RCHECK(format != 0 && format != 7);  // Reserved.
539         reader.SkipBits(11);
540         // Next 4 bits should be b1000.
541         RCHECK(ReadBits(&reader, 4) == 8);  // Not allowed.
542       } else {
543         RCHECK(ufep == 0);  // Only 0 and 1 allowed.
544       }
545 
546       // Verify picture type code is not a reserved value.
547       int picture_type_code = ReadBits(&reader, 3);
548       RCHECK(picture_type_code != 6 && picture_type_code != 7);  // Reserved.
549 
550       // Skip picture resampling mode, reduced resolution mode,
551       // and rounding type.
552       reader.SkipBits(1 + 1 + 1);
553 
554       // Next 3 bits should be b001.
555       RCHECK(ReadBits(&reader, 3) == 1);  // Not allowed.
556     }
557 
558     // Move to the next block.
559     seen_start_code = true;
560     offset += 9;
561   }
562 }
563 
564 // Checks for an H.264 container.
CheckH264(const uint8_t * buffer,int buffer_size)565 static bool CheckH264(const uint8_t* buffer, int buffer_size) {
566   // Reference: ITU-T Recommendation H.264 (01/2012)
567   // (http://www.itu.int/rec/T-REC-H.264)
568   // Section B.1: Byte stream NAL unit syntax and semantics.
569   RCHECK(buffer_size > 4);
570 
571   int offset = 0;
572   int parameter_count = 0;
573   while (true) {
574     // Advance to picture_start_code, if there is one.
575     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 24, 1)) {
576       // No start code found (or off end of buffer), so success if
577       // there was at least one valid header.
578       return parameter_count > 0;
579     }
580 
581     // Now verify the block. AdvanceToStartCode() made sure that there are
582     // at least 4 bytes remaining in the buffer.
583     BitReader reader(buffer + offset, 4);
584     RCHECK(ReadBits(&reader, 24) == 1);
585 
586     // Verify forbidden_zero_bit.
587     RCHECK(ReadBits(&reader, 1) == 0);
588 
589     // Extract nal_ref_idc and nal_unit_type.
590     int nal_ref_idc = ReadBits(&reader, 2);
591     int nal_unit_type = ReadBits(&reader, 5);
592 
593     switch (nal_unit_type) {
594       case 5:  // Coded slice of an IDR picture.
595         RCHECK(nal_ref_idc != 0);
596         break;
597       case 6:   // Supplemental enhancement information (SEI).
598       case 9:   // Access unit delimiter.
599       case 10:  // End of sequence.
600       case 11:  // End of stream.
601       case 12:  // Filler data.
602         RCHECK(nal_ref_idc == 0);
603         break;
604       case 7:  // Sequence parameter set.
605       case 8:  // Picture parameter set.
606         ++parameter_count;
607         break;
608     }
609 
610     // Skip the current start_code_prefix and move to the next.
611     offset += 4;
612   }
613 }
614 
615 static const char kHlsSignature[] = "#EXTM3U";
616 static const char kHls1[] = "#EXT-X-STREAM-INF:";
617 static const char kHls2[] = "#EXT-X-TARGETDURATION:";
618 static const char kHls3[] = "#EXT-X-MEDIA-SEQUENCE:";
619 
620 // Additional checks for a HLS container.
CheckHls(const uint8_t * buffer,int buffer_size)621 static bool CheckHls(const uint8_t* buffer, int buffer_size) {
622   // HLS is simply a play list used for Apple HTTP Live Streaming.
623   // Reference: Apple HTTP Live Streaming Overview
624   // (http://goo.gl/MIwxj)
625 
626   if (StartsWith(buffer, buffer_size, kHlsSignature)) {
627     // Need to find "#EXT-X-STREAM-INF:", "#EXT-X-TARGETDURATION:", or
628     // "#EXT-X-MEDIA-SEQUENCE:" somewhere in the buffer. Other playlists (like
629     // WinAmp) only have additional lines with #EXTINF
630     // (http://en.wikipedia.org/wiki/M3U).
631     int offset = strlen(kHlsSignature);
632     while (offset < buffer_size) {
633       if (buffer[offset] == '#') {
634         if (StartsWith(buffer + offset, buffer_size - offset, kHls1) ||
635             StartsWith(buffer + offset, buffer_size - offset, kHls2) ||
636             StartsWith(buffer + offset, buffer_size - offset, kHls3)) {
637           return true;
638         }
639       }
640       ++offset;
641     }
642   }
643   return false;
644 }
645 
646 // Checks for a MJPEG stream.
CheckMJpeg(const uint8_t * buffer,int buffer_size)647 static bool CheckMJpeg(const uint8_t* buffer, int buffer_size) {
648   // Reference: ISO/IEC 10918-1 : 1993(E), Annex B
649   // (http://www.w3.org/Graphics/JPEG/itu-t81.pdf)
650   RCHECK(buffer_size >= 16);
651 
652   int offset = 0;
653   int last_restart = -1;
654   int num_codes = 0;
655   while (offset + 5 < buffer_size) {
656     // Marker codes are always a two byte code with the first byte xFF.
657     RCHECK(buffer[offset] == 0xff);
658     uint8_t code = buffer[offset + 1];
659     RCHECK(code >= 0xc0 || code == 1);
660 
661     // Skip sequences of xFF.
662     if (code == 0xff) {
663       ++offset;
664       continue;
665     }
666 
667     // Success if the next marker code is EOI (end of image)
668     if (code == 0xd9)
669       return true;
670 
671     // Check remaining codes.
672     if (code == 0xd8 || code == 1) {
673       // SOI (start of image) / TEM (private use). No other data with header.
674       offset += 2;
675     } else if (code >= 0xd0 && code <= 0xd7) {
676       // RST (restart) codes must be in sequence. No other data with header.
677       int restart = code & 0x07;
678       if (last_restart >= 0)
679         RCHECK(restart == (last_restart + 1) % 8);
680       last_restart = restart;
681       offset += 2;
682     } else {
683       // All remaining marker codes are followed by a length of the header.
684       int length = Read16(buffer + offset + 2) + 2;
685 
686       // Special handling of SOS (start of scan) marker since the entropy
687       // coded data follows the SOS. Any xFF byte in the data block must be
688       // followed by x00 in the data.
689       if (code == 0xda) {
690         int number_components = buffer[offset + 4];
691         RCHECK(length == 8 + 2 * number_components);
692 
693         // Advance to the next marker.
694         offset += length;
695         while (offset + 2 < buffer_size) {
696           if (buffer[offset] == 0xff && buffer[offset + 1] != 0)
697             break;
698           ++offset;
699         }
700       } else {
701         // Skip over the marker data for the other marker codes.
702         offset += length;
703       }
704     }
705     ++num_codes;
706   }
707   return (num_codes > 1);
708 }
709 
710 enum Mpeg2StartCodes {
711   PROGRAM_END_CODE = 0xb9,
712   PACK_START_CODE = 0xba
713 };
714 
715 // Checks for a MPEG2 Program Stream.
CheckMpeg2ProgramStream(const uint8_t * buffer,int buffer_size)716 static bool CheckMpeg2ProgramStream(const uint8_t* buffer, int buffer_size) {
717   // Reference: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
718   RCHECK(buffer_size > 14);
719 
720   int offset = 0;
721   while (offset + 14 < buffer_size) {
722     BitReader reader(buffer + offset, 14);
723 
724     // Must start with pack_start_code.
725     RCHECK(ReadBits(&reader, 24) == 1);
726     RCHECK(ReadBits(&reader, 8) == PACK_START_CODE);
727 
728     // Determine MPEG version (MPEG1 has b0010, while MPEG2 has b01).
729     int mpeg_version = ReadBits(&reader, 2);
730     if (mpeg_version == 0) {
731       // MPEG1, 10 byte header
732       // Validate rest of version code
733       RCHECK(ReadBits(&reader, 2) == 2);
734     } else {
735       RCHECK(mpeg_version == 1);
736     }
737 
738     // Skip system_clock_reference_base [32..30].
739     reader.SkipBits(3);
740 
741     // Verify marker bit.
742     RCHECK(ReadBits(&reader, 1) == 1);
743 
744     // Skip system_clock_reference_base [29..15].
745     reader.SkipBits(15);
746 
747     // Verify next marker bit.
748     RCHECK(ReadBits(&reader, 1) == 1);
749 
750     // Skip system_clock_reference_base [14..0].
751     reader.SkipBits(15);
752 
753     // Verify next marker bit.
754     RCHECK(ReadBits(&reader, 1) == 1);
755 
756     if (mpeg_version == 0) {
757       // Verify second marker bit.
758       RCHECK(ReadBits(&reader, 1) == 1);
759 
760       // Skip mux_rate.
761       reader.SkipBits(22);
762 
763       // Verify next marker bit.
764       RCHECK(ReadBits(&reader, 1) == 1);
765 
766       // Update offset to be after this header.
767       offset += 12;
768     } else {
769       // Must be MPEG2.
770       // Skip program_mux_rate.
771       reader.SkipBits(22);
772 
773       // Verify pair of marker bits.
774       RCHECK(ReadBits(&reader, 2) == 3);
775 
776       // Skip reserved.
777       reader.SkipBits(5);
778 
779       // Update offset to be after this header.
780       int pack_stuffing_length = ReadBits(&reader, 3);
781       offset += 14 + pack_stuffing_length;
782     }
783 
784     // Check for system headers and PES_packets.
785     while (offset + 6 < buffer_size && Read24(buffer + offset) == 1) {
786       // Next 8 bits determine stream type.
787       int stream_id = buffer[offset + 3];
788 
789       // Some stream types are reserved and shouldn't occur.
790       if (mpeg_version == 0)
791         RCHECK(stream_id != 0xbc && stream_id < 0xf0);
792       else
793         RCHECK(stream_id != 0xfc && stream_id != 0xfd && stream_id != 0xfe);
794 
795       // Some stream types are used for pack headers.
796       if (stream_id == PACK_START_CODE)  // back to outer loop.
797         break;
798       if (stream_id == PROGRAM_END_CODE)  // end of stream.
799         return true;
800 
801       int pes_length = Read16(buffer + offset + 4);
802       RCHECK(pes_length > 0);
803       offset = offset + 6 + pes_length;
804     }
805   }
806   // Success as we are off the end of the buffer and liked everything
807   // in the buffer.
808   return true;
809 }
810 
811 const uint8_t kMpeg2SyncWord = 0x47;
812 
813 // Checks for a MPEG2 Transport Stream.
CheckMpeg2TransportStream(const uint8_t * buffer,int buffer_size)814 static bool CheckMpeg2TransportStream(const uint8_t* buffer, int buffer_size) {
815   // Spec: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
816   // Normal packet size is 188 bytes. However, some systems add various error
817   // correction data at the end, resulting in packet of length 192/204/208
818   // (https://en.wikipedia.org/wiki/MPEG_transport_stream). Determine the
819   // length with the first packet.
820   RCHECK(buffer_size >= 250);  // Want more than 1 packet to check.
821 
822   int offset = 0;
823   int packet_length = -1;
824   while (buffer[offset] != kMpeg2SyncWord && offset < 20) {
825     // Skip over any header in the first 20 bytes.
826     ++offset;
827   }
828 
829   while (offset + 6 < buffer_size) {
830     BitReader reader(buffer + offset, 6);
831 
832     // Must start with sync byte.
833     RCHECK(ReadBits(&reader, 8) == kMpeg2SyncWord);
834 
835     // Skip transport_error_indicator, payload_unit_start_indicator, and
836     // transport_priority.
837     reader.SkipBits(1 + 1 + 1);
838 
839     // Verify the pid is not a reserved value.
840     int pid = ReadBits(&reader, 13);
841     RCHECK(pid < 3 || pid > 15);
842 
843     // Skip transport_scrambling_control.
844     reader.SkipBits(2);
845 
846     // Adaptation_field_control can not be 0.
847     int adaptation_field_control = ReadBits(&reader, 2);
848     RCHECK(adaptation_field_control != 0);
849 
850     // If there is an adaptation_field, verify it.
851     if (adaptation_field_control >= 2) {
852       // Skip continuity_counter.
853       reader.SkipBits(4);
854 
855       // Get adaptation_field_length and verify it.
856       int adaptation_field_length = ReadBits(&reader, 8);
857       if (adaptation_field_control == 2)
858         RCHECK(adaptation_field_length == 183);
859       else
860         RCHECK(adaptation_field_length <= 182);
861     }
862 
863     // Attempt to determine the packet length on the first packet.
864     if (packet_length < 0) {
865       if (buffer[offset + 188] == kMpeg2SyncWord)
866         packet_length = 188;
867       else if (buffer[offset + 192] == kMpeg2SyncWord)
868         packet_length = 192;
869       else if (buffer[offset + 204] == kMpeg2SyncWord)
870         packet_length = 204;
871       else
872         packet_length = 208;
873     }
874     offset += packet_length;
875   }
876   return true;
877 }
878 
// Start code values (the byte that follows the 00 00 01 prefix) that
// CheckMpeg4BitStream() keeps track of.
enum Mpeg4StartCodes {
  VISUAL_OBJECT_SEQUENCE_START_CODE = 0xB0,
  VISUAL_OBJECT_SEQUENCE_END_CODE = 0xB1,
  VISUAL_OBJECT_START_CODE = 0xB5,
  VOP_START_CODE = 0xB6
};
885 
886 // Checks for a raw MPEG4 bitstream container.
CheckMpeg4BitStream(const uint8_t * buffer,int buffer_size)887 static bool CheckMpeg4BitStream(const uint8_t* buffer, int buffer_size) {
888   // Defined in ISO/IEC 14496-2:2001.
889   // However, no length ... simply scan for start code values.
890   // Note tags are very similar to H.264.
891   RCHECK(buffer_size > 4);
892 
893   int offset = 0;
894   int sequence_start_count = 0;
895   int sequence_end_count = 0;
896   int visual_object_count = 0;
897   int vop_count = 0;
898   while (true) {
899     // Advance to start_code, if there is one.
900     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 6, 24, 1)) {
901       // Not a complete sequence in memory, so return true if we've seen a
902       // visual_object_sequence_start_code and a visual_object_start_code.
903       return (sequence_start_count > 0 && visual_object_count > 0);
904     }
905 
906     // Now verify the block. AdvanceToStartCode() made sure that there are
907     // at least 6 bytes remaining in the buffer.
908     BitReader reader(buffer + offset, 6);
909     RCHECK(ReadBits(&reader, 24) == 1);
910 
911     int start_code = ReadBits(&reader, 8);
912     RCHECK(start_code < 0x30 || start_code > 0xaf);  // 30..AF and
913     RCHECK(start_code < 0xb7 || start_code > 0xb9);  // B7..B9 reserved
914 
915     switch (start_code) {
916       case VISUAL_OBJECT_SEQUENCE_START_CODE: {
917         ++sequence_start_count;
918         // Verify profile in not one of many reserved values.
919         int profile = ReadBits(&reader, 8);
920         RCHECK(profile > 0);
921         RCHECK(profile < 0x04 || profile > 0x10);
922         RCHECK(profile < 0x13 || profile > 0x20);
923         RCHECK(profile < 0x23 || profile > 0x31);
924         RCHECK(profile < 0x35 || profile > 0x41);
925         RCHECK(profile < 0x43 || profile > 0x60);
926         RCHECK(profile < 0x65 || profile > 0x70);
927         RCHECK(profile < 0x73 || profile > 0x80);
928         RCHECK(profile < 0x83 || profile > 0x90);
929         RCHECK(profile < 0x95 || profile > 0xa0);
930         RCHECK(profile < 0xa4 || profile > 0xb0);
931         RCHECK(profile < 0xb5 || profile > 0xc0);
932         RCHECK(profile < 0xc3 || profile > 0xd0);
933         RCHECK(profile < 0xe4);
934         break;
935       }
936 
937       case VISUAL_OBJECT_SEQUENCE_END_CODE:
938         RCHECK(++sequence_end_count == sequence_start_count);
939         break;
940 
941       case VISUAL_OBJECT_START_CODE: {
942         ++visual_object_count;
943         if (ReadBits(&reader, 1) == 1) {
944           int visual_object_verid = ReadBits(&reader, 4);
945           RCHECK(visual_object_verid > 0 && visual_object_verid < 3);
946           RCHECK(ReadBits(&reader, 3) != 0);
947         }
948         int visual_object_type = ReadBits(&reader, 4);
949         RCHECK(visual_object_type > 0 && visual_object_type < 6);
950         break;
951       }
952 
953       case VOP_START_CODE:
954         RCHECK(++vop_count <= visual_object_count);
955         break;
956     }
957     // Skip this block.
958     offset += 6;
959   }
960 }
961 
962 // Additional checks for a MOV/QuickTime/MPEG4 container.
CheckMov(const uint8_t * buffer,int buffer_size)963 static bool CheckMov(const uint8_t* buffer, int buffer_size) {
964   // Reference: ISO/IEC 14496-12:2005(E).
965   // (http://standards.iso.org/ittf/PubliclyAvailableStandards/c061988_ISO_IEC_14496-12_2012.zip)
966   RCHECK(buffer_size > 8);
967 
968   int offset = 0;
969   int valid_top_level_boxes = 0;
970   while (offset + 8 < buffer_size) {
971     uint32_t atomsize = Read32(buffer + offset);
972     uint32_t atomtype = Read32(buffer + offset + 4);
973 
974     // Only need to check for atoms that are valid at the top level. However,
975     // "Boxes with an unrecognized type shall be ignored and skipped." So
976     // simply make sure that at least two recognized top level boxes are found.
977     // This list matches BoxReader::IsValidTopLevelBox().
978     switch (atomtype) {
979       case TAG('f', 't', 'y', 'p'):
980       case TAG('p', 'd', 'i', 'n'):
981       case TAG('b', 'l', 'o', 'c'):
982       case TAG('m', 'o', 'o', 'v'):
983       case TAG('m', 'o', 'o', 'f'):
984       case TAG('m', 'f', 'r', 'a'):
985       case TAG('m', 'd', 'a', 't'):
986       case TAG('f', 'r', 'e', 'e'):
987       case TAG('s', 'k', 'i', 'p'):
988       case TAG('m', 'e', 't', 'a'):
989       case TAG('m', 'e', 'c', 'o'):
990       case TAG('s', 't', 'y', 'p'):
991       case TAG('s', 'i', 'd', 'x'):
992       case TAG('s', 's', 'i', 'x'):
993       case TAG('p', 'r', 'f', 't'):
994       case TAG('u', 'u', 'i', 'd'):
995       case TAG('e', 'm', 's', 'g'):
996         ++valid_top_level_boxes;
997         break;
998     }
999     if (atomsize == 1) {
1000       // Indicates that the length is the next 64bits.
1001       if (offset + 16 > buffer_size)
1002         break;
1003       if (Read32(buffer + offset + 8) != 0)
1004         break;  // Offset is way past buffer size.
1005       atomsize = Read32(buffer + offset + 12);
1006     }
1007     if (atomsize == 0 || atomsize > static_cast<size_t>(buffer_size))
1008       break;  // Indicates the last atom or length too big.
1009     offset += atomsize;
1010   }
1011   return valid_top_level_boxes >= 2;
1012 }
1013 
// Values of the 2 bit MPEG audio version id read by
// ValidMpegAudioFrameHeader(); also the row index into kSampleRateTable.
enum MPEGVersion {
  VERSION_25 = 0,
  VERSION_RESERVED = 1,
  VERSION_2 = 2,
  VERSION_1 = 3
};
// Values of the 2 bit layer field read by ValidMpegAudioFrameHeader().
enum MPEGLayer {
  L_RESERVED = 0,
  LAYER_3 = 1,
  LAYER_2 = 2,
  LAYER_1 = 3
};
1026 
// Sampling rates in Hz, indexed by [MPEG version id][sampling rate index].
// A 0 entry marks a combination that is not valid. The table is read-only,
// hence const.
static const int kSampleRateTable[4][4] = {
    { 11025, 12000, 8000, 0 },   // v2.5
    { 0, 0, 0, 0 },              // not used
    { 22050, 24000, 16000, 0 },  // v2
    { 44100, 48000, 32000, 0 }   // v1
};
1032 
// Bit rates in kbit/s, indexed by the 4 bit bitrate index of the frame
// header. One table per version/layer combination (V2L23 is shared by
// layers 2 and 3 of the non-v1 versions). A 0 entry marks an index for which
// no frame size can be computed. The tables are read-only, hence const.
static const int kBitRateTableV1L1[16] = { 0, 32, 64, 96, 128, 160, 192, 224,
                                           256, 288, 320, 352, 384, 416, 448,
                                           0 };
static const int kBitRateTableV1L2[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           160, 192, 224, 256, 320, 384, 0 };
static const int kBitRateTableV1L3[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112,
                                           128, 160, 192, 224, 256, 320, 0 };
static const int kBitRateTableV2L1[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           144, 160, 176, 192, 224, 256, 0 };
static const int kBitRateTableV2L23[16] = { 0, 8, 16, 24, 32, 40, 48, 56, 64,
                                            80, 96, 112, 128, 144, 160, 0 };
1043 
ValidMpegAudioFrameHeader(const uint8_t * header,int header_size,int * framesize)1044 static bool ValidMpegAudioFrameHeader(const uint8_t* header,
1045                                       int header_size,
1046                                       int* framesize) {
1047   // Reference: http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm.
1048   DCHECK_GE(header_size, 4);
1049   *framesize = 0;
1050   BitReader reader(header, 4);  // Header can only be 4 bytes long.
1051 
1052   // Verify frame sync (11 bits) are all set.
1053   RCHECK(ReadBits(&reader, 11) == 0x7ff);
1054 
1055   // Verify MPEG audio version id.
1056   int version = ReadBits(&reader, 2);
1057   RCHECK(version != 1);  // Reserved.
1058 
1059   // Verify layer.
1060   int layer = ReadBits(&reader, 2);
1061   RCHECK(layer != 0);
1062 
1063   // Skip protection bit.
1064   reader.SkipBits(1);
1065 
1066   // Verify bitrate index.
1067   int bitrate_index = ReadBits(&reader, 4);
1068   RCHECK(bitrate_index != 0xf);
1069 
1070   // Verify sampling rate frequency index.
1071   int sampling_index = ReadBits(&reader, 2);
1072   RCHECK(sampling_index != 3);
1073 
1074   // Get padding bit.
1075   int padding = ReadBits(&reader, 1);
1076 
1077   // Frame size:
1078   // For Layer I files = (12 * BitRate / SampleRate + Padding) * 4
1079   // For others = 144 * BitRate / SampleRate + Padding
1080   // Unfortunately, BitRate and SampleRate are coded.
1081   int sampling_rate = kSampleRateTable[version][sampling_index];
1082   int bitrate;
1083   if (version == VERSION_1) {
1084     if (layer == LAYER_1)
1085       bitrate = kBitRateTableV1L1[bitrate_index];
1086     else if (layer == LAYER_2)
1087       bitrate = kBitRateTableV1L2[bitrate_index];
1088     else
1089       bitrate = kBitRateTableV1L3[bitrate_index];
1090   } else {
1091     if (layer == LAYER_1)
1092       bitrate = kBitRateTableV2L1[bitrate_index];
1093     else
1094       bitrate = kBitRateTableV2L23[bitrate_index];
1095   }
1096   if (layer == LAYER_1)
1097     *framesize = ((12000 * bitrate) / sampling_rate + padding) * 4;
1098   else
1099     *framesize = (144000 * bitrate) / sampling_rate + padding;
1100   return (bitrate > 0 && sampling_rate > 0);
1101 }
1102 
1103 // Additional checks for a MP3 container.
CheckMp3(const uint8_t * buffer,int buffer_size)1104 static bool CheckMp3(const uint8_t* buffer, int buffer_size) {
1105   // This function assumes that the ID3 header is not present in the file and
1106   // simply checks for several valid MPEG audio buffers after skipping any
1107   // optional padding characters.
1108   int numSeen = 0;
1109   int offset = 0;
1110 
1111   // Skip over any padding (0's).
1112   while (offset < buffer_size && buffer[offset] == 0)
1113     ++offset;
1114 
1115   while (offset + 3 < buffer_size) {
1116     int framesize;
1117     RCHECK(ValidMpegAudioFrameHeader(
1118         buffer + offset, buffer_size - offset, &framesize));
1119 
1120     // Have we seen enough valid headers?
1121     if (++numSeen > 10)
1122       return true;
1123     offset += framesize;
1124   }
1125   // Off the end of the buffer, return success if a few valid headers seen.
1126   return numSeen > 2;
1127 }
1128 
1129 // Check that the next characters in |buffer| represent a number. The format
1130 // accepted is optional whitespace followed by 1 or more digits. |max_digits|
1131 // specifies the maximum number of digits to process. Returns true if a valid
1132 // number is found, false otherwise.
VerifyNumber(const uint8_t * buffer,int buffer_size,int * offset,int max_digits)1133 static bool VerifyNumber(const uint8_t* buffer,
1134                          int buffer_size,
1135                          int* offset,
1136                          int max_digits) {
1137   RCHECK(*offset < buffer_size);
1138 
1139   // Skip over any leading space.
1140   while (isspace(buffer[*offset])) {
1141     ++(*offset);
1142     RCHECK(*offset < buffer_size);
1143   }
1144 
1145   // Need to process up to max_digits digits.
1146   int numSeen = 0;
1147   while (--max_digits >= 0 && isdigit(buffer[*offset])) {
1148     ++numSeen;
1149     ++(*offset);
1150     if (*offset >= buffer_size)
1151       return true;  // Out of space but seen a digit.
1152   }
1153 
1154   // Success if at least one digit seen.
1155   return (numSeen > 0);
1156 }
1157 
// Check that the next character in |buffer| is one of |c1| or |c2|. |c2| is
// optional (pass 0 to disable it). The character is consumed whether or not
// it matches; |*offset| is left untouched only when the buffer is exhausted.
// Returns true if there is a match, false if no match or out of space.
static inline bool VerifyCharacters(const uint8_t* buffer,
                                    int buffer_size,
                                    int* offset,
                                    char c1,
                                    char c2) {
  if (*offset >= buffer_size)
    return false;

  const char next = static_cast<char>(buffer[*offset]);
  ++(*offset);

  if (next == c1)
    return true;
  return c2 != 0 && next == c2;
}
1170 
1171 // Checks for a SRT container.
CheckSrt(const uint8_t * buffer,int buffer_size)1172 static bool CheckSrt(const uint8_t* buffer, int buffer_size) {
1173   // Reference: http://en.wikipedia.org/wiki/SubRip
1174   RCHECK(buffer_size > 20);
1175 
1176   // First line should just be the subtitle sequence number.
1177   int offset = StartsWith(buffer, buffer_size, UTF8_BYTE_ORDER_MARK) ? 3 : 0;
1178   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1179   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r'));
1180 
1181   // Skip any additional \n\r.
1182   while (VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r')) {}
1183   --offset;  // Since VerifyCharacters() gobbled up the next non-CR/LF.
1184 
1185   // Second line should look like the following:
1186   //   00:00:10,500 --> 00:00:13,000
1187   // Units separator can be , or .
1188   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1189   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1190   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1191   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1192   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1193   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1194   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1195   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1196   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1197   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1198   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '>', 0));
1199   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1200   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1201   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1202   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1203   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1204   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1205   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1206   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1207   return true;
1208 }
1209 
1210 // Read a Matroska Element Id.
GetElementId(BitReader * reader)1211 static int GetElementId(BitReader* reader) {
1212   // Element ID is coded with the leading zero bits (max 3) determining size.
1213   // If it is an invalid encoding or the end of the buffer is reached,
1214   // return -1 as a tag that won't be expected.
1215   if (reader->bits_available() >= 8) {
1216     int num_bits_to_read = 0;
1217     static int prefix[] = { 0x80, 0x4000, 0x200000, 0x10000000 };
1218     for (int i = 0; i < 4; ++i) {
1219       num_bits_to_read += 7;
1220       if (ReadBits(reader, 1) == 1) {
1221         if (reader->bits_available() < num_bits_to_read)
1222           break;
1223         // prefix[] adds back the bits read individually.
1224         return ReadBits(reader, num_bits_to_read) | prefix[i];
1225       }
1226     }
1227   }
1228   // Invalid encoding, return something not expected.
1229   return -1;
1230 }
1231 
1232 // Read a Matroska Unsigned Integer (VINT).
GetVint(BitReader * reader)1233 static uint64_t GetVint(BitReader* reader) {
1234   // Values are coded with the leading zero bits (max 7) determining size.
1235   // If it is an invalid coding or the end of the buffer is reached,
1236   // return something that will go off the end of the buffer.
1237   if (reader->bits_available() >= 8) {
1238     int num_bits_to_read = 0;
1239     for (int i = 0; i < 8; ++i) {
1240       num_bits_to_read += 7;
1241       if (ReadBits(reader, 1) == 1) {
1242         if (reader->bits_available() < num_bits_to_read)
1243           break;
1244         return ReadBits(reader, num_bits_to_read);
1245       }
1246     }
1247   }
1248   // Incorrect format (more than 7 leading 0's) or off the end of the buffer.
1249   // Since the return value is used as a byte size, return a value that will
1250   // cause a failure when used.
1251   return (reader->bits_available() / 8) + 2;
1252 }
1253 
1254 // Additional checks for a WEBM container.
CheckWebm(const uint8_t * buffer,int buffer_size)1255 static bool CheckWebm(const uint8_t* buffer, int buffer_size) {
1256   // Reference: http://www.matroska.org/technical/specs/index.html
1257   RCHECK(buffer_size > 12);
1258 
1259   BitReader reader(buffer, buffer_size);
1260 
1261   // Verify starting Element Id.
1262   RCHECK(GetElementId(&reader) == 0x1a45dfa3);
1263 
1264   // Get the header size, and ensure there are enough bits to check.
1265   // Using saturated_cast<> in case the size read is really large
1266   // (in which case the bits_available() check will fail).
1267   int header_size = base::saturated_cast<int>(GetVint(&reader));
1268   RCHECK(reader.bits_available() / 8 >= header_size);
1269 
1270   // Loop through the header.
1271   while (reader.bits_available() > 0) {
1272     int tag = GetElementId(&reader);
1273     int tagsize = base::saturated_cast<int>(GetVint(&reader));
1274     switch (tag) {
1275       case 0x4286:  // EBMLVersion
1276       case 0x42f7:  // EBMLReadVersion
1277       case 0x42f2:  // EBMLMaxIdLength
1278       case 0x42f3:  // EBMLMaxSizeLength
1279       case 0x4287:  // DocTypeVersion
1280       case 0x4285:  // DocTypeReadVersion
1281       case 0xec:    // void
1282       case 0xbf:    // CRC32
1283         RCHECK(reader.bits_available() / 8 >= tagsize);
1284         RCHECK(reader.SkipBits(tagsize * 8));
1285         break;
1286 
1287       case 0x4282:  // EBMLDocType
1288         // Need to see "webm" or "matroska" next.
1289         RCHECK(reader.bits_available() >= 32);
1290         switch (ReadBits(&reader, 32)) {
1291           case TAG('w', 'e', 'b', 'm') :
1292             return true;
1293           case TAG('m', 'a', 't', 'r') :
1294             RCHECK(reader.bits_available() >= 32);
1295             return (ReadBits(&reader, 32) == TAG('o', 's', 'k', 'a'));
1296         }
1297         return false;
1298 
1299       default:  // Unrecognized tag
1300         return false;
1301     }
1302   }
1303   return false;
1304 }
1305 
// Start code values (the byte that follows the 00 00 01 prefix) that
// CheckVC1() keeps track of.
enum VC1StartCodes {
  VC1_FRAME_START_CODE = 0x0D,
  VC1_ENTRY_POINT_START_CODE = 0x0E,
  VC1_SEQUENCE_START_CODE = 0x0F
};
1311 
1312 // Checks for a VC1 bitstream container.
CheckVC1(const uint8_t * buffer,int buffer_size)1313 static bool CheckVC1(const uint8_t* buffer, int buffer_size) {
1314   // Reference: SMPTE 421M
1315   // (http://standards.smpte.org/content/978-1-61482-555-5/st-421-2006/SEC1.body.pdf)
1316   // However, no length ... simply scan for start code values.
1317   // Expect to see SEQ | [ [ ENTRY ] PIC* ]*
1318   // Note tags are very similar to H.264.
1319 
1320   RCHECK(buffer_size >= 24);
1321 
1322   // First check for Bitstream Metadata Serialization (Annex L)
1323   if (buffer[0] == 0xc5 &&
1324       Read32(buffer + 4) == 0x04 &&
1325       Read32(buffer + 20) == 0x0c) {
1326     // Verify settings in STRUCT_C and STRUCT_A
1327     BitReader reader(buffer + 8, 12);
1328 
1329     int profile = ReadBits(&reader, 4);
1330     if (profile == 0 || profile == 4) {  // simple or main
1331       // Skip FRMRTQ_POSTPROC, BITRTQ_POSTPROC, and LOOPFILTER.
1332       reader.SkipBits(3 + 5 + 1);
1333 
1334       // Next bit must be 0.
1335       RCHECK(ReadBits(&reader, 1) == 0);
1336 
1337       // Skip MULTIRES.
1338       reader.SkipBits(1);
1339 
1340       // Next bit must be 1.
1341       RCHECK(ReadBits(&reader, 1) == 1);
1342 
1343       // Skip FASTUVMC, EXTENDED_MV, DQUANT, and VSTRANSFORM.
1344       reader.SkipBits(1 + 1 + 2 + 1);
1345 
1346       // Next bit must be 0.
1347       RCHECK(ReadBits(&reader, 1) == 0);
1348 
1349       // Skip OVERLAP, SYNCMARKER, RANGERED, MAXBFRAMES, QUANTIZER, and
1350       // FINTERPFLAG.
1351       reader.SkipBits(1 + 1 + 1 + 3 + 2 + 1);
1352 
1353       // Next bit must be 1.
1354       RCHECK(ReadBits(&reader, 1) == 1);
1355 
1356     } else {
1357       RCHECK(profile == 12);  // Other profile values not allowed.
1358       RCHECK(ReadBits(&reader, 28) == 0);
1359     }
1360 
1361     // Now check HORIZ_SIZE and VERT_SIZE, which must be 8192 or less.
1362     RCHECK(ReadBits(&reader, 32) <= 8192);
1363     RCHECK(ReadBits(&reader, 32) <= 8192);
1364     return true;
1365   }
1366 
1367   // Buffer isn't Bitstream Metadata, so scan for start codes.
1368   int offset = 0;
1369   int sequence_start_code = 0;
1370   int frame_start_code = 0;
1371   while (true) {
1372     // Advance to start_code, if there is one.
1373     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 5, 24, 1)) {
1374       // Not a complete sequence in memory, so return true if we've seen a
1375       // sequence start and a frame start (not checking entry points since
1376       // they only occur in advanced profiles).
1377       return (sequence_start_code > 0 && frame_start_code > 0);
1378     }
1379 
1380     // Now verify the block. AdvanceToStartCode() made sure that there are
1381     // at least 5 bytes remaining in the buffer.
1382     BitReader reader(buffer + offset, 5);
1383     RCHECK(ReadBits(&reader, 24) == 1);
1384 
1385     // Keep track of the number of certain types received.
1386     switch (ReadBits(&reader, 8)) {
1387       case VC1_SEQUENCE_START_CODE: {
1388         ++sequence_start_code;
1389         switch (ReadBits(&reader, 2)) {
1390           case 0:  // simple
1391           case 1:  // main
1392             RCHECK(ReadBits(&reader, 2) == 0);
1393             break;
1394           case 2:  // complex
1395             return false;
1396           case 3:  // advanced
1397             RCHECK(ReadBits(&reader, 3) <= 4);  // Verify level = 0..4
1398             RCHECK(ReadBits(&reader, 2) == 1);  // Verify colordiff_format = 1
1399             break;
1400         }
1401         break;
1402       }
1403 
1404       case VC1_ENTRY_POINT_START_CODE:
1405         // No fields in entry data to check. However, it must occur after
1406         // sequence header.
1407         RCHECK(sequence_start_code > 0);
1408         break;
1409 
1410       case VC1_FRAME_START_CODE:
1411         ++frame_start_code;
1412         break;
1413     }
1414     offset += 5;
1415   }
1416 }
1417 
1418 // For some formats the signature is a bunch of characters. They are defined
1419 // below. Note that the first 4 characters of the string may be used as a TAG
1420 // in LookupContainerByFirst4. For signatures that contain embedded \0, use
1421 // uint8_t[].
1422 static const char kAmrSignature[] = "#!AMR";
1423 static const uint8_t kAsfSignature[] = {0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66,
1424                                         0xcf, 0x11, 0xa6, 0xd9, 0x00, 0xaa,
1425                                         0x00, 0x62, 0xce, 0x6c};
1426 static const char kAssSignature[] = "[Script Info]";
1427 static const char kAssBomSignature[] = UTF8_BYTE_ORDER_MARK "[Script Info]";
1428 static const uint8_t kWtvSignature[] = {0xb7, 0xd8, 0x00, 0x20, 0x37, 0x49,
1429                                         0xda, 0x11, 0xa6, 0x4e, 0x00, 0x07,
1430                                         0xe9, 0x5e, 0xad, 0x8d};
1431 
1432 // Attempt to determine the container type from the buffer provided. This is
1433 // a simple pass, that uses the first 4 bytes of the buffer as an index to get
1434 // a rough idea of the container format.
LookupContainerByFirst4(const uint8_t * buffer,int buffer_size)1435 static MediaContainerName LookupContainerByFirst4(const uint8_t* buffer,
1436                                                   int buffer_size) {
1437   // Minimum size that the code expects to exist without checking size.
1438   if (buffer_size < kMinimumContainerSize)
1439     return CONTAINER_UNKNOWN;
1440 
1441   uint32_t first4 = Read32(buffer);
1442   switch (first4) {
1443     case 0x1a45dfa3:
1444       if (CheckWebm(buffer, buffer_size))
1445         return CONTAINER_WEBM;
1446       break;
1447 
1448     case 0x3026b275:
1449       if (StartsWith(buffer,
1450                      buffer_size,
1451                      kAsfSignature,
1452                      sizeof(kAsfSignature))) {
1453         return CONTAINER_ASF;
1454       }
1455       break;
1456 
1457     case TAG('#','!','A','M'):
1458       if (StartsWith(buffer, buffer_size, kAmrSignature))
1459         return CONTAINER_AMR;
1460       break;
1461 
1462     case TAG('#','E','X','T'):
1463       if (CheckHls(buffer, buffer_size))
1464         return CONTAINER_HLS;
1465       break;
1466 
1467     case TAG('.','R','M','F'):
1468       if (buffer[4] == 0 && buffer[5] == 0)
1469         return CONTAINER_RM;
1470       break;
1471 
1472     case TAG('.','r','a','\xfd'):
1473       return CONTAINER_RM;
1474 
1475     case TAG('B','I','K','b'):
1476     case TAG('B','I','K','d'):
1477     case TAG('B','I','K','f'):
1478     case TAG('B','I','K','g'):
1479     case TAG('B','I','K','h'):
1480     case TAG('B','I','K','i'):
1481       if (CheckBink(buffer, buffer_size))
1482         return CONTAINER_BINK;
1483       break;
1484 
1485     case TAG('c','a','f','f'):
1486       if (CheckCaf(buffer, buffer_size))
1487         return CONTAINER_CAF;
1488       break;
1489 
1490     case TAG('D','E','X','A'):
1491       if (buffer_size > 15 &&
1492           Read16(buffer + 11) <= 2048 &&
1493           Read16(buffer + 13) <= 2048) {
1494         return CONTAINER_DXA;
1495       }
1496       break;
1497 
1498     case TAG('D','T','S','H'):
1499       if (Read32(buffer + 4) == TAG('D','H','D','R'))
1500         return CONTAINER_DTSHD;
1501       break;
1502 
1503     case 0x64a30100:
1504     case 0x64a30200:
1505     case 0x64a30300:
1506     case 0x64a30400:
1507     case 0x0001a364:
1508     case 0x0002a364:
1509     case 0x0003a364:
1510       if (Read32(buffer + 4) != 0 && Read32(buffer + 8) != 0)
1511         return CONTAINER_IRCAM;
1512       break;
1513 
1514     case TAG('f','L','a','C'):
1515       return CONTAINER_FLAC;
1516 
1517     case TAG('F','L','V',0):
1518     case TAG('F','L','V',1):
1519     case TAG('F','L','V',2):
1520     case TAG('F','L','V',3):
1521     case TAG('F','L','V',4):
1522       if (buffer[5] == 0 && Read32(buffer + 5) > 8)
1523         return CONTAINER_FLV;
1524       break;
1525 
1526     case TAG('F','O','R','M'):
1527       switch (Read32(buffer + 8)) {
1528         case TAG('A','I','F','F'):
1529         case TAG('A','I','F','C'):
1530           return CONTAINER_AIFF;
1531       }
1532       break;
1533 
1534     case TAG('M','A','C',' '):
1535       return CONTAINER_APE;
1536 
1537     case TAG('O','N','2',' '):
1538       if (Read32(buffer + 8) == TAG('O','N','2','f'))
1539         return CONTAINER_AVI;
1540       break;
1541 
1542     case TAG('O','g','g','S'):
1543       if (buffer[5] <= 7)
1544         return CONTAINER_OGG;
1545       break;
1546 
1547     case TAG('R','F','6','4'):
1548       if (buffer_size > 16 && Read32(buffer + 12) == TAG('d','s','6','4'))
1549         return CONTAINER_WAV;
1550       break;
1551 
1552     case TAG('R','I','F','F'):
1553       switch (Read32(buffer + 8)) {
1554         case TAG('A','V','I',' '):
1555         case TAG('A','V','I','X'):
1556         case TAG('A','V','I','\x19'):
1557         case TAG('A','M','V',' '):
1558           return CONTAINER_AVI;
1559         case TAG('W','A','V','E'):
1560           return CONTAINER_WAV;
1561       }
1562       break;
1563 
1564     case TAG('[','S','c','r'):
1565       if (StartsWith(buffer, buffer_size, kAssSignature))
1566         return CONTAINER_ASS;
1567       break;
1568 
1569     case TAG('\xef','\xbb','\xbf','['):
1570       if (StartsWith(buffer, buffer_size, kAssBomSignature))
1571         return CONTAINER_ASS;
1572       break;
1573 
1574     case 0x7ffe8001:
1575     case 0xfe7f0180:
1576     case 0x1fffe800:
1577     case 0xff1f00e8:
1578       if (CheckDts(buffer, buffer_size))
1579         return CONTAINER_DTS;
1580       break;
1581 
1582     case 0xb7d80020:
1583       if (StartsWith(buffer,
1584                      buffer_size,
1585                      kWtvSignature,
1586                      sizeof(kWtvSignature))) {
1587         return CONTAINER_WTV;
1588       }
1589       break;
1590   }
1591 
1592   // Now try a few different ones that look at something other
1593   // than the first 4 bytes.
1594   uint32_t first3 = first4 & 0xffffff00;
1595   switch (first3) {
1596     case TAG('C','W','S',0):
1597     case TAG('F','W','S',0):
1598       return CONTAINER_SWF;
1599 
1600     case TAG('I','D','3',0):
1601       return CONTAINER_MP3;
1602   }
1603 
1604   // Maybe the first 2 characters are something we can use.
1605   uint32_t first2 = Read16(buffer);
1606   switch (first2) {
1607     case kAc3SyncWord:
1608       if (CheckAc3(buffer, buffer_size))
1609         return CONTAINER_AC3;
1610       if (CheckEac3(buffer, buffer_size))
1611         return CONTAINER_EAC3;
1612       break;
1613 
1614     case 0xfff0:
1615     case 0xfff1:
1616     case 0xfff8:
1617     case 0xfff9:
1618       if (CheckAac(buffer, buffer_size))
1619         return CONTAINER_AAC;
1620       break;
1621   }
1622 
1623   // Check if the file is in MP3 format without the ID3 header.
1624   if (CheckMp3(buffer, buffer_size))
1625     return CONTAINER_MP3;
1626 
1627   return CONTAINER_UNKNOWN;
1628 }
1629 
1630 // Attempt to determine the container name from the buffer provided.
DetermineContainer(const uint8_t * buffer,int buffer_size)1631 MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) {
1632   DCHECK(buffer);
1633 
1634   // Since MOV/QuickTime/MPEG4 streams are common, check for them first.
1635   if (CheckMov(buffer, buffer_size))
1636     return CONTAINER_MOV;
1637 
1638   // Next attempt the simple checks, that typically look at just the
1639   // first few bytes of the file.
1640   MediaContainerName result = LookupContainerByFirst4(buffer, buffer_size);
1641   if (result != CONTAINER_UNKNOWN)
1642     return result;
1643 
1644   // Additional checks that may scan a portion of the buffer.
1645   if (CheckMpeg2ProgramStream(buffer, buffer_size))
1646     return CONTAINER_MPEG2PS;
1647   if (CheckMpeg2TransportStream(buffer, buffer_size))
1648     return CONTAINER_MPEG2TS;
1649   if (CheckMJpeg(buffer, buffer_size))
1650     return CONTAINER_MJPEG;
1651   if (CheckDV(buffer, buffer_size))
1652     return CONTAINER_DV;
1653   if (CheckH261(buffer, buffer_size))
1654     return CONTAINER_H261;
1655   if (CheckH263(buffer, buffer_size))
1656     return CONTAINER_H263;
1657   if (CheckH264(buffer, buffer_size))
1658     return CONTAINER_H264;
1659   if (CheckMpeg4BitStream(buffer, buffer_size))
1660     return CONTAINER_MPEG4BS;
1661   if (CheckVC1(buffer, buffer_size))
1662     return CONTAINER_VC1;
1663   if (CheckSrt(buffer, buffer_size))
1664     return CONTAINER_SRT;
1665   if (CheckGsm(buffer, buffer_size))
1666     return CONTAINER_GSM;
1667 
1668   // AC3/EAC3 might not start at the beginning of the stream,
1669   // so scan for a start code.
1670   int offset = 1;  // No need to start at byte 0 due to First4 check.
1671   if (AdvanceToStartCode(buffer, buffer_size, &offset, 4, 16, kAc3SyncWord)) {
1672     if (CheckAc3(buffer + offset, buffer_size - offset))
1673       return CONTAINER_AC3;
1674     if (CheckEac3(buffer + offset, buffer_size - offset))
1675       return CONTAINER_EAC3;
1676   }
1677 
1678   return CONTAINER_UNKNOWN;
1679 }
1680 
1681 }  // namespace container_names
1682 
1683 }  // namespace media
1684