1 // Claxon -- A FLAC decoding library in Rust
2 // Copyright 2014 Ruud van Asseldonk
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // A copy of the License has been included in the root of the repository.
7 
8 //! The `metadata` module deals with metadata at the beginning of a FLAC stream.
9 
10 use error::{Error, Result, fmt_err};
11 use input::ReadBytes;
12 use std::str;
13 use std::slice;
14 
/// The header that precedes the body of every metadata block.
#[derive(Clone, Copy)]
struct MetadataBlockHeader {
    /// Whether this block is the last metadata block (the first bit of the
    /// header).
    is_last: bool,
    /// The block type code, taken from the 7 bits that follow the
    /// "last block" bit.
    block_type: u8,
    /// The length of the block body in bytes, read from a 24-bit field.
    length: u32,
}
21 
/// The streaminfo metadata block, with important information about the stream.
///
/// Values that are stored as 0 in the stream to indicate "unknown" (the frame
/// size bounds and the total number of samples) are exposed as `None`.
#[derive(Clone, Copy, Debug)]
pub struct StreamInfo {
    // TODO: "size" would better be called "duration" for clarity.
    /// The minimum block size (in inter-channel samples) used in the stream.
    ///
    /// This number is independent of the number of channels. To get the minimum
    /// block duration in seconds, divide this by the sample rate.
    pub min_block_size: u16,
    /// The maximum block size (in inter-channel samples) used in the stream.
    ///
    /// This number is independent of the number of channels. To get the
    /// maximum block duration in seconds, divide by the sample rate. To avoid
    /// allocations during decoding, a buffer of this size times the number of
    /// channels can be allocated up front and passed into
    /// `FrameReader::read_next_or_eof()`.
    pub max_block_size: u16,
    /// The minimum frame size (in bytes) used in the stream.
    ///
    /// `None` if the stream stores 0 here, which indicates that the value is
    /// unknown.
    pub min_frame_size: Option<u32>,
    /// The maximum frame size (in bytes) used in the stream.
    ///
    /// `None` if the stream stores 0 here, which indicates that the value is
    /// unknown.
    pub max_frame_size: Option<u32>,
    /// The sample rate in Hz.
    ///
    /// When the block is read by this module, a rate of 0 or a rate above
    /// 655350 Hz is rejected as invalid.
    pub sample_rate: u32,
    /// The number of channels.
    ///
    /// The stream stores this as a 3-bit "channels - 1" value, so a value read
    /// by this module lies between 1 and 8.
    pub channels: u32,
    /// The number of bits per sample.
    ///
    /// The stream stores this as a 5-bit "bits per sample - 1" value, so a
    /// value read by this module lies between 1 and 32.
    pub bits_per_sample: u32,
    /// The total number of inter-channel samples in the stream.
    ///
    /// `None` if the stream stores 0 here.
    // TODO: rename to `duration` for clarity?
    pub samples: Option<u64>,
    /// MD5 signature of the unencoded audio data.
    pub md5sum: [u8; 16],
}
55 
/// A seek point in the seek table.
///
/// A seek point associates a sample number with the byte offset of the frame
/// that contains that sample.
#[derive(Clone, Copy)]
pub struct SeekPoint {
    /// Sample number of the first sample in the target frame, or 2<sup>64</sup> - 1 for a placeholder.
    pub sample: u64,
    /// Offset in bytes from the first byte of the first frame header to the first byte of the
    /// target frame's header.
    pub offset: u64,
    /// Number of samples in the target frame.
    pub samples: u16,
}
67 
/// A seek table to aid seeking in the stream.
///
/// The seek points are private; the field is currently unused because
/// seeking has not been implemented yet.
pub struct SeekTable {
    /// The seek points, sorted in ascending order by sample number.
    #[allow(dead_code)] // TODO: Implement seeking.
    seekpoints: Vec<SeekPoint>,
}
74 
/// Vorbis comments, also known as FLAC tags (e.g. artist, title, etc.).
///
/// The block consists of a vendor string followed by a list of
/// `"NAME=value"` comments.
pub struct VorbisComment {
    /// The “vendor string”, chosen by the encoder vendor.
    ///
    /// This string usually contains the name and version of the program that
    /// encoded the FLAC stream, such as `reference libFLAC 1.3.2 20170101`
    /// or `Lavf57.25.100`.
    pub vendor: String,

    /// Name-value pairs of Vorbis comments, such as `ARTIST=Queen`.
    ///
    /// This struct stores a raw low-level representation of tags. Use
    /// `FlacReader::tags()` for a friendlier iterator. The tuple consists of
    /// the string in `"NAME=value"` format, and the index of the `'='` into
    /// that string. The index refers to the first `'='` in the string, so the
    /// value itself may contain further `'='` characters.
    ///
    /// The name is supposed to be interpreted case-insensitively, and is
    /// guaranteed to consist of ASCII characters. Claxon does not normalize
    /// the casing of the name. Use `metadata::GetTag` to do a case-insensitive
    /// lookup.
    ///
    /// Names need not be unique. For instance, multiple `ARTIST` comments might
    /// be present on a collaboration track.
    ///
    /// See <https://www.xiph.org/vorbis/doc/v-comment.html> for more details.
    pub comments: Vec<(String, usize)>,
}
102 
/// A metadata block of a FLAC stream.
///
/// Note that `read_metadata_block()` in this module does not decode seek
/// table, CUE sheet, and picture blocks yet; it skips over their contents and
/// reports them as `Padding`.
pub enum MetadataBlock {
    /// A stream info block.
    StreamInfo(StreamInfo),
    /// A padding block (with no meaningful data).
    Padding {
        /// The number of padding bytes.
        length: u32,
    },
    /// An application block with application-specific data.
    Application {
        /// The registered application ID.
        id: u32,
        /// The contents of the application block, excluding the ID.
        data: Vec<u8>,
    },
    /// A seek table block.
    SeekTable(SeekTable),
    /// A Vorbis comment block, also known as FLAC tags.
    VorbisComment(VorbisComment),
    /// A CUE sheet block.
    CueSheet, // TODO
    /// A picture block.
    Picture, // TODO
    /// A block with a reserved block type, not supported by this library.
    Reserved,
}
130 
/// An iterator over the Vorbis comments (FLAC tags) of a FLAC stream.
///
/// Every item is a `(name, value)` pair of string slices that borrow from the
/// underlying list of comments. See `FlacReader::tags()` for more details.
pub struct Tags<'a> {
    /// Walks over the raw `("NAME=value", index of '=')` entries.
    iter: slice::Iter<'a, (String, usize)>,
}

impl<'a> Tags<'a> {
    /// Creates a `Tags` iterator over raw Vorbis comment entries.
    ///
    /// Each entry consists of the full `"NAME=value"` string and the byte
    /// index of the `'='` separator in that string.
    #[inline]
    pub fn new(comments: &'a [(String, usize)]) -> Tags<'a> {
        let entries = comments.iter();
        Tags { iter: entries }
    }
}

impl<'a> Iterator for Tags<'a> {
    type Item = (&'a str, &'a str);

    /// Yields the next `(name, value)` pair, or `None` when all entries have
    /// been visited.
    #[inline]
    fn next(&mut self) -> Option<(&'a str, &'a str)> {
        match self.iter.next() {
            Some(entry) => {
                let text: &'a str = &entry.0;
                let eq_pos = entry.1;

                // Everything before the separator is the name, everything
                // after it is the value; the '=' itself is dropped.
                let name = &text[..eq_pos];
                let value = &text[eq_pos + 1..];
                Some((name, value))
            }
            None => None,
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Every remaining entry yields exactly one tag, so the number of
        // remaining entries is both the lower and the upper bound.
        let remaining = self.iter.len();
        (remaining, Some(remaining))
    }
}

impl<'a> ExactSizeIterator for Tags<'a> {}
166 
/// An iterator that yields the values of all Vorbis comments with a given name.
///
/// The name is compared ASCII case-insensitively. Values are yielded as
/// `&str`, in the order in which the comments are stored. See
/// `FlacReader::get_tag()` for more details.
pub struct GetTag<'a> {
    /// The raw `("NAME=value", index of '=')` entries to search through.
    vorbis_comments: &'a [(String, usize)],
    /// The tag name to look for.
    needle: &'a str,
    /// The index of the entry that should be inspected next.
    index: usize,
}

impl<'a> GetTag<'a> {
    /// Creates a `GetTag` iterator that searches `vorbis_comments` for tags
    /// named `needle`, starting at the first entry.
    #[inline]
    pub fn new(vorbis_comments: &'a [(String, usize)], needle: &'a str) -> GetTag<'a> {
        GetTag {
            index: 0,
            needle: needle,
            vorbis_comments: vorbis_comments,
        }
    }
}

impl<'a> Iterator for GetTag<'a> {
    type Item = &'a str;

    /// Advances to the next comment whose name matches the needle and returns
    /// its value, or returns `None` when no further match exists.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        // This import is actually required on Rust 1.13.
        #[allow(unused_imports)]
        use std::ascii::AsciiExt;

        let haystack: &'a [(String, usize)] = self.vorbis_comments;
        let wanted = self.needle;

        while let Some(entry) = haystack.get(self.index) {
            // Move past this entry regardless of whether it matches, so that
            // the next call resumes the search after it.
            self.index += 1;

            let text: &'a str = &entry.0;
            let eq_pos = entry.1;

            if text[..eq_pos].eq_ignore_ascii_case(wanted) {
                return Some(&text[eq_pos + 1..]);
            }
        }

        None
    }
}
212 
213 #[inline]
read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlockHeader>214 fn read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlockHeader> {
215     let byte = try!(input.read_u8());
216 
217     // The first bit specifies whether this is the last block, the next 7 bits
218     // specify the type of the metadata block to follow.
219     let is_last = (byte >> 7) == 1;
220     let block_type = byte & 0b0111_1111;
221 
222     // The length field is 24 bits, or 3 bytes.
223     let length = try!(input.read_be_u24());
224 
225     let header = MetadataBlockHeader {
226         is_last: is_last,
227         block_type: block_type,
228         length: length,
229     };
230     Ok(header)
231 }
232 
233 /// Read a single metadata block header and body from the input.
234 ///
235 /// When reading a regular flac stream, there is no need to use this function
236 /// directly; constructing a `FlacReader` will read the header and its metadata
237 /// blocks.
238 ///
239 /// When a flac stream is embedded in a container format, this function can be
240 /// used to decode a single metadata block. For instance, the Ogg format embeds
241 /// metadata blocks including their header verbatim in packets. This function
242 /// can be used to decode that raw data.
243 #[inline]
read_metadata_block_with_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlock>244 pub fn read_metadata_block_with_header<R: ReadBytes>(input: &mut R)
245                                                      -> Result<MetadataBlock> {
246   let header = try!(read_metadata_block_header(input));
247   read_metadata_block(input, header.block_type, header.length)
248 }
249 
250 /// Read a single metadata block of the given type and length from the input.
251 ///
252 /// When reading a regular flac stream, there is no need to use this function
253 /// directly; constructing a `FlacReader` will read the header and its metadata
254 /// blocks.
255 ///
256 /// When a flac stream is embedded in a container format, this function can be
257 /// used to decode a single metadata block. For instance, the MP4 format sports
258 /// a “FLAC Specific Box” which contains the block type and the raw data. This
259 /// function can be used to decode that raw data.
260 #[inline]
read_metadata_block<R: ReadBytes>(input: &mut R, block_type: u8, length: u32) -> Result<MetadataBlock>261 pub fn read_metadata_block<R: ReadBytes>(input: &mut R,
262                                          block_type: u8,
263                                          length: u32)
264                                          -> Result<MetadataBlock> {
265     match block_type {
266         0 => {
267             // The streaminfo block has a fixed size of 34 bytes.
268             if length == 34 {
269                 let streaminfo = try!(read_streaminfo_block(input));
270                 Ok(MetadataBlock::StreamInfo(streaminfo))
271             } else {
272                 fmt_err("invalid streaminfo metadata block length")
273             }
274         }
275         1 => {
276             try!(read_padding_block(input, length));
277             Ok(MetadataBlock::Padding { length: length })
278         }
279         2 => {
280             let (id, data) = try!(read_application_block(input, length));
281             Ok(MetadataBlock::Application {
282                 id: id,
283                 data: data,
284             })
285         }
286         3 => {
287             // TODO: implement seektable reading. For now, pretend it is padding.
288             try!(input.skip(length));
289             Ok(MetadataBlock::Padding { length: length })
290         }
291         4 => {
292             let vorbis_comment = try!(read_vorbis_comment_block(input, length));
293             Ok(MetadataBlock::VorbisComment(vorbis_comment))
294         }
295         5 => {
296             // TODO: implement CUE sheet reading. For now, pretend it is padding.
297             try!(input.skip(length));
298             Ok(MetadataBlock::Padding { length: length })
299         }
300         6 => {
301             // TODO: implement picture reading. For now, pretend it is padding.
302             try!(input.skip(length));
303             Ok(MetadataBlock::Padding { length: length })
304         }
305         127 => {
306             // This code is invalid to avoid confusion with a frame sync code.
307             fmt_err("invalid metadata block type")
308         }
309         _ => {
310             // Any other block type is 'reserved' at the moment of writing. The
311             // reference implementation reads it as an 'unknown' block. That is
312             // one way of handling it, but maybe there should be some kind of
313             // 'strict' mode (configurable at compile time?) so that this can
314             // be an error if desired.
315             try!(input.skip(length));
316             Ok(MetadataBlock::Reserved)
317         }
318     }
319 }
320 
read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo>321 fn read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo> {
322     let min_block_size = try!(input.read_be_u16());
323     let max_block_size = try!(input.read_be_u16());
324 
325     // The frame size fields are 24 bits, or 3 bytes.
326     let min_frame_size = try!(input.read_be_u24());
327     let max_frame_size = try!(input.read_be_u24());
328 
329     // Next up are 20 bits that determine the sample rate.
330     let sample_rate_msb = try!(input.read_be_u16());
331     let sample_rate_lsb = try!(input.read_u8());
332 
333     // Stitch together the value from the first 16 bits,
334     // and then the 4 most significant bits of the next byte.
335     let sample_rate = (sample_rate_msb as u32) << 4 | (sample_rate_lsb as u32) >> 4;
336 
337     // Next three bits are the number of channels - 1. Mask them out and add 1.
338     let n_channels_bps = sample_rate_lsb;
339     let n_channels = ((n_channels_bps >> 1) & 0b0000_0111) + 1;
340 
341     // The final bit is the most significant of bits per sample - 1. Bits per
342     // sample - 1 is 5 bits in total.
343     let bps_msb = n_channels_bps & 1;
344     let bps_lsb_n_samples = try!(input.read_u8());
345 
346     // Stitch together these values, add 1 because # - 1 is stored.
347     let bits_per_sample = (bps_msb << 4 | (bps_lsb_n_samples >> 4)) + 1;
348 
349     // Number of samples in 36 bits, we have 4 already, 32 to go.
350     let n_samples_msb = bps_lsb_n_samples & 0b0000_1111;
351     let n_samples_lsb = try!(input.read_be_u32());
352     let n_samples = (n_samples_msb as u64) << 32 | n_samples_lsb as u64;
353 
354     // Next are 128 bits (16 bytes) of MD5 signature.
355     let mut md5sum = [0u8; 16];
356     try!(input.read_into(&mut md5sum));
357 
358     // Lower bounds can never be larger than upper bounds. Note that 0 indicates
359     // unknown for the frame size. Also, the block size must be at least 16.
360     if min_block_size > max_block_size {
361         return fmt_err("inconsistent bounds, min block size > max block size");
362     }
363     if min_block_size < 16 {
364         return fmt_err("invalid block size, must be at least 16");
365     }
366     if min_frame_size > max_frame_size && max_frame_size != 0 {
367         return fmt_err("inconsistent bounds, min frame size > max frame size");
368     }
369 
370     // A sample rate of 0 is invalid, and the maximum sample rate is limited by
371     // the structure of the frame headers to 655350 Hz.
372     if sample_rate == 0 || sample_rate > 655350 {
373         return fmt_err("invalid sample rate");
374     }
375 
376     let stream_info = StreamInfo {
377         min_block_size: min_block_size,
378         max_block_size: max_block_size,
379         min_frame_size: if min_frame_size == 0 {
380             None
381         } else {
382             Some(min_frame_size)
383         },
384         max_frame_size: if max_frame_size == 0 {
385             None
386         } else {
387             Some(max_frame_size)
388         },
389         sample_rate: sample_rate,
390         channels: n_channels as u32,
391         bits_per_sample: bits_per_sample as u32,
392         samples: if n_samples == 0 {
393             None
394         } else {
395             Some(n_samples)
396         },
397         md5sum: md5sum,
398     };
399     Ok(stream_info)
400 }
401 
read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<VorbisComment>402 fn read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<VorbisComment> {
403     if length < 8 {
404         // We expect at a minimum a 32-bit vendor string length, and a 32-bit
405         // comment count.
406         return fmt_err("Vorbis comment block is too short")
407     }
408 
409     // Fail if the length of the Vorbis comment block is larger than 1 MiB. This
410     // block is full of length-prefixed strings for which we allocate memory up
411     // front. If there were no limit on these, a maliciously crafted file could
412     // cause OOM by claiming to contain large strings. But at least the strings
413     // cannot be longer than the size of the Vorbis comment block, and by
414     // limiting the size of that block, we can mitigate such DoS attacks.
415     //
416     // The typical size of a the Vorbis comment block is 1 KiB; on a corpus of
417     // real-world flac files, the 0.05 and 0.95 quantiles were 792 and 1257
418     // bytes respectively, with even the 0.99 quantile below 2 KiB. The only
419     // reason for having a large Vorbis comment block is when cover art is
420     // incorrectly embedded there, but the Vorbis comment block is not the right
421     // place for that anyway.
422     if length > 10 * 1024 * 1024 {
423         let msg = "Vorbis comment blocks larger than 10 MiB are not supported";
424         return Err(Error::Unsupported(msg))
425     }
426 
427     // The Vorbis comment block starts with a length-prefixed "vendor string".
428     // It cannot be larger than the block length - 8, because there are the
429     // 32-bit vendor string length, and comment count.
430     let vendor_len = try!(input.read_le_u32());
431     if vendor_len > length - 8 { return fmt_err("vendor string too long") }
432     let mut vendor_bytes = Vec::with_capacity(vendor_len as usize);
433 
434     // We can safely set the lenght of the vector here; the uninitialized memory
435     // is not exposed. If `read_into` succeeds, it will have overwritten all
436     // bytes. If not, an error is returned and the memory is never exposed.
437     unsafe { vendor_bytes.set_len(vendor_len as usize); }
438     try!(input.read_into(&mut vendor_bytes));
439     let vendor = try!(String::from_utf8(vendor_bytes));
440 
441     // Next up is the number of comments. Because every comment is at least 4
442     // bytes to indicate its length, there cannot be more comments than the
443     // length of the block divided by 4. This is only an upper bound to ensure
444     // that we don't allocate a big vector, to protect against DoS attacks.
445     let mut comments_len = try!(input.read_le_u32());
446     if comments_len >= length / 4 {
447         return fmt_err("too many entries for Vorbis comment block")
448     }
449     let mut comments = Vec::with_capacity(comments_len as usize);
450 
451     let mut bytes_left = length - 8 - vendor_len;
452 
453     // For every comment, there is a length-prefixed string of the form
454     // "NAME=value".
455     while bytes_left >= 4 && comments.len() < comments_len as usize {
456         let comment_len = try!(input.read_le_u32());
457         bytes_left -= 4;
458 
459         if comment_len > bytes_left {
460             return fmt_err("Vorbis comment too long for Vorbis comment block")
461         }
462 
463         // Some older versions of libflac allowed writing zero-length Vorbis
464         // comments. ALthough such files are invalid, they do occur in the wild,
465         // so we skip over the empty comment.
466         if comment_len == 0 {
467             // Does not overflow because `comments_len > comments.len() >= 0`.
468             comments_len -= 1;
469             continue;
470         }
471 
472         // For the same reason as above, setting the length is safe here.
473         let mut comment_bytes = Vec::with_capacity(comment_len as usize);
474         unsafe { comment_bytes.set_len(comment_len as usize); }
475         try!(input.read_into(&mut comment_bytes));
476 
477         bytes_left -= comment_len;
478 
479         if let Some(sep_index) = comment_bytes.iter().position(|&x| x == b'=') {
480             {
481                 let name_bytes = &comment_bytes[..sep_index];
482 
483                 // According to the Vorbis spec, the field name may consist of ascii
484                 // bytes 0x20 through 0x7d, 0x3d (`=`) excluded. Verifying this has
485                 // the advantage that if the check passes, the result is valid
486                 // UTF-8, so the conversion to string will not fail.
487                 if name_bytes.iter().any(|&x| x < 0x20 || x > 0x7d) {
488                     return fmt_err("Vorbis comment field name contains invalid byte")
489                 }
490             }
491 
492             let comment = try!(String::from_utf8(comment_bytes));
493             comments.push((comment, sep_index));
494         } else {
495             return fmt_err("Vorbis comment does not contain '='")
496         }
497     }
498 
499     if bytes_left != 0 {
500         return fmt_err("Vorbis comment block has excess data")
501     }
502 
503     if comments.len() != comments_len as usize {
504         return fmt_err("Vorbis comment block contains wrong number of entries")
505     }
506 
507     let vorbis_comment = VorbisComment {
508         vendor: vendor,
509         comments: comments,
510     };
511 
512     Ok(vorbis_comment)
513 }
514 
read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()>515 fn read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()> {
516     // The specification dictates that all bits of the padding block must be 0.
517     // However, the reference implementation does not issue an error when this
518     // is not the case, and frankly, when you are going to skip over these
519     // bytes and do nothing with them whatsoever, why waste all those CPU
520     // cycles checking that the padding is valid?
521     Ok(try!(input.skip(length)))
522 }
523 
read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u32, Vec<u8>)>524 fn read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u32, Vec<u8>)> {
525     if length < 4 {
526         return fmt_err("application block length must be at least 4 bytes")
527     }
528 
529     // Reject large application blocks to avoid memory-based denial-
530     // of-service attacks. See also the more elaborate motivation in
531     // `read_vorbis_comment_block()`.
532     if length > 10 * 1024 * 1024 {
533         let msg = "application blocks larger than 10 MiB are not supported";
534         return Err(Error::Unsupported(msg))
535     }
536 
537     let id = try!(input.read_be_u32());
538 
539     // Four bytes of the block have been used for the ID, the rest is payload.
540     // Create a vector of uninitialized memory, and read the block into it. The
541     // uninitialized memory is never exposed: read_into will either fill the
542     // buffer completely, or return an err, in which case the memory is not
543     // exposed.
544     let mut data = Vec::with_capacity(length as usize - 4);
545     unsafe { data.set_len(length as usize - 4); }
546     try!(input.read_into(&mut data));
547 
548     Ok((id, data))
549 }
550 
/// Reads metadata blocks from a stream and exposes them as an iterator.
///
/// It is assumed that the next byte that the reader will read, is the first
/// byte of a metadata block header. This means that the iterator will yield at
/// least a single value. If the iterator ever yields an error, then no more
/// data will be read thereafter, and the next value will be `None`. The
/// iterator also ends after the block whose header has the "last block" flag
/// set.
pub struct MetadataBlockReader<R: ReadBytes> {
    /// The input to read metadata blocks from.
    input: R,
    /// Set once the last block has been read or an error has occurred; no
    /// more reads are attempted afterwards.
    done: bool,
}
561 
/// Either a `MetadataBlock` or an `Error`.
///
/// This is the item type of the `MetadataBlockReader` iterator.
pub type MetadataBlockResult = Result<MetadataBlock>;
564 
565 impl<R: ReadBytes> MetadataBlockReader<R> {
566     /// Creates a metadata block reader that will yield at least one element.
new(input: R) -> MetadataBlockReader<R>567     pub fn new(input: R) -> MetadataBlockReader<R> {
568         MetadataBlockReader {
569             input: input,
570             done: false,
571         }
572     }
573 
574     #[inline]
read_next(&mut self) -> MetadataBlockResult575     fn read_next(&mut self) -> MetadataBlockResult {
576         let header = try!(read_metadata_block_header(&mut self.input));
577         let block = try!(read_metadata_block(&mut self.input, header.block_type, header.length));
578         self.done = header.is_last;
579         Ok(block)
580     }
581 }
582 
583 impl<R: ReadBytes> Iterator for MetadataBlockReader<R> {
584     type Item = MetadataBlockResult;
585 
586     #[inline]
next(&mut self) -> Option<MetadataBlockResult>587     fn next(&mut self) -> Option<MetadataBlockResult> {
588         if self.done {
589             None
590         } else {
591             let block = self.read_next();
592 
593             // After a failure, no more attempts to read will be made,
594             // because we don't know where we are in the stream.
595             if !block.is_ok() {
596                 self.done = true;
597             }
598 
599             Some(block)
600         }
601     }
602 
603     #[inline]
size_hint(&self) -> (usize, Option<usize>)604     fn size_hint(&self) -> (usize, Option<usize>) {
605         // When done, there will be no more blocks,
606         // when not done, there will be at least one more.
607         if self.done { (0, Some(0)) } else { (1, None) }
608     }
609 }
610