1 // Claxon -- A FLAC decoding library in Rust
2 // Copyright 2014 Ruud van Asseldonk
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // A copy of the License has been included in the root of the repository.
7
8 //! The `metadata` module deals with metadata at the beginning of a FLAC stream.
9
10 use error::{Error, Result, fmt_err};
11 use input::ReadBytes;
12 use std::str;
13 use std::slice;
14
/// The header that precedes the body of every metadata block.
#[derive(Clone, Copy)]
struct MetadataBlockHeader {
    /// Set when no further metadata blocks follow this one.
    is_last: bool,
    /// The block type code, taken from the low 7 bits of the first header byte.
    block_type: u8,
    /// The length in bytes of the block body that follows the header; this is
    /// a 24-bit field in the stream.
    length: u32,
}
21
/// The contents of the streaminfo metadata block, which describes the basic
/// properties of the audio stream.
#[derive(Clone, Copy, Debug)]
pub struct StreamInfo {
    // TODO: these "size" fields would be clearer if they were called "duration".
    /// The smallest block size used in the stream, in inter-channel samples.
    ///
    /// The number of channels does not affect this value. Divide it by the
    /// sample rate to obtain the minimum block duration in seconds.
    pub min_block_size: u16,
    /// The largest block size used in the stream, in inter-channel samples.
    ///
    /// The number of channels does not affect this value. Divide it by the
    /// sample rate to obtain the maximum block duration in seconds. A buffer
    /// of this size times the number of channels can be allocated up front
    /// and passed into `FrameReader::read_next_or_eof()` to avoid allocations
    /// during decoding.
    pub max_block_size: u16,
    /// The smallest frame size used in the stream, in bytes.
    ///
    /// `None` when the stream stores zero for this field.
    pub min_frame_size: Option<u32>,
    /// The largest frame size used in the stream, in bytes.
    ///
    /// `None` when the stream stores zero for this field.
    pub max_frame_size: Option<u32>,
    /// The sample rate, in Hz.
    pub sample_rate: u32,
    /// The number of audio channels.
    pub channels: u32,
    /// The number of bits used for every sample.
    pub bits_per_sample: u32,
    /// The total number of inter-channel samples in the stream.
    ///
    /// `None` when the stream stores zero for this field.
    // TODO: would `duration` be a clearer name?
    pub samples: Option<u64>,
    /// The MD5 signature of the unencoded audio data.
    pub md5sum: [u8; 16],
}
55
/// A single entry of the seek table.
#[derive(Clone, Copy)]
pub struct SeekPoint {
    /// The number of the first sample in the target frame. A placeholder
    /// point stores 2<sup>64</sup> - 1 here.
    pub sample: u64,
    /// The distance in bytes between the first byte of the first frame header
    /// and the first byte of the header of the target frame.
    pub offset: u64,
    /// The number of samples that the target frame contains.
    pub samples: u16,
}
67
68 /// A seek table to aid seeking in the stream.
69 pub struct SeekTable {
70 /// The seek points, sorted in ascending order by sample number.
71 #[allow(dead_code)] // TODO: Implement seeking.
72 seekpoints: Vec<SeekPoint>,
73 }
74
/// The Vorbis comments of a stream, also known as FLAC tags (artist, title,
/// and so on).
pub struct VorbisComment {
    /// The “vendor string”, which the vendor of the encoder picks.
    ///
    /// It usually holds the name and version of the program that encoded the
    /// FLAC stream, for example `reference libFLAC 1.3.2 20170101` or
    /// `Lavf57.25.100`.
    pub vendor: String,

    /// The Vorbis comments as name-value pairs, for example `ARTIST=Queen`.
    ///
    /// This is a raw, low-level representation of the tags; prefer the
    /// friendlier iterator that `FlacReader::tags()` returns. Every tuple
    /// holds the full string in `"NAME=value"` format, together with the
    /// index of the `'='` in that string.
    ///
    /// The name is guaranteed to consist of ASCII characters, and it is meant
    /// to be compared case-insensitively. Claxon leaves the casing of the
    /// name as it is in the stream; `metadata::GetTag` performs a
    /// case-insensitive lookup.
    ///
    /// A name may occur more than once. A collaboration track might carry
    /// several `ARTIST` comments, for instance.
    ///
    /// More details are at <https://www.xiph.org/vorbis/doc/v-comment.html>.
    pub comments: Vec<(String, usize)>,
}
102
103 /// A metadata about the flac stream.
104 pub enum MetadataBlock {
105 /// A stream info block.
106 StreamInfo(StreamInfo),
107 /// A padding block (with no meaningful data).
108 Padding {
109 /// The number of padding bytes.
110 length: u32,
111 },
112 /// An application block with application-specific data.
113 Application {
114 /// The registered application ID.
115 id: u32,
116 /// The contents of the application block.
117 data: Vec<u8>,
118 },
119 /// A seek table block.
120 SeekTable(SeekTable),
121 /// A Vorbis comment block, also known as FLAC tags.
122 VorbisComment(VorbisComment),
123 /// A CUE sheet block.
124 CueSheet, // TODO
125 /// A picture block.
126 Picture, // TODO
127 /// A block with a reserved block type, not supported by this library.
128 Reserved,
129 }
130
/// An iterator over the Vorbis comments (FLAC tags) of a FLAC stream.
///
/// Every item is a `(name, value)` pair of string slices. See
/// `FlacReader::tags()` for more details.
pub struct Tags<'a> {
    /// Iterator over the raw `("NAME=value", index of '=')` tuples.
    iter: slice::Iter<'a, (String, usize)>,
}

impl<'a> Tags<'a> {
    /// Constructs a `Tags` iterator over the given raw comments.
    #[inline]
    pub fn new(comments: &'a [(String, usize)]) -> Tags<'a> {
        let iter = comments.iter();
        Tags { iter: iter }
    }
}

impl<'a> Iterator for Tags<'a> {
    type Item = (&'a str, &'a str);

    /// Yields the next tag, split into its name and its value.
    #[inline]
    fn next(&mut self) -> Option<(&'a str, &'a str)> {
        match self.iter.next() {
            None => None,
            Some(&(ref comment, sep_idx)) => {
                // The name is everything before the separator, the value is
                // everything after it; the separator itself is dropped.
                let name = &comment[..sep_idx];
                let value = &comment[sep_idx + 1..];
                Some((name, value))
            }
        }
    }

    /// Forwards the exact bounds of the underlying slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let bounds = self.iter.size_hint();
        bounds
    }
}

impl<'a> ExactSizeIterator for Tags<'a> {}
166
/// An iterator that searches the Vorbis comments for one specific tag, and
/// yields the value of every match as `&str`.
///
/// See `FlacReader::get_tag()` for more details.
pub struct GetTag<'a> {
    /// The raw Vorbis comments that are searched.
    vorbis_comments: &'a [(String, usize)],
    /// The name of the tag to find.
    needle: &'a str,
    /// The position of the next (name, value) pair to inspect.
    index: usize,
}

impl<'a> GetTag<'a> {
    /// Constructs a `GetTag` iterator that starts at the first comment.
    #[inline]
    pub fn new(vorbis_comments: &'a [(String, usize)], needle: &'a str) -> GetTag<'a> {
        GetTag {
            index: 0,
            needle: needle,
            vorbis_comments: vorbis_comments,
        }
    }
}

impl<'a> Iterator for GetTag<'a> {
    type Item = &'a str;

    /// Yields the value of the next comment whose name equals the needle,
    /// ignoring ASCII case.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        // This import is actually required on Rust 1.13.
        #[allow(unused_imports)]
        use std::ascii::AsciiExt;

        // Copy the slice reference out, so that the cursor can be advanced
        // while walking over the comments that were not inspected yet.
        let all_comments: &'a [(String, usize)] = self.vorbis_comments;

        for &(ref comment, sep_idx) in all_comments[self.index..].iter() {
            // Every inspected entry is consumed, also the one that matches.
            self.index += 1;

            let name = &comment[..sep_idx];
            if name.eq_ignore_ascii_case(self.needle) {
                return Some(&comment[sep_idx + 1..]);
            }
        }

        None
    }
}
212
213 #[inline]
read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlockHeader>214 fn read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlockHeader> {
215 let byte = try!(input.read_u8());
216
217 // The first bit specifies whether this is the last block, the next 7 bits
218 // specify the type of the metadata block to follow.
219 let is_last = (byte >> 7) == 1;
220 let block_type = byte & 0b0111_1111;
221
222 // The length field is 24 bits, or 3 bytes.
223 let length = try!(input.read_be_u24());
224
225 let header = MetadataBlockHeader {
226 is_last: is_last,
227 block_type: block_type,
228 length: length,
229 };
230 Ok(header)
231 }
232
233 /// Read a single metadata block header and body from the input.
234 ///
235 /// When reading a regular flac stream, there is no need to use this function
236 /// directly; constructing a `FlacReader` will read the header and its metadata
237 /// blocks.
238 ///
239 /// When a flac stream is embedded in a container format, this function can be
240 /// used to decode a single metadata block. For instance, the Ogg format embeds
241 /// metadata blocks including their header verbatim in packets. This function
242 /// can be used to decode that raw data.
243 #[inline]
read_metadata_block_with_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlock>244 pub fn read_metadata_block_with_header<R: ReadBytes>(input: &mut R)
245 -> Result<MetadataBlock> {
246 let header = try!(read_metadata_block_header(input));
247 read_metadata_block(input, header.block_type, header.length)
248 }
249
250 /// Read a single metadata block of the given type and length from the input.
251 ///
252 /// When reading a regular flac stream, there is no need to use this function
253 /// directly; constructing a `FlacReader` will read the header and its metadata
254 /// blocks.
255 ///
256 /// When a flac stream is embedded in a container format, this function can be
257 /// used to decode a single metadata block. For instance, the MP4 format sports
258 /// a “FLAC Specific Box” which contains the block type and the raw data. This
259 /// function can be used to decode that raw data.
260 #[inline]
read_metadata_block<R: ReadBytes>(input: &mut R, block_type: u8, length: u32) -> Result<MetadataBlock>261 pub fn read_metadata_block<R: ReadBytes>(input: &mut R,
262 block_type: u8,
263 length: u32)
264 -> Result<MetadataBlock> {
265 match block_type {
266 0 => {
267 // The streaminfo block has a fixed size of 34 bytes.
268 if length == 34 {
269 let streaminfo = try!(read_streaminfo_block(input));
270 Ok(MetadataBlock::StreamInfo(streaminfo))
271 } else {
272 fmt_err("invalid streaminfo metadata block length")
273 }
274 }
275 1 => {
276 try!(read_padding_block(input, length));
277 Ok(MetadataBlock::Padding { length: length })
278 }
279 2 => {
280 let (id, data) = try!(read_application_block(input, length));
281 Ok(MetadataBlock::Application {
282 id: id,
283 data: data,
284 })
285 }
286 3 => {
287 // TODO: implement seektable reading. For now, pretend it is padding.
288 try!(input.skip(length));
289 Ok(MetadataBlock::Padding { length: length })
290 }
291 4 => {
292 let vorbis_comment = try!(read_vorbis_comment_block(input, length));
293 Ok(MetadataBlock::VorbisComment(vorbis_comment))
294 }
295 5 => {
296 // TODO: implement CUE sheet reading. For now, pretend it is padding.
297 try!(input.skip(length));
298 Ok(MetadataBlock::Padding { length: length })
299 }
300 6 => {
301 // TODO: implement picture reading. For now, pretend it is padding.
302 try!(input.skip(length));
303 Ok(MetadataBlock::Padding { length: length })
304 }
305 127 => {
306 // This code is invalid to avoid confusion with a frame sync code.
307 fmt_err("invalid metadata block type")
308 }
309 _ => {
310 // Any other block type is 'reserved' at the moment of writing. The
311 // reference implementation reads it as an 'unknown' block. That is
312 // one way of handling it, but maybe there should be some kind of
313 // 'strict' mode (configurable at compile time?) so that this can
314 // be an error if desired.
315 try!(input.skip(length));
316 Ok(MetadataBlock::Reserved)
317 }
318 }
319 }
320
read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo>321 fn read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo> {
322 let min_block_size = try!(input.read_be_u16());
323 let max_block_size = try!(input.read_be_u16());
324
325 // The frame size fields are 24 bits, or 3 bytes.
326 let min_frame_size = try!(input.read_be_u24());
327 let max_frame_size = try!(input.read_be_u24());
328
329 // Next up are 20 bits that determine the sample rate.
330 let sample_rate_msb = try!(input.read_be_u16());
331 let sample_rate_lsb = try!(input.read_u8());
332
333 // Stitch together the value from the first 16 bits,
334 // and then the 4 most significant bits of the next byte.
335 let sample_rate = (sample_rate_msb as u32) << 4 | (sample_rate_lsb as u32) >> 4;
336
337 // Next three bits are the number of channels - 1. Mask them out and add 1.
338 let n_channels_bps = sample_rate_lsb;
339 let n_channels = ((n_channels_bps >> 1) & 0b0000_0111) + 1;
340
341 // The final bit is the most significant of bits per sample - 1. Bits per
342 // sample - 1 is 5 bits in total.
343 let bps_msb = n_channels_bps & 1;
344 let bps_lsb_n_samples = try!(input.read_u8());
345
346 // Stitch together these values, add 1 because # - 1 is stored.
347 let bits_per_sample = (bps_msb << 4 | (bps_lsb_n_samples >> 4)) + 1;
348
349 // Number of samples in 36 bits, we have 4 already, 32 to go.
350 let n_samples_msb = bps_lsb_n_samples & 0b0000_1111;
351 let n_samples_lsb = try!(input.read_be_u32());
352 let n_samples = (n_samples_msb as u64) << 32 | n_samples_lsb as u64;
353
354 // Next are 128 bits (16 bytes) of MD5 signature.
355 let mut md5sum = [0u8; 16];
356 try!(input.read_into(&mut md5sum));
357
358 // Lower bounds can never be larger than upper bounds. Note that 0 indicates
359 // unknown for the frame size. Also, the block size must be at least 16.
360 if min_block_size > max_block_size {
361 return fmt_err("inconsistent bounds, min block size > max block size");
362 }
363 if min_block_size < 16 {
364 return fmt_err("invalid block size, must be at least 16");
365 }
366 if min_frame_size > max_frame_size && max_frame_size != 0 {
367 return fmt_err("inconsistent bounds, min frame size > max frame size");
368 }
369
370 // A sample rate of 0 is invalid, and the maximum sample rate is limited by
371 // the structure of the frame headers to 655350 Hz.
372 if sample_rate == 0 || sample_rate > 655350 {
373 return fmt_err("invalid sample rate");
374 }
375
376 let stream_info = StreamInfo {
377 min_block_size: min_block_size,
378 max_block_size: max_block_size,
379 min_frame_size: if min_frame_size == 0 {
380 None
381 } else {
382 Some(min_frame_size)
383 },
384 max_frame_size: if max_frame_size == 0 {
385 None
386 } else {
387 Some(max_frame_size)
388 },
389 sample_rate: sample_rate,
390 channels: n_channels as u32,
391 bits_per_sample: bits_per_sample as u32,
392 samples: if n_samples == 0 {
393 None
394 } else {
395 Some(n_samples)
396 },
397 md5sum: md5sum,
398 };
399 Ok(stream_info)
400 }
401
read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<VorbisComment>402 fn read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<VorbisComment> {
403 if length < 8 {
404 // We expect at a minimum a 32-bit vendor string length, and a 32-bit
405 // comment count.
406 return fmt_err("Vorbis comment block is too short")
407 }
408
409 // Fail if the length of the Vorbis comment block is larger than 1 MiB. This
410 // block is full of length-prefixed strings for which we allocate memory up
411 // front. If there were no limit on these, a maliciously crafted file could
412 // cause OOM by claiming to contain large strings. But at least the strings
413 // cannot be longer than the size of the Vorbis comment block, and by
414 // limiting the size of that block, we can mitigate such DoS attacks.
415 //
416 // The typical size of a the Vorbis comment block is 1 KiB; on a corpus of
417 // real-world flac files, the 0.05 and 0.95 quantiles were 792 and 1257
418 // bytes respectively, with even the 0.99 quantile below 2 KiB. The only
419 // reason for having a large Vorbis comment block is when cover art is
420 // incorrectly embedded there, but the Vorbis comment block is not the right
421 // place for that anyway.
422 if length > 10 * 1024 * 1024 {
423 let msg = "Vorbis comment blocks larger than 10 MiB are not supported";
424 return Err(Error::Unsupported(msg))
425 }
426
427 // The Vorbis comment block starts with a length-prefixed "vendor string".
428 // It cannot be larger than the block length - 8, because there are the
429 // 32-bit vendor string length, and comment count.
430 let vendor_len = try!(input.read_le_u32());
431 if vendor_len > length - 8 { return fmt_err("vendor string too long") }
432 let mut vendor_bytes = Vec::with_capacity(vendor_len as usize);
433
434 // We can safely set the lenght of the vector here; the uninitialized memory
435 // is not exposed. If `read_into` succeeds, it will have overwritten all
436 // bytes. If not, an error is returned and the memory is never exposed.
437 unsafe { vendor_bytes.set_len(vendor_len as usize); }
438 try!(input.read_into(&mut vendor_bytes));
439 let vendor = try!(String::from_utf8(vendor_bytes));
440
441 // Next up is the number of comments. Because every comment is at least 4
442 // bytes to indicate its length, there cannot be more comments than the
443 // length of the block divided by 4. This is only an upper bound to ensure
444 // that we don't allocate a big vector, to protect against DoS attacks.
445 let mut comments_len = try!(input.read_le_u32());
446 if comments_len >= length / 4 {
447 return fmt_err("too many entries for Vorbis comment block")
448 }
449 let mut comments = Vec::with_capacity(comments_len as usize);
450
451 let mut bytes_left = length - 8 - vendor_len;
452
453 // For every comment, there is a length-prefixed string of the form
454 // "NAME=value".
455 while bytes_left >= 4 && comments.len() < comments_len as usize {
456 let comment_len = try!(input.read_le_u32());
457 bytes_left -= 4;
458
459 if comment_len > bytes_left {
460 return fmt_err("Vorbis comment too long for Vorbis comment block")
461 }
462
463 // Some older versions of libflac allowed writing zero-length Vorbis
464 // comments. ALthough such files are invalid, they do occur in the wild,
465 // so we skip over the empty comment.
466 if comment_len == 0 {
467 // Does not overflow because `comments_len > comments.len() >= 0`.
468 comments_len -= 1;
469 continue;
470 }
471
472 // For the same reason as above, setting the length is safe here.
473 let mut comment_bytes = Vec::with_capacity(comment_len as usize);
474 unsafe { comment_bytes.set_len(comment_len as usize); }
475 try!(input.read_into(&mut comment_bytes));
476
477 bytes_left -= comment_len;
478
479 if let Some(sep_index) = comment_bytes.iter().position(|&x| x == b'=') {
480 {
481 let name_bytes = &comment_bytes[..sep_index];
482
483 // According to the Vorbis spec, the field name may consist of ascii
484 // bytes 0x20 through 0x7d, 0x3d (`=`) excluded. Verifying this has
485 // the advantage that if the check passes, the result is valid
486 // UTF-8, so the conversion to string will not fail.
487 if name_bytes.iter().any(|&x| x < 0x20 || x > 0x7d) {
488 return fmt_err("Vorbis comment field name contains invalid byte")
489 }
490 }
491
492 let comment = try!(String::from_utf8(comment_bytes));
493 comments.push((comment, sep_index));
494 } else {
495 return fmt_err("Vorbis comment does not contain '='")
496 }
497 }
498
499 if bytes_left != 0 {
500 return fmt_err("Vorbis comment block has excess data")
501 }
502
503 if comments.len() != comments_len as usize {
504 return fmt_err("Vorbis comment block contains wrong number of entries")
505 }
506
507 let vorbis_comment = VorbisComment {
508 vendor: vendor,
509 comments: comments,
510 };
511
512 Ok(vorbis_comment)
513 }
514
read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()>515 fn read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()> {
516 // The specification dictates that all bits of the padding block must be 0.
517 // However, the reference implementation does not issue an error when this
518 // is not the case, and frankly, when you are going to skip over these
519 // bytes and do nothing with them whatsoever, why waste all those CPU
520 // cycles checking that the padding is valid?
521 Ok(try!(input.skip(length)))
522 }
523
read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u32, Vec<u8>)>524 fn read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u32, Vec<u8>)> {
525 if length < 4 {
526 return fmt_err("application block length must be at least 4 bytes")
527 }
528
529 // Reject large application blocks to avoid memory-based denial-
530 // of-service attacks. See also the more elaborate motivation in
531 // `read_vorbis_comment_block()`.
532 if length > 10 * 1024 * 1024 {
533 let msg = "application blocks larger than 10 MiB are not supported";
534 return Err(Error::Unsupported(msg))
535 }
536
537 let id = try!(input.read_be_u32());
538
539 // Four bytes of the block have been used for the ID, the rest is payload.
540 // Create a vector of uninitialized memory, and read the block into it. The
541 // uninitialized memory is never exposed: read_into will either fill the
542 // buffer completely, or return an err, in which case the memory is not
543 // exposed.
544 let mut data = Vec::with_capacity(length as usize - 4);
545 unsafe { data.set_len(length as usize - 4); }
546 try!(input.read_into(&mut data));
547
548 Ok((id, data))
549 }
550
551 /// Reads metadata blocks from a stream and exposes them as an iterator.
552 ///
553 /// It is assumed that the next byte that the reader will read, is the first
554 /// byte of a metadata block header. This means that the iterator will yield at
555 /// least a single value. If the iterator ever yields an error, then no more
556 /// data will be read thereafter, and the next value will be `None`.
557 pub struct MetadataBlockReader<R: ReadBytes> {
558 input: R,
559 done: bool,
560 }
561
562 /// Either a `MetadataBlock` or an `Error`.
563 pub type MetadataBlockResult = Result<MetadataBlock>;
564
565 impl<R: ReadBytes> MetadataBlockReader<R> {
566 /// Creates a metadata block reader that will yield at least one element.
new(input: R) -> MetadataBlockReader<R>567 pub fn new(input: R) -> MetadataBlockReader<R> {
568 MetadataBlockReader {
569 input: input,
570 done: false,
571 }
572 }
573
574 #[inline]
read_next(&mut self) -> MetadataBlockResult575 fn read_next(&mut self) -> MetadataBlockResult {
576 let header = try!(read_metadata_block_header(&mut self.input));
577 let block = try!(read_metadata_block(&mut self.input, header.block_type, header.length));
578 self.done = header.is_last;
579 Ok(block)
580 }
581 }
582
583 impl<R: ReadBytes> Iterator for MetadataBlockReader<R> {
584 type Item = MetadataBlockResult;
585
586 #[inline]
next(&mut self) -> Option<MetadataBlockResult>587 fn next(&mut self) -> Option<MetadataBlockResult> {
588 if self.done {
589 None
590 } else {
591 let block = self.read_next();
592
593 // After a failure, no more attempts to read will be made,
594 // because we don't know where we are in the stream.
595 if !block.is_ok() {
596 self.done = true;
597 }
598
599 Some(block)
600 }
601 }
602
603 #[inline]
size_hint(&self) -> (usize, Option<usize>)604 fn size_hint(&self) -> (usize, Option<usize>) {
605 // When done, there will be no more blocks,
606 // when not done, there will be at least one more.
607 if self.done { (0, Some(0)) } else { (1, None) }
608 }
609 }
610