// Claxon -- A FLAC decoding library in Rust
// Copyright 2014 Ruud van Asseldonk
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// A copy of the License has been included in the root of the repository.

//! Exposes traits that help reading data at the bit level with low overhead.
//!
//! The traits in this module deal with reading bytes (with a given endianness)
//! from a buffered reader, in a way that still allows efficient checksumming
//! of the data read. There is also a `Bitstream` reader, which is used
//! internally to read individual bits from the FLAC bitstream.

use std::cmp;
use std::io;

/// Similar to `std::io::BufRead`, but more performant.
///
/// There is no simple way to wrap a standard `BufRead` such that it can compute
/// checksums on consume. This is really something that needs a less restrictive
/// interface. Apart from enabling checksum computations, this buffered reader
/// has some convenience functions.
pub struct BufferedReader<R: io::Read> {
    /// The wrapped reader.
    inner: R,

    /// The buffer that holds data read from the inner reader.
    buf: Box<[u8]>,

    /// The index of the first byte in the buffer which has not been consumed.
    pos: u32,

    /// The number of bytes of the buffer which have meaningful content.
    num_valid: u32,
}

impl<R: io::Read> BufferedReader<R> {

    /// Wrap the reader in a new buffered reader.
    pub fn new(inner: R) -> BufferedReader<R> {
        // Use a large-ish buffer size, such that system call overhead is
        // negligible when replenishing the buffer. However, when fuzzing we
        // want to have small samples, and still trigger the case where we have
        // to replenish the buffer, so use a smaller buffer size there.
        #[cfg(not(fuzzing))]
        const CAPACITY: usize = 2048;

        #[cfg(fuzzing)]
        const CAPACITY: usize = 31;

        let buf = vec![0; CAPACITY].into_boxed_slice();
        BufferedReader {
            inner: inner,
            buf: buf,
            pos: 0,
            num_valid: 0,
        }
    }

    /// Destroys the buffered reader, returning the wrapped reader.
    ///
    /// Anything in the buffer will be lost.
    pub fn into_inner(self) -> R {
        self.inner
    }
}
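
// The following is an illustrative sketch, not part of the original test
// suite: it shows the intended `new`/`into_inner` round trip, where the
// wrapped reader is recovered (and anything still in the buffer is lost).
#[test]
fn verify_into_inner_returns_wrapped_reader() {
    let reader = BufferedReader::new(io::Cursor::new(vec![1u8, 2, 3]));
    // No bytes were consumed, so the inner cursor is still at position 0.
    let inner = reader.into_inner();
    assert_eq!(inner.position(), 0);
    assert_eq!(inner.into_inner(), vec![1u8, 2, 3]);
}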


/// Provides convenience methods to make input less cumbersome.
pub trait ReadBytes {
    /// Reads a single byte, failing on EOF.
    fn read_u8(&mut self) -> io::Result<u8>;

    /// Reads a single byte, not failing on EOF.
    fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>>;

    /// Reads until the provided buffer is full.
    fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>;

    /// Skips over the specified number of bytes.
    ///
    /// For a buffered reader, this can help a lot by just bumping a pointer.
    fn skip(&mut self, amount: u32) -> io::Result<()>;

    /// Reads two bytes and interprets them as a big-endian 16-bit unsigned integer.
    fn read_be_u16(&mut self) -> io::Result<u16> {
        let b0 = try!(self.read_u8()) as u16;
        let b1 = try!(self.read_u8()) as u16;
        Ok(b0 << 8 | b1)
    }

    /// Reads two bytes and interprets them as a big-endian 16-bit unsigned
    /// integer, returning `None` on EOF.
    fn read_be_u16_or_eof(&mut self) -> io::Result<Option<u16>> {
        if let Some(b0) = try!(self.read_u8_or_eof()) {
            if let Some(b1) = try!(self.read_u8_or_eof()) {
                return Ok(Some((b0 as u16) << 8 | (b1 as u16)));
            }
        }
        Ok(None)
    }

    /// Reads three bytes and interprets them as a big-endian 24-bit unsigned integer.
    fn read_be_u24(&mut self) -> io::Result<u32> {
        let b0 = try!(self.read_u8()) as u32;
        let b1 = try!(self.read_u8()) as u32;
        let b2 = try!(self.read_u8()) as u32;
        Ok(b0 << 16 | b1 << 8 | b2)
    }

    /// Reads four bytes and interprets them as a big-endian 32-bit unsigned integer.
    fn read_be_u32(&mut self) -> io::Result<u32> {
        let b0 = try!(self.read_u8()) as u32;
        let b1 = try!(self.read_u8()) as u32;
        let b2 = try!(self.read_u8()) as u32;
        let b3 = try!(self.read_u8()) as u32;
        Ok(b0 << 24 | b1 << 16 | b2 << 8 | b3)
    }

    /// Reads four bytes and interprets them as a little-endian 32-bit unsigned integer.
    fn read_le_u32(&mut self) -> io::Result<u32> {
        let b0 = try!(self.read_u8()) as u32;
        let b1 = try!(self.read_u8()) as u32;
        let b2 = try!(self.read_u8()) as u32;
        let b3 = try!(self.read_u8()) as u32;
        Ok(b3 << 24 | b2 << 16 | b1 << 8 | b0)
    }
}
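
// Illustrative sketch (not from the original suite): the provided methods
// above are built purely from `read_u8`, so every `ReadBytes` impl gets the
// multi-byte readers for free. The expected values follow directly from the
// big-endian shifts in the default implementations.
#[test]
fn verify_default_methods_compose_read_u8() {
    let mut cursor = io::Cursor::new(vec![0x01u8, 0x02, 0x03, 0x04]);
    assert_eq!(cursor.read_be_u24().unwrap(), 0x01_02_03);
    // Only one byte is left, so the next multi-byte read fails.
    assert!(cursor.read_be_u16().is_err());
}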

impl<R: io::Read> ReadBytes for BufferedReader<R>
{
    #[inline(always)]
    fn read_u8(&mut self) -> io::Result<u8> {
        if self.pos == self.num_valid {
            // The buffer was depleted, replenish it first.
            self.pos = 0;
            self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

            if self.num_valid == 0 {
                return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                          "Expected one more byte."))
            }
        }

        // At this point there is at least one more byte in the buffer, we
        // checked that above. However, when using regular indexing, the
        // compiler still inserts a bounds check here. It is safe to avoid it.
        let byte = unsafe { *self.buf.get_unchecked(self.pos as usize) };
        self.pos += 1;
        Ok(byte)
    }

    fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
        if self.pos == self.num_valid {
            // The buffer was depleted, try to replenish it first.
            self.pos = 0;
            self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

            if self.num_valid == 0 {
                return Ok(None);
            }
        }

        Ok(Some(try!(self.read_u8())))
    }

    fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
        let mut bytes_left = buffer.len();

        while bytes_left > 0 {
            let from = buffer.len() - bytes_left;
            let count = cmp::min(bytes_left, (self.num_valid - self.pos) as usize);
            buffer[from..from + count].copy_from_slice(
                &self.buf[self.pos as usize..self.pos as usize + count]);
            bytes_left -= count;
            self.pos += count as u32;

            if bytes_left > 0 {
                // Replenish the buffer if there is more to be read.
                self.pos = 0;
                self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;
                if self.num_valid == 0 {
                    return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                              "Expected more bytes."))
                }
            }
        }

        Ok(())
    }

    fn skip(&mut self, mut amount: u32) -> io::Result<()> {
        while amount > 0 {
            let num_left = self.num_valid - self.pos;
            let read_now = cmp::min(amount, num_left);
            self.pos += read_now;
            amount -= read_now;

            if amount > 0 {
                // If there is more to skip, refill the buffer first.
                self.pos = 0;
                self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

                if self.num_valid == 0 {
                    return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                              "Expected more bytes."))
                }
            }
        }
        Ok(())
    }
}
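
// Illustrative sketch (not from the original suite): `skip` must keep working
// when the amount to skip exceeds what is currently buffered, which forces it
// through the refill path above. The 4096-byte prefix is an arbitrary size
// chosen to be larger than the buffer capacity.
#[test]
fn verify_skip_works_across_buffer_refills() {
    let mut data = vec![0u8; 4096];
    data.push(42);
    let mut reader = BufferedReader::new(io::Cursor::new(data));
    reader.skip(4096).unwrap();
    assert_eq!(reader.read_u8().unwrap(), 42);
    assert!(reader.read_u8().is_err());
}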

impl<'r, R: ReadBytes> ReadBytes for &'r mut R {

    #[inline(always)]
    fn read_u8(&mut self) -> io::Result<u8> {
        (*self).read_u8()
    }

    fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
        (*self).read_u8_or_eof()
    }

    fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
        (*self).read_into(buffer)
    }

    fn skip(&mut self, amount: u32) -> io::Result<()> {
        (*self).skip(amount)
    }
}
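
// Illustrative sketch (not from the original suite): thanks to the blanket
// impl above, a `&mut` borrow can be handed to a generic helper without
// giving up the reader. `read_two` is a hypothetical helper, defined here
// only for the sake of the example.
#[test]
fn verify_mut_ref_is_read_bytes() {
    fn read_two<R: ReadBytes>(mut input: R) -> io::Result<(u8, u8)> {
        Ok((try!(input.read_u8()), try!(input.read_u8())))
    }
    let mut cursor = io::Cursor::new(vec![5u8, 7, 11]);
    assert_eq!(read_two(&mut cursor).unwrap(), (5, 7));
    // The cursor itself was only borrowed, so we can keep reading from it.
    assert_eq!(cursor.read_u8().unwrap(), 11);
}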

impl<T: AsRef<[u8]>> ReadBytes for io::Cursor<T> {

    fn read_u8(&mut self) -> io::Result<u8> {
        let pos = self.position();
        if pos < self.get_ref().as_ref().len() as u64 {
            self.set_position(pos + 1);
            Ok(self.get_ref().as_ref()[pos as usize])
        } else {
            Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
        }
    }

    fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
        let pos = self.position();
        if pos < self.get_ref().as_ref().len() as u64 {
            self.set_position(pos + 1);
            Ok(Some(self.get_ref().as_ref()[pos as usize]))
        } else {
            Ok(None)
        }
    }

    fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
        let pos = self.position();
        if pos + buffer.len() as u64 <= self.get_ref().as_ref().len() as u64 {
            let start = pos as usize;
            let end = pos as usize + buffer.len();
            buffer.copy_from_slice(&self.get_ref().as_ref()[start..end]);
            self.set_position(pos + buffer.len() as u64);
            Ok(())
        } else {
            Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
        }
    }

    fn skip(&mut self, amount: u32) -> io::Result<()> {
        let pos = self.position();
        if pos + amount as u64 <= self.get_ref().as_ref().len() as u64 {
            self.set_position(pos + amount as u64);
            Ok(())
        } else {
            Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
        }
    }
}
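
// Illustrative sketch (not from the original suite): for a cursor, `skip`
// only advances the position, and skipping past the end reports an error
// rather than clamping to the end of the data.
#[test]
fn verify_skip_cursor() {
    let mut cursor = io::Cursor::new(vec![2u8, 3, 5, 7]);
    cursor.skip(3).unwrap();
    assert_eq!(cursor.read_u8().unwrap(), 7);
    assert!(cursor.skip(1).is_err());
}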

#[test]
fn verify_read_into_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]));
    let mut buf1 = [0u8; 3];
    let mut buf2 = [0u8; 5];
    let mut buf3 = [0u8; 2];
    reader.read_into(&mut buf1).ok().unwrap();
    reader.read_into(&mut buf2).ok().unwrap();
    assert!(reader.read_into(&mut buf3).is_err());
    assert_eq!(&buf1[..], &[2u8, 3, 5]);
    assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}

#[test]
fn verify_read_into_cursor() {
    let mut cursor = io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]);
    let mut buf1 = [0u8; 3];
    let mut buf2 = [0u8; 5];
    let mut buf3 = [0u8; 2];
    cursor.read_into(&mut buf1).ok().unwrap();
    cursor.read_into(&mut buf2).ok().unwrap();
    assert!(cursor.read_into(&mut buf3).is_err());
    assert_eq!(&buf1[..], &[2u8, 3, 5]);
    assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}

#[test]
fn verify_read_u8_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 2, 129, 89, 122]));
    assert_eq!(reader.read_u8().unwrap(), 0);
    assert_eq!(reader.read_u8().unwrap(), 2);
    assert_eq!(reader.read_u8().unwrap(), 129);
    assert_eq!(reader.read_u8().unwrap(), 89);
    assert_eq!(reader.read_u8_or_eof().unwrap(), Some(122));
    assert_eq!(reader.read_u8_or_eof().unwrap(), None);
    assert!(reader.read_u8().is_err());
}

#[test]
fn verify_read_u8_cursor() {
    let mut reader = io::Cursor::new(vec![0u8, 2, 129, 89, 122]);
    assert_eq!(reader.read_u8().unwrap(), 0);
    assert_eq!(reader.read_u8().unwrap(), 2);
    assert_eq!(reader.read_u8().unwrap(), 129);
    assert_eq!(reader.read_u8().unwrap(), 89);
    assert_eq!(reader.read_u8_or_eof().unwrap(), Some(122));
    assert_eq!(reader.read_u8_or_eof().unwrap(), None);
    assert!(reader.read_u8().is_err());
}

#[test]
fn verify_read_be_u16_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 2, 129, 89, 122]));
    assert_eq!(reader.read_be_u16().ok(), Some(2));
    assert_eq!(reader.read_be_u16().ok(), Some(33113));
    assert!(reader.read_be_u16().is_err());
}

#[test]
fn verify_read_be_u16_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 2, 129, 89, 122]);
    assert_eq!(cursor.read_be_u16().ok(), Some(2));
    assert_eq!(cursor.read_be_u16().ok(), Some(33113));
    assert!(cursor.read_be_u16().is_err());
}

#[test]
fn verify_read_be_u24_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 0, 2, 0x8f, 0xff, 0xf3, 122]));
    assert_eq!(reader.read_be_u24().ok(), Some(2));
    assert_eq!(reader.read_be_u24().ok(), Some(9_437_171));
    assert!(reader.read_be_u24().is_err());
}

#[test]
fn verify_read_be_u24_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 0, 2, 0x8f, 0xff, 0xf3, 122]);
    assert_eq!(cursor.read_be_u24().ok(), Some(2));
    assert_eq!(cursor.read_be_u24().ok(), Some(9_437_171));
    assert!(cursor.read_be_u24().is_err());
}

#[test]
fn verify_read_be_u32_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 0, 0, 2, 0x80, 0x01, 0xff, 0xe9, 0]));
    assert_eq!(reader.read_be_u32().ok(), Some(2));
    assert_eq!(reader.read_be_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_be_u32().is_err());
}

#[test]
fn verify_read_be_u32_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 0, 0, 2, 0x80, 0x01, 0xff, 0xe9, 0]);
    assert_eq!(cursor.read_be_u32().ok(), Some(2));
    assert_eq!(cursor.read_be_u32().ok(), Some(2_147_614_697));
    assert!(cursor.read_be_u32().is_err());
}

#[test]
fn verify_read_le_u32_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0]));
    assert_eq!(reader.read_le_u32().ok(), Some(2));
    assert_eq!(reader.read_le_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_le_u32().is_err());
}

#[test]
fn verify_read_le_u32_cursor() {
    let mut reader = io::Cursor::new(vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0]);
    assert_eq!(reader.read_le_u32().ok(), Some(2));
    assert_eq!(reader.read_le_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_le_u32().is_err());
}

/// Left shift that does not panic when shifting by the integer width.
#[inline(always)]
fn shift_left(x: u8, shift: u32) -> u8 {
    debug_assert!(shift <= 8);

    // We cannot shift a u8 by 8 or more, because Rust panics when shifting by
    // the integer width. But we can definitely shift a u32.
    ((x as u32) << shift) as u8
}

/// Right shift that does not panic when shifting by the integer width.
#[inline(always)]
fn shift_right(x: u8, shift: u32) -> u8 {
    debug_assert!(shift <= 8);

    // We cannot shift a u8 by 8 or more, because Rust panics when shifting by
    // the integer width. But we can definitely shift a u32.
    ((x as u32) >> shift) as u8
}
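
// Illustrative sketch (not from the original suite): the whole point of the
// helpers above is that a shift by the full integer width yields 0 instead
// of panicking, which a plain `<<` or `>>` on a `u8` would do in debug mode.
#[test]
fn verify_shift_by_integer_width_yields_zero() {
    assert_eq!(shift_left(0xff, 8), 0);
    assert_eq!(shift_right(0xff, 8), 0);
    assert_eq!(shift_left(0b0000_0001, 7), 0b1000_0000);
    assert_eq!(shift_right(0b1000_0000, 7), 0b0000_0001);
}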

/// Wraps a `Reader` to facilitate reading that is not byte-aligned.
pub struct Bitstream<R: ReadBytes> {
    /// The source where bits are read from.
    reader: R,
    /// Data read from the reader, but not yet fully consumed.
    data: u8,
    /// The number of bits of `data` that have not been consumed.
    bits_left: u32,
}

impl<R: ReadBytes> Bitstream<R> {
    /// Wraps the reader with a reader that facilitates reading individual bits.
    pub fn new(reader: R) -> Bitstream<R> {
        Bitstream {
            reader: reader,
            data: 0,
            bits_left: 0,
        }
    }

    /// Generates a bitmask with 1s in the `bits` most significant bits.
    #[inline(always)]
    fn mask_u8(bits: u32) -> u8 {
        debug_assert!(bits <= 8);
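        // For example, `mask_u8(3)` is `0b1110_0000`, and `mask_u8(0)` is 0.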

        shift_left(0xff, 8 - bits)
    }

    /// Reads a single bit.
    ///
    /// Reading a single bit can be done more efficiently than reading
    /// more than one bit, because a bit never straddles a byte boundary.
    #[inline(always)]
    pub fn read_bit(&mut self) -> io::Result<bool> {

        // If no bits are left, we will need to read the next byte.
        let result = if self.bits_left == 0 {
            let fresh_byte = try!(self.reader.read_u8());

            // What remains later are the 7 least significant bits.
            self.data = fresh_byte << 1;
            self.bits_left = 7;

            // What we report is the most significant bit of the fresh byte.
            fresh_byte & 0b1000_0000
        } else {
            // Consume the most significant bit of the buffer byte.
            let bit = self.data & 0b1000_0000;
            self.data = self.data << 1;
            self.bits_left = self.bits_left - 1;
            bit
        };

        Ok(result != 0)
    }

    /// Reads bits until a 1 is read, and returns the number of zeros read.
    ///
    /// Because the reader buffers a byte internally, reading unary can be done
    /// more efficiently than by just reading bit by bit.
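    ///
    /// For example, the bits `1`, `01` and `001` decode to 0, 1 and 2
    /// respectively.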
    #[inline(always)]
    pub fn read_unary(&mut self) -> io::Result<u32> {
        // Start initially with the number of zeros that are in the buffer byte
        // already (counting from the most significant bit).
        let mut n = self.data.leading_zeros();

        // If the number of zeros plus the one following it was not more than
        // the bits left, then there is no need to look further.
        if n < self.bits_left {
            // Note: this shift never shifts by more than 7 places, because
            // bits_left is always at most 7 in between read calls, and the
            // least significant bit of the buffer byte is 0 in that case. So
            // we count either 8 zeros, or less than 7. In the former case we
            // would not have taken this branch, in the latter the shift below
            // is safe.
            self.data = self.data << (n + 1);
            self.bits_left = self.bits_left - (n + 1);
        } else {
            // We inspected more bits than available, so our count is incorrect,
            // and we need to look at the next byte.
            n = self.bits_left;

            // Continue reading bytes until we encounter a one.
            loop {
                let fresh_byte = try!(self.reader.read_u8());
                let zeros = fresh_byte.leading_zeros();
                n = n + zeros;
                if zeros < 8 {
                    // We consumed the zeros, plus the one following it.
                    self.bits_left = 8 - (zeros + 1);
                    self.data = shift_left(fresh_byte, zeros + 1);
                    break;
                }
            }
        }

        Ok(n)
    }

    /// Reads at most eight bits.
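    ///
    /// The result is aligned to the least significant bits: reading 3 bits
    /// from a stream that starts with `101` yields `0b101`.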
    #[inline(always)]
    pub fn read_leq_u8(&mut self, bits: u32) -> io::Result<u8> {
        // Of course we can read no more than 8 bits, but we do not want the
        // performance overhead of the assertion, so only do it in debug mode.
        debug_assert!(bits <= 8);

        // If not enough bits left, we will need to read the next byte.
        let result = if self.bits_left < bits {
            // Most significant bits are shifted to the right position already.
            let msb = self.data;

            // Read a single byte.
            self.data = try!(self.reader.read_u8());

            // From the next byte, we take the additional bits that we need.
            // Those start at the most significant bit, so we need to shift so
            // that it does not overlap with what we have already.
            let lsb = (self.data & Bitstream::<R>::mask_u8(bits - self.bits_left))
                >> self.bits_left;

            // Shift out the bits that we have consumed.
            self.data = shift_left(self.data, bits - self.bits_left);
            self.bits_left = 8 - (bits - self.bits_left);

            msb | lsb
        } else {
            let result = self.data & Bitstream::<R>::mask_u8(bits);

            // Shift out the bits that we have consumed.
            self.data = self.data << bits;
            self.bits_left = self.bits_left - bits;

            result
        };

        // If there are more than 8 bits left, we read too far.
        debug_assert!(self.bits_left < 8);

        // The least significant bits should be zero.
        debug_assert_eq!(self.data & !Bitstream::<R>::mask_u8(self.bits_left), 0u8);

        // The resulting data is padded with zeros in the least significant
        // bits, but we want to pad in the most significant bits, so shift.
        Ok(shift_right(result, 8 - bits))
    }

    /// Read n bits, where 8 < n <= 16.
    #[inline(always)]
    pub fn read_gt_u8_leq_u16(&mut self, bits: u32) -> io::Result<u32> {
        debug_assert!((8 < bits) && (bits <= 16));

        // The most significant bits of the current byte are valid. Shift them
        // up so they become the most significant bits of the result.
        let mask_msb = 0xffffffff << (bits - self.bits_left);
        let msb = ((self.data as u32) << (bits - 8)) & mask_msb;

        // Continue reading the next bits, because no matter how many bits were
        // still left, there were fewer than `bits`.
        let bits_to_read = bits - self.bits_left;
        let fresh_byte = try!(self.reader.read_u8()) as u32;
        let lsb = if bits_to_read >= 8 {
            fresh_byte << (bits_to_read - 8)
        } else {
            fresh_byte >> (8 - bits_to_read)
        };
        let combined = msb | lsb;

        let result = if bits_to_read <= 8 {
            // We have all bits already, update the internal state. If no
            // bits are left we might shift by 8 which is invalid, but in that
            // case the value is not used, so a masked shift is appropriate.
            self.bits_left = 8 - bits_to_read;
            self.data = fresh_byte.wrapping_shl(8 - self.bits_left) as u8;
            combined
        } else {
            // We need to read one more byte to get the final bits.
            let fresher_byte = try!(self.reader.read_u8()) as u32;
            let lsb = fresher_byte >> (16 - bits_to_read);

            // Update the reader state. The wrapping shift is appropriate for
            // the same reason as above.
            self.bits_left = 16 - bits_to_read;
            self.data = fresher_byte.wrapping_shl(8 - self.bits_left) as u8;

            combined | lsb
        };

        Ok(result)
    }

    /// Reads at most 16 bits.
    #[inline(always)]
    pub fn read_leq_u16(&mut self, bits: u32) -> io::Result<u16> {
        // As with read_leq_u8, this only makes sense if we read <= 16 bits.
        debug_assert!(bits <= 16);

        // Note: the following is not the most efficient implementation
        // possible, but it avoids duplicating the complexity of `read_leq_u8`.

        if bits <= 8 {
            let result = try!(self.read_leq_u8(bits));
            Ok(result as u16)
        } else {
            // First read the 8 most significant bits, then read what is left.
            let msb = try!(self.read_leq_u8(8)) as u16;
            let lsb = try!(self.read_leq_u8(bits - 8)) as u16;
            Ok((msb << (bits - 8)) | lsb)
        }
    }

    /// Reads at most 32 bits.
    #[inline(always)]
    pub fn read_leq_u32(&mut self, bits: u32) -> io::Result<u32> {
        // As with read_leq_u8, this only makes sense if we read <= 32 bits.
        debug_assert!(bits <= 32);

        // Note: the following is not the most efficient implementation
        // possible, but it avoids duplicating the complexity of `read_leq_u8`.

        if bits <= 16 {
            let result = try!(self.read_leq_u16(bits));
            Ok(result as u32)
        } else {
            // First read the 16 most significant bits, then read what is left.
            let msb = try!(self.read_leq_u16(16)) as u32;
            let lsb = try!(self.read_leq_u16(bits - 16)) as u32;
            Ok((msb << (bits - 16)) | lsb)
        }
    }
}
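
// Illustrative sketch (not from the original suite): `Bitstream` only needs a
// `ReadBytes` source, so it also works directly on a cursor, without a
// `BufferedReader` in between.
#[test]
fn verify_bitstream_over_plain_cursor() {
    let mut bits = Bitstream::new(io::Cursor::new(vec![0b1100_0000u8]));
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b11);
    assert_eq!(bits.read_leq_u8(6).unwrap(), 0);
    assert!(bits.read_bit().is_err());
}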

#[test]
fn verify_read_bit() {
    let data = io::Cursor::new(vec![0b1010_0100, 0b1110_0001]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);
    // Mix in reading more bits as well, to ensure that they are compatible.
    assert_eq!(bits.read_leq_u8(1).unwrap(), 0);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), false);

    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);

    assert!(bits.read_bit().is_err());
}

#[test]
fn verify_read_unary() {
    let data = io::Cursor::new(vec![
        0b1010_0100, 0b1000_0000, 0b0010_0000, 0b0000_0000, 0b0000_1010]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_unary().unwrap(), 0);
    assert_eq!(bits.read_unary().unwrap(), 1);
    assert_eq!(bits.read_unary().unwrap(), 2);

    // The ending one is after the first byte boundary.
    assert_eq!(bits.read_unary().unwrap(), 2);

    assert_eq!(bits.read_unary().unwrap(), 9);

    // This one skips a full byte of zeros.
    assert_eq!(bits.read_unary().unwrap(), 17);

    // Verify that the ending position is still correct.
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b010);
    assert!(bits.read_bit().is_err());
}

#[test]
fn verify_read_leq_u8() {
    let data = io::Cursor::new(vec![0b1010_0101,
                                    0b1110_0001,
                                    0b1101_0010,
                                    0b0101_0101,
                                    0b0111_0011,
                                    0b0011_1111,
                                    0b1010_1010,
                                    0b0000_1100]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u8(0).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b10);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b01);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b011);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b110);
    assert_eq!(bits.read_leq_u8(4).unwrap(), 0b0001);
    assert_eq!(bits.read_leq_u8(5).unwrap(), 0b11010);
    assert_eq!(bits.read_leq_u8(6).unwrap(), 0b010010);
    assert_eq!(bits.read_leq_u8(7).unwrap(), 0b1010101);
    assert_eq!(bits.read_leq_u8(8).unwrap(), 0b11001100);
    assert_eq!(bits.read_leq_u8(6).unwrap(), 0b111111);
    assert_eq!(bits.read_leq_u8(8).unwrap(), 0b10101010);
    assert_eq!(bits.read_leq_u8(4).unwrap(), 0b0000);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b00);
}

#[test]
fn verify_read_gt_u8_leq_u16() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101, 0b1111_0000]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b1010_0101_11);
    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b10_0001_1101);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b001);
    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b0_0101_0101_1);
    assert_eq!(bits.read_leq_u8(7).unwrap(), 0b111_0000);
    assert!(bits.read_gt_u8_leq_u16(10).is_err());
}

#[test]
fn verify_read_leq_u16() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u16(0).unwrap(), 0);
    assert_eq!(bits.read_leq_u16(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u16(13).unwrap(), 0b010_0101_1110_00);
    assert_eq!(bits.read_leq_u16(9).unwrap(), 0b01_1101_001);
}

#[test]
fn verify_read_leq_u32() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u32(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u32(17).unwrap(), 0b010_0101_1110_0001_11);
    assert_eq!(bits.read_leq_u32(14).unwrap(), 0b01_0010_0101_0101);
}

#[test]
fn verify_read_mixed() {
    // These test data are warm-up samples from an actual stream.
    let data = io::Cursor::new(vec![0x03, 0xc7, 0xbf, 0xe5, 0x9b, 0x74, 0x1e, 0x3a, 0xdd, 0x7d,
                                    0xc5, 0x5e, 0xf6, 0xbf, 0x78, 0x1b, 0xbd]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u8(6).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    let minus = 1u32 << 16;
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-14401_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-13514_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-12168_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-10517_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-09131_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-08489_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-08698_i16 as u16 as u32));
}