1 // Claxon -- A FLAC decoding library in Rust
2 // Copyright 2014 Ruud van Asseldonk
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // A copy of the License has been included in the root of the repository.
7
8 //! Exposes traits that help reading data at the bit level with low overhead.
9 //!
10 //! The traits in this module deal with reading bytes (with a given endianness)
11 //! from a buffered reader, in such a way that still allows efficient
12 //! checksumming of the data read. There is also a bitstream which is used
13 //! internally to read the bitstream.
14
15 use std::cmp;
16 use std::io;
17
/// Similar to `std::io::BufRead`, but more performant.
///
/// There is no simple way to wrap a standard `BufRead` such that it can compute
/// checksums on consume. This is really something that needs a less restrictive
/// interface. Apart from enabling checksum computations, this buffered reader
/// has some convenience functions.
pub struct BufferedReader<R: io::Read> {
    /// The wrapped reader.
    inner: R,

    /// The buffer that holds data read from the inner reader.
    ///
    /// A boxed slice rather than a `Vec`: the capacity is fixed at
    /// construction time, so there is no need to store it separately.
    buf: Box<[u8]>,

    /// The index of the first byte in the buffer which has not been consumed.
    ///
    /// Invariant: `pos <= num_valid`.
    pos: u32,

    /// The number of bytes of the buffer which have meaningful content.
    ///
    /// Invariant: `num_valid <= buf.len()`.
    num_valid: u32,
}
37
38 impl<R: io::Read> BufferedReader<R> {
39
40 /// Wrap the reader in a new buffered reader.
new(inner: R) -> BufferedReader<R>41 pub fn new(inner: R) -> BufferedReader<R> {
42 // Use a large-ish buffer size, such that system call overhead is
43 // negligible when replenishing the buffer. However, when fuzzing we
44 // want to have small samples, and still trigger the case where we have
45 // to replenish the buffer, so use a smaller buffer size there.
46 #[cfg(not(fuzzing))]
47 const CAPACITY: usize = 2048;
48
49 #[cfg(fuzzing)]
50 const CAPACITY: usize = 31;
51
52 let buf = vec![0; CAPACITY].into_boxed_slice();
53 BufferedReader {
54 inner: inner,
55 buf: buf,
56 pos: 0,
57 num_valid: 0,
58 }
59 }
60
61 /// Destroys the buffered reader, returning the wrapped reader.
62 ///
63 /// Anything in the buffer will be lost.
into_inner(self) -> R64 pub fn into_inner(self) -> R {
65 self.inner
66 }
67 }
68
69
70 /// Provides convenience methods to make input less cumbersome.
71 pub trait ReadBytes {
72 /// Reads a single byte, failing on EOF.
read_u8(&mut self) -> io::Result<u8>73 fn read_u8(&mut self) -> io::Result<u8>;
74
75 /// Reads a single byte, not failing on EOF.
read_u8_or_eof(&mut self) -> io::Result<Option<u8>>76 fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>>;
77
78 /// Reads until the provided buffer is full.
read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>79 fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>;
80
81 /// Skips over the specified number of bytes.
82 ///
83 /// For a buffered reader, this can help a lot by just bumping a pointer.
skip(&mut self, amount: u32) -> io::Result<()>84 fn skip(&mut self, amount: u32) -> io::Result<()>;
85
86 /// Reads two bytes and interprets them as a big-endian 16-bit unsigned integer.
read_be_u16(&mut self) -> io::Result<u16>87 fn read_be_u16(&mut self) -> io::Result<u16> {
88 let b0 = try!(self.read_u8()) as u16;
89 let b1 = try!(self.read_u8()) as u16;
90 Ok(b0 << 8 | b1)
91 }
92
93 /// Reads two bytes and interprets them as a big-endian 16-bit unsigned integer.
read_be_u16_or_eof(&mut self) -> io::Result<Option<u16>>94 fn read_be_u16_or_eof(&mut self) -> io::Result<Option<u16>> {
95 if let Some(b0) = try!(self.read_u8_or_eof()) {
96 if let Some(b1) = try!(self.read_u8_or_eof()) {
97 return Ok(Some((b0 as u16) << 8 | (b1 as u16)));
98 }
99 }
100 Ok(None)
101 }
102
103 /// Reads three bytes and interprets them as a big-endian 24-bit unsigned integer.
read_be_u24(&mut self) -> io::Result<u32>104 fn read_be_u24(&mut self) -> io::Result<u32> {
105 let b0 = try!(self.read_u8()) as u32;
106 let b1 = try!(self.read_u8()) as u32;
107 let b2 = try!(self.read_u8()) as u32;
108 Ok(b0 << 16 | b1 << 8 | b2)
109 }
110
111 /// Reads four bytes and interprets them as a big-endian 32-bit unsigned integer.
read_be_u32(&mut self) -> io::Result<u32>112 fn read_be_u32(&mut self) -> io::Result<u32> {
113 let b0 = try!(self.read_u8()) as u32;
114 let b1 = try!(self.read_u8()) as u32;
115 let b2 = try!(self.read_u8()) as u32;
116 let b3 = try!(self.read_u8()) as u32;
117 Ok(b0 << 24 | b1 << 16 | b2 << 8 | b3)
118 }
119
120 /// Reads four bytes and interprets them as a little-endian 32-bit unsigned integer.
read_le_u32(&mut self) -> io::Result<u32>121 fn read_le_u32(&mut self) -> io::Result<u32> {
122 let b0 = try!(self.read_u8()) as u32;
123 let b1 = try!(self.read_u8()) as u32;
124 let b2 = try!(self.read_u8()) as u32;
125 let b3 = try!(self.read_u8()) as u32;
126 Ok(b3 << 24 | b2 << 16 | b1 << 8 | b0)
127 }
128 }
129
impl<R: io::Read> ReadBytes for BufferedReader<R>
{
    // Reads one byte, refilling the buffer from the inner reader when it has
    // been fully consumed. Fails with `UnexpectedEof` when the inner reader
    // is exhausted.
    #[inline(always)]
    fn read_u8(&mut self) -> io::Result<u8> {
        if self.pos == self.num_valid {
            // The buffer was depleted, replenish it first. A single `read`
            // call may return fewer bytes than the buffer holds; `num_valid`
            // records how much of `buf` is actually usable.
            self.pos = 0;
            self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

            if self.num_valid == 0 {
                return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                          "Expected one more byte."))
            }
        }

        // At this point there is at least one more byte in the buffer, we
        // checked that above. However, when using regular indexing, the
        // compiler still inserts a bounds check here. It is safe to avoid it.
        // SAFETY: `pos < num_valid <= buf.len()`, established above.
        let byte = unsafe { *self.buf.get_unchecked(self.pos as usize) };
        self.pos += 1;
        Ok(byte)
    }

    // Like `read_u8`, but an exhausted inner reader yields `Ok(None)`
    // instead of an error.
    fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
        if self.pos == self.num_valid {
            // The buffer was depleted, try to replenish it first.
            self.pos = 0;
            self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

            if self.num_valid == 0 {
                return Ok(None);
            }
        }

        // The buffer is now non-empty, so `read_u8` takes its fast path and
        // cannot hit the EOF error case.
        Ok(Some(try!(self.read_u8())))
    }

    // Fills `buffer` completely, alternating between copying out of the
    // internal buffer and replenishing it. Fails with `UnexpectedEof` if the
    // inner reader runs dry before `buffer` is full.
    fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
        let mut bytes_left = buffer.len();

        while bytes_left > 0 {
            let from = buffer.len() - bytes_left;
            // `count` may be 0 on the first iteration when the internal
            // buffer is empty; the refill below then makes progress.
            let count = cmp::min(bytes_left, (self.num_valid - self.pos) as usize);
            buffer[from..from + count].copy_from_slice(
                &self.buf[self.pos as usize..self.pos as usize + count]);
            bytes_left -= count;
            self.pos += count as u32;

            if bytes_left > 0 {
                // Replenish the buffer if there is more to be read.
                self.pos = 0;
                self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;
                if self.num_valid == 0 {
                    return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                              "Expected more bytes."))
                }
            }
        }

        Ok(())
    }

    // Skips `amount` bytes. Data already buffered is skipped by bumping
    // `pos`; beyond that, the buffer is refilled and discarded as needed.
    fn skip(&mut self, mut amount: u32) -> io::Result<()> {
        while amount > 0 {
            let num_left = self.num_valid - self.pos;
            let read_now = cmp::min(amount, num_left);
            self.pos += read_now;
            amount -= read_now;

            if amount > 0 {
                // If there is more to skip, refill the buffer first.
                self.pos = 0;
                self.num_valid = try!(self.inner.read(&mut self.buf)) as u32;

                if self.num_valid == 0 {
                    return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
                                              "Expected more bytes."))
                }
            }
        }
        Ok(())
    }
}
213
214 impl<'r, R: ReadBytes> ReadBytes for &'r mut R {
215
216 #[inline(always)]
read_u8(&mut self) -> io::Result<u8>217 fn read_u8(&mut self) -> io::Result<u8> {
218 (*self).read_u8()
219 }
220
read_u8_or_eof(&mut self) -> io::Result<Option<u8>>221 fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
222 (*self).read_u8_or_eof()
223 }
224
read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>225 fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
226 (*self).read_into(buffer)
227 }
228
skip(&mut self, amount: u32) -> io::Result<()>229 fn skip(&mut self, amount: u32) -> io::Result<()> {
230 (*self).skip(amount)
231 }
232 }
233
234 impl<T: AsRef<[u8]>> ReadBytes for io::Cursor<T> {
235
read_u8(&mut self) -> io::Result<u8>236 fn read_u8(&mut self) -> io::Result<u8> {
237 let pos = self.position();
238 if pos < self.get_ref().as_ref().len() as u64 {
239 self.set_position(pos + 1);
240 Ok(self.get_ref().as_ref()[pos as usize])
241 } else {
242 Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
243 }
244 }
245
read_u8_or_eof(&mut self) -> io::Result<Option<u8>>246 fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>> {
247 let pos = self.position();
248 if pos < self.get_ref().as_ref().len() as u64 {
249 self.set_position(pos + 1);
250 Ok(Some(self.get_ref().as_ref()[pos as usize]))
251 } else {
252 Ok(None)
253 }
254 }
255
read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>256 fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
257 let pos = self.position();
258 if pos + buffer.len() as u64 <= self.get_ref().as_ref().len() as u64 {
259 let start = pos as usize;
260 let end = pos as usize + buffer.len();
261 buffer.copy_from_slice(&self.get_ref().as_ref()[start..end]);
262 self.set_position(pos + buffer.len() as u64);
263 Ok(())
264 } else {
265 Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
266 }
267 }
268
skip(&mut self, amount: u32) -> io::Result<()>269 fn skip(&mut self, amount: u32) -> io::Result<()> {
270 let pos = self.position();
271 if pos + amount as u64 <= self.get_ref().as_ref().len() as u64 {
272 self.set_position(pos + amount as u64);
273 Ok(())
274 } else {
275 Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
276 }
277 }
278 }
279
// Filling buffers consumes bytes in order; a partial fill at EOF is an error.
#[test]
fn verify_read_into_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]));
    let mut buf1 = [0u8; 3];
    let mut buf2 = [0u8; 5];
    let mut buf3 = [0u8; 2];
    reader.read_into(&mut buf1).ok().unwrap();
    reader.read_into(&mut buf2).ok().unwrap();
    assert!(reader.read_into(&mut buf3).is_err());
    assert_eq!(&buf1[..], &[2u8, 3, 5]);
    assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}

// Same scenario as above, exercising the `io::Cursor` impl directly.
#[test]
fn verify_read_into_cursor() {
    let mut cursor = io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]);
    let mut buf1 = [0u8; 3];
    let mut buf2 = [0u8; 5];
    let mut buf3 = [0u8; 2];
    cursor.read_into(&mut buf1).ok().unwrap();
    cursor.read_into(&mut buf2).ok().unwrap();
    assert!(cursor.read_into(&mut buf3).is_err());
    assert_eq!(&buf1[..], &[2u8, 3, 5]);
    assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}

// Single-byte reads: `read_u8_or_eof` returns None at EOF, `read_u8` errors.
#[test]
fn verify_read_u8_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 2, 129, 89, 122]));
    assert_eq!(reader.read_u8().unwrap(), 0);
    assert_eq!(reader.read_u8().unwrap(), 2);
    assert_eq!(reader.read_u8().unwrap(), 129);
    assert_eq!(reader.read_u8().unwrap(), 89);
    assert_eq!(reader.read_u8_or_eof().unwrap(), Some(122));
    assert_eq!(reader.read_u8_or_eof().unwrap(), None);
    assert!(reader.read_u8().is_err());
}

// Same scenario as above for the `io::Cursor` impl.
#[test]
fn verify_read_u8_cursor() {
    let mut reader = io::Cursor::new(vec![0u8, 2, 129, 89, 122]);
    assert_eq!(reader.read_u8().unwrap(), 0);
    assert_eq!(reader.read_u8().unwrap(), 2);
    assert_eq!(reader.read_u8().unwrap(), 129);
    assert_eq!(reader.read_u8().unwrap(), 89);
    assert_eq!(reader.read_u8_or_eof().unwrap(), Some(122));
    assert_eq!(reader.read_u8_or_eof().unwrap(), None);
    assert!(reader.read_u8().is_err());
}

// Big-endian u16: 0x0002 -> 2, 0x8159 -> 33113; a lone trailing byte errors.
#[test]
fn verify_read_be_u16_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 2, 129, 89, 122]));
    assert_eq!(reader.read_be_u16().ok(), Some(2));
    assert_eq!(reader.read_be_u16().ok(), Some(33113));
    assert!(reader.read_be_u16().is_err());
}

// Same scenario as above for the `io::Cursor` impl.
#[test]
fn verify_read_be_u16_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 2, 129, 89, 122]);
    assert_eq!(cursor.read_be_u16().ok(), Some(2));
    assert_eq!(cursor.read_be_u16().ok(), Some(33113));
    assert!(cursor.read_be_u16().is_err());
}

// Big-endian u24: 0x8ffff3 == 9_437_171; a lone trailing byte errors.
#[test]
fn verify_read_be_u24_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 0, 2, 0x8f, 0xff, 0xf3, 122]));
    assert_eq!(reader.read_be_u24().ok(), Some(2));
    assert_eq!(reader.read_be_u24().ok(), Some(9_437_171));
    assert!(reader.read_be_u24().is_err());
}

// Same scenario as above for the `io::Cursor` impl.
#[test]
fn verify_read_be_u24_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 0, 2, 0x8f, 0xff, 0xf3, 122]);
    assert_eq!(cursor.read_be_u24().ok(), Some(2));
    assert_eq!(cursor.read_be_u24().ok(), Some(9_437_171));
    assert!(cursor.read_be_u24().is_err());
}

// Big-endian u32: 0x8001ffe9 == 2_147_614_697; a lone trailing byte errors.
#[test]
fn verify_read_be_u32_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![0u8, 0, 0, 2, 0x80, 0x01, 0xff, 0xe9, 0]));
    assert_eq!(reader.read_be_u32().ok(), Some(2));
    assert_eq!(reader.read_be_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_be_u32().is_err());
}

// Same scenario as above for the `io::Cursor` impl.
#[test]
fn verify_read_be_u32_cursor() {
    let mut cursor = io::Cursor::new(vec![0u8, 0, 0, 2, 0x80, 0x01, 0xff, 0xe9, 0]);
    assert_eq!(cursor.read_be_u32().ok(), Some(2));
    assert_eq!(cursor.read_be_u32().ok(), Some(2_147_614_697));
    assert!(cursor.read_be_u32().is_err());
}

// Little-endian u32: the byte order is reversed relative to the u32 tests above.
#[test]
fn verify_read_le_u32_buffered_reader() {
    let mut reader = BufferedReader::new(io::Cursor::new(vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0]));
    assert_eq!(reader.read_le_u32().ok(), Some(2));
    assert_eq!(reader.read_le_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_le_u32().is_err());
}

// Same scenario as above for the `io::Cursor` impl.
#[test]
fn verify_read_le_u32_cursor() {
    let mut reader = io::Cursor::new(vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0]);
    assert_eq!(reader.read_le_u32().ok(), Some(2));
    assert_eq!(reader.read_le_u32().ok(), Some(2_147_614_697));
    assert!(reader.read_le_u32().is_err());
}
393
/// Left shift that does not panic when shifting by the integer width.
#[inline(always)]
fn shift_left(x: u8, shift: u32) -> u8 {
    debug_assert!(shift <= 8);

    // Widen to 32 bits before shifting: shifting a `u8` left by its full
    // width of 8 would panic, but a `u32` can absorb it. The truncating
    // cast back keeps only the low byte, so a shift of 8 yields 0.
    (u32::from(x) << shift) as u8
}
403
/// Right shift that does not panic when shifting by the integer width.
#[inline(always)]
fn shift_right(x: u8, shift: u32) -> u8 {
    debug_assert!(shift <= 8);

    // Widen to 32 bits before shifting: shifting a `u8` right by its full
    // width of 8 would panic, but a `u32` can absorb it; a shift of 8
    // simply yields 0.
    (u32::from(x) >> shift) as u8
}
413
/// Wraps a `Reader` to facilitate reading that is not byte-aligned.
pub struct Bitstream<R: ReadBytes> {
    /// The source where bits are read from.
    reader: R,
    /// Data read from the reader, but not yet fully consumed.
    ///
    /// The unconsumed bits are kept in the most significant positions; the
    /// low `8 - bits_left` bits are always zero (the read methods assert
    /// this invariant in debug builds).
    data: u8,
    /// The number of bits of `data` that have not been consumed.
    ///
    /// In between read calls this is always at most 7.
    bits_left: u32,
}
423
impl<R: ReadBytes> Bitstream<R> {
    /// Wraps the reader with a reader that facilitates reading individual bits.
    pub fn new(reader: R) -> Bitstream<R> {
        Bitstream {
            reader: reader,
            data: 0,
            bits_left: 0,
        }
    }

    /// Generates a bitmask with 1s in the `bits` most significant bits.
    #[inline(always)]
    fn mask_u8(bits: u32) -> u8 {
        debug_assert!(bits <= 8);

        // `shift_left` tolerates a shift of 8, so `bits == 0` yields mask 0.
        shift_left(0xff, 8 - bits)
    }

    /// Reads a single bit.
    ///
    /// Reading a single bit can be done more efficiently than reading
    /// more than one bit, because a bit never straddles a byte boundary.
    #[inline(always)]
    pub fn read_bit(&mut self) -> io::Result<bool> {

        // If no bits are left, we will need to read the next byte.
        let result = if self.bits_left == 0 {
            let fresh_byte = try!(self.reader.read_u8());

            // What remains later are the 7 least significant bits.
            self.data = fresh_byte << 1;
            self.bits_left = 7;

            // What we report is the most significant bit of the fresh byte.
            fresh_byte & 0b1000_0000
        } else {
            // Consume the most significant bit of the buffer byte.
            let bit = self.data & 0b1000_0000;
            self.data = self.data << 1;
            self.bits_left = self.bits_left - 1;
            bit
        };

        // `result` holds the bit in position 7 (or is 0), so compare with 0
        // rather than shifting it down.
        Ok(result != 0)
    }

    /// Reads bits until a 1 is read, and returns the number of zeros read.
    ///
    /// Because the reader buffers a byte internally, reading unary can be done
    /// more efficiently than by just reading bit by bit.
    #[inline(always)]
    pub fn read_unary(&mut self) -> io::Result<u32> {
        // Start initially with the number of zeros that are in the buffer byte
        // already (counting from the most significant bit).
        let mut n = self.data.leading_zeros();

        // If the number of zeros plus the one following it was not more than
        // the bytes left, then there is no need to look further.
        if n < self.bits_left {
            // Note: this shift never shifts by more than 7 places, because
            // bits_left is always at most 7 in between read calls, and the
            // least significant bit of the buffer byte is 0 in that case. So
            // we count either 8 zeros, or less than 7. In the former case we
            // would not have taken this branch, in the latter the shift below
            // is safe.
            self.data = self.data << (n + 1);
            self.bits_left = self.bits_left - (n + 1);
        } else {
            // We inspected more bits than available, so our count is incorrect,
            // and we need to look at the next byte. Only the `bits_left` valid
            // zeros count; the rest of `data` is zero padding.
            n = self.bits_left;

            // Continue reading bytes until we encounter a one.
            loop {
                let fresh_byte = try!(self.reader.read_u8());
                let zeros = fresh_byte.leading_zeros();
                n = n + zeros;
                if zeros < 8 {
                    // We consumed the zeros, plus the one following it.
                    self.bits_left = 8 - (zeros + 1);
                    self.data = shift_left(fresh_byte, zeros + 1);
                    break;
                }
            }
        }

        Ok(n)
    }

    /// Reads at most eight bits.
    #[inline(always)]
    pub fn read_leq_u8(&mut self, bits: u32) -> io::Result<u8> {
        // Of course we can read no more than 8 bits, but we do not want the
        // performance overhead of the assertion, so only do it in debug mode.
        debug_assert!(bits <= 8);

        // If not enough bits left, we will need to read the next byte.
        let result = if self.bits_left < bits {
            // Most significant bits are shifted to the right position already.
            let msb = self.data;

            // Read a single byte.
            self.data = try!(self.reader.read_u8());

            // From the next byte, we take the additional bits that we need.
            // Those start at the most significant bit, so we need to shift so
            // that it does not overlap with what we have already.
            let lsb = (self.data & Bitstream::<R>::mask_u8(bits - self.bits_left))
                >> self.bits_left;

            // Shift out the bits that we have consumed.
            self.data = shift_left(self.data, bits - self.bits_left);
            self.bits_left = 8 - (bits - self.bits_left);

            msb | lsb
        } else {
            let result = self.data & Bitstream::<R>::mask_u8(bits);

            // Shift out the bits that we have consumed.
            self.data = self.data << bits;
            self.bits_left = self.bits_left - bits;

            result
        };

        // If there are more than 8 bits left, we read too far.
        debug_assert!(self.bits_left < 8);

        // The least significant bits should be zero.
        debug_assert_eq!(self.data & !Bitstream::<R>::mask_u8(self.bits_left), 0u8);

        // The resulting data is padded with zeros in the least significant
        // bits, but we want to pad in the most significant bits, so shift.
        Ok(shift_right(result, 8 - bits))
    }

    /// Read n bits, where 8 < n <= 16.
    ///
    /// Reads either one or two fresh bytes, depending on how many bits were
    /// still buffered.
    #[inline(always)]
    pub fn read_gt_u8_leq_u16(&mut self, bits: u32) -> io::Result<u32> {
        debug_assert!((8 < bits) && (bits <= 16));

        // The most significant bits of the current byte are valid. Shift them
        // by 2 so they become the most significant bits of the 10 bit number.
        // (Note: `bits - bits_left` is at least 2, since `bits > 8` and
        // `bits_left <= 7`, so the mask shift below is safe.)
        let mask_msb = 0xffffffff << (bits - self.bits_left);
        let msb = ((self.data as u32) << (bits - 8)) & mask_msb;

        // Continue reading the next bits, because no matter how many bits were
        // still left, there were less than 10.
        let bits_to_read = bits - self.bits_left;
        let fresh_byte = try!(self.reader.read_u8()) as u32;
        let lsb = if bits_to_read >= 8 {
            fresh_byte << (bits_to_read - 8)
        } else {
            fresh_byte >> (8 - bits_to_read)
        };
        let combined = msb | lsb;

        let result = if bits_to_read <= 8 {
            // We have all bits already, update the internal state. If no
            // bits are left we might shift by 8 which is invalid, but in that
            // case the value is not used, so a masked shift is appropriate.
            self.bits_left = 8 - bits_to_read;
            self.data = fresh_byte.wrapping_shl(8 - self.bits_left) as u8;
            combined
        } else {
            // We need to read one more byte to get the final bits.
            let fresher_byte = try!(self.reader.read_u8()) as u32;
            let lsb = fresher_byte >> (16 - bits_to_read);

            // Update the reader state. The wrapping shift is appropriate for
            // the same reason as above.
            self.bits_left = 16 - bits_to_read;
            self.data = fresher_byte.wrapping_shl(8 - self.bits_left) as u8;

            combined | lsb
        };

        Ok(result)
    }

    /// Reads at most 16 bits.
    #[inline(always)]
    pub fn read_leq_u16(&mut self, bits: u32) -> io::Result<u16> {
        // As with read_leq_u8, this only makes sense if we read <= 16 bits.
        debug_assert!(bits <= 16);

        // Note: the following is not the most efficient implementation
        // possible, but it avoids duplicating the complexity of `read_leq_u8`.

        if bits <= 8 {
            let result = try!(self.read_leq_u8(bits));
            Ok(result as u16)
        } else {
            // First read the 8 most significant bits, then read what is left.
            let msb = try!(self.read_leq_u8(8)) as u16;
            let lsb = try!(self.read_leq_u8(bits - 8)) as u16;
            Ok((msb << (bits - 8)) | lsb)
        }
    }

    /// Reads at most 32 bits.
    #[inline(always)]
    pub fn read_leq_u32(&mut self, bits: u32) -> io::Result<u32> {
        // As with read_leq_u8, this only makes sense if we read <= 32 bits.
        debug_assert!(bits <= 32);

        // Note: the following is not the most efficient implementation
        // possible, but it avoids duplicating the complexity of `read_leq_u8`.

        if bits <= 16 {
            let result = try!(self.read_leq_u16(bits));
            Ok(result as u32)
        } else {
            // First read the 16 most significant bits, then read what is left.
            let msb = try!(self.read_leq_u16(16)) as u32;
            let lsb = try!(self.read_leq_u16(bits - 16)) as u32;
            Ok((msb << (bits - 16)) | lsb)
        }
    }
}
644
// Bit-by-bit reads interleave correctly with multi-bit reads, and an
// exhausted stream errors.
#[test]
fn verify_read_bit() {
    let data = io::Cursor::new(vec![0b1010_0100, 0b1110_0001]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);
    // Mix in reading more bits as well, to ensure that they are compatible.
    assert_eq!(bits.read_leq_u8(1).unwrap(), 0);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), false);

    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_bit().unwrap(), true);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), false);
    assert_eq!(bits.read_bit().unwrap(), true);

    assert!(bits.read_bit().is_err());
}

// Unary (zero-run) decoding, including runs that cross byte boundaries and
// a run spanning a full zero byte.
#[test]
fn verify_read_unary() {
    let data = io::Cursor::new(vec![
        0b1010_0100, 0b1000_0000, 0b0010_0000, 0b0000_0000, 0b0000_1010]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_unary().unwrap(), 0);
    assert_eq!(bits.read_unary().unwrap(), 1);
    assert_eq!(bits.read_unary().unwrap(), 2);

    // The ending one is after the first byte boundary.
    assert_eq!(bits.read_unary().unwrap(), 2);

    assert_eq!(bits.read_unary().unwrap(), 9);

    // This one skips a full byte of zeros.
    assert_eq!(bits.read_unary().unwrap(), 17);

    // Verify that the ending position is still correct.
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b010);
    assert!(bits.read_bit().is_err());
}

// Reads of 0 through 8 bits in varying sizes, crossing byte boundaries.
#[test]
fn verify_read_leq_u8() {
    let data = io::Cursor::new(vec![0b1010_0101,
                                    0b1110_0001,
                                    0b1101_0010,
                                    0b0101_0101,
                                    0b0111_0011,
                                    0b0011_1111,
                                    0b1010_1010,
                                    0b0000_1100]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u8(0).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b10);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b01);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b011);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b110);
    assert_eq!(bits.read_leq_u8(4).unwrap(), 0b0001);
    assert_eq!(bits.read_leq_u8(5).unwrap(), 0b11010);
    assert_eq!(bits.read_leq_u8(6).unwrap(), 0b010010);
    assert_eq!(bits.read_leq_u8(7).unwrap(), 0b1010101);
    assert_eq!(bits.read_leq_u8(8).unwrap(), 0b11001100);
    assert_eq!(bits.read_leq_u8(6).unwrap(), 0b111111);
    assert_eq!(bits.read_leq_u8(8).unwrap(), 0b10101010);
    assert_eq!(bits.read_leq_u8(4).unwrap(), 0b0000);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u8(2).unwrap(), 0b00);
}

// 10-bit reads at various alignments, interleaved with small reads.
#[test]
fn verify_read_gt_u8_get_u16() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101, 0b1111_0000]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b1010_0101_11);
    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b10_0001_1101);
    assert_eq!(bits.read_leq_u8(3).unwrap(), 0b001);
    assert_eq!(bits.read_gt_u8_leq_u16(10).unwrap(), 0b0_0101_0101_1);
    assert_eq!(bits.read_leq_u8(7).unwrap(), 0b111_0000);
    assert!(bits.read_gt_u8_leq_u16(10).is_err());
}

// Reads up to 16 bits, including the 0-bit and unaligned 13/9-bit cases.
#[test]
fn verify_read_leq_u16() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u16(0).unwrap(), 0);
    assert_eq!(bits.read_leq_u16(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u16(13).unwrap(), 0b010_0101_1110_00);
    assert_eq!(bits.read_leq_u16(9).unwrap(), 0b01_1101_001);
}

// Reads beyond 16 bits via the u32 path.
#[test]
fn verify_read_leq_u32() {
    let data = io::Cursor::new(vec![0b1010_0101, 0b1110_0001, 0b1101_0010, 0b0101_0101]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u32(1).unwrap(), 1);
    assert_eq!(bits.read_leq_u32(17).unwrap(), 0b010_0101_1110_0001_11);
    assert_eq!(bits.read_leq_u32(14).unwrap(), 0b01_0010_0101_0101);
}

// Regression-style test against values decoded from a real FLAC stream.
#[test]
fn verify_read_mixed() {
    // These test data are warm-up samples from an actual stream.
    let data = io::Cursor::new(vec![0x03, 0xc7, 0xbf, 0xe5, 0x9b, 0x74, 0x1e, 0x3a, 0xdd, 0x7d,
                                    0xc5, 0x5e, 0xf6, 0xbf, 0x78, 0x1b, 0xbd]);
    let mut bits = Bitstream::new(BufferedReader::new(data));

    assert_eq!(bits.read_leq_u8(6).unwrap(), 0);
    assert_eq!(bits.read_leq_u8(1).unwrap(), 1);
    let minus = 1u32 << 16;
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-14401_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-13514_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-12168_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-10517_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-09131_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-08489_i16 as u16 as u32));
    assert_eq!(bits.read_leq_u32(17).unwrap(), minus | (-08698_i16 as u16 as u32));
}
778