1 // Copyright 2015 Ilkka Rauta
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! BitReader is a helper type to extract strings of bits from a slice of bytes.
10 //!
11 //! Here is how you read first a single bit, then three bits and finally four bits from a byte
12 //! buffer:
13 //!
14 //! ```
15 //! use bitreader::BitReader;
16 //!
17 //! let slice_of_u8 = &[0b1000_1111];
18 //! let mut reader = BitReader::new(slice_of_u8);
19 //!
20 //! // You probably should use try! or some other error handling mechanism in real code if the
21 //! // length of the input is not known in advance.
22 //! let a_single_bit = reader.read_u8(1).unwrap();
23 //! assert_eq!(a_single_bit, 1);
24 //!
25 //! let more_bits = reader.read_u8(3).unwrap();
26 //! assert_eq!(more_bits, 0);
27 //!
28 //! let last_bits_of_byte = reader.read_u8(4).unwrap();
29 //! assert_eq!(last_bits_of_byte, 0b1111);
30 //! ```
31 //! You can naturally read bits from longer buffer of data than just a single byte.
32 //!
33 //! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big
34 //! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum
35 //! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the
36 //! moment.
37 //!
38 //! The reads do not need to be aligned in any particular way.
39 //!
40 //! Reading zero bits is a no-op.
41 //!
42 //! You can also skip over a number of bits, in which case there is no arbitrary small limits like
43 //! when reading the values to a variable. However, you can not seek past the end of the slice,
44 //! either when reading or when skipping bits.
45 //!
46 //! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but
47 //! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of
48 //! information one bit at a time.
49 #![no_std]
50 cfg_if::cfg_if!{
51     if #[cfg(feature = "std")] {
52         extern crate std;
53         use std::cmp::min;
54         use std::prelude::v1::*;
55         use std::fmt;
56         use std::error::Error;
57         use std::result;
58     } else {
59         use core::result;
60         use core::fmt;
61         use core::cmp::min;
62     }
63 }
64 
65 #[cfg(test)]
66 mod tests;
67 
68 /// BitReader reads data from a byte slice at the granularity of a single bit.
69 pub struct BitReader<'a> {
70     bytes: &'a [u8],
71     /// Position from the start of the slice, counted as bits instead of bytes
72     position: u64,
73     relative_offset: u64,
74 
75     /// Length this reader is allowed to read from the slice, counted as bits instead of bytes.
76     length: u64,
77 }
78 
79 impl<'a> BitReader<'a> {
80     /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as
81     /// the slice given to is valid.
new(bytes: &'a [u8]) -> BitReader<'a>82     pub fn new(bytes: &'a [u8]) -> BitReader<'a> {
83         BitReader {
84             bytes: bytes,
85             position: 0,
86             relative_offset: 0,
87             length: bytes.len() as u64 * 8,
88         }
89     }
90 
91     /// Returns a copy of current BitReader, with the difference that its position() returns
92     /// positions relative to the position of the original BitReader at the construction time.
93     /// After construction, both readers are otherwise completely independent, except of course
94     /// for sharing the same source data.
95     ///
96     /// ```
97     /// use bitreader::BitReader;
98     ///
99     /// let bytes = &[0b11110000, 0b00001111];
100     /// let mut original = BitReader::new(bytes);
101     /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
102     /// assert_eq!(original.position(), 4);
103     ///
104     /// let mut relative = original.relative_reader();
105     /// assert_eq!(relative.position(), 0);
106     ///
107     /// assert_eq!(original.read_u8(8).unwrap(), 0);
108     /// assert_eq!(relative.read_u8(8).unwrap(), 0);
109     ///
110     /// assert_eq!(original.position(), 12);
111     /// assert_eq!(relative.position(), 8);
112     /// ```
relative_reader(&self) -> BitReader<'a>113     pub fn relative_reader(&self) -> BitReader<'a> {
114         BitReader {
115             bytes: self.bytes,
116             position: self.position,
117             relative_offset: self.position,
118             length: self.length - self.position,
119         }
120     }
121 
122     /// Returns a copy of current BitReader, with the difference that its position() returns
123     /// positions relative to the position of the original BitReader at the construction time, and
124     /// will not allow reading more than len bits. After construction, both readers are otherwise
125     // completely independent, except of course for sharing the same source data.
126     ///
127     /// ```
128     /// use bitreader::BitReader;
129     /// use bitreader::BitReaderError;
130     ///
131     /// let bytes = &[0b11110000, 0b00001111];
132     /// let mut original = BitReader::new(bytes);
133     /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
134     /// assert_eq!(original.position(), 4);
135     ///
136     /// let mut relative = original.relative_reader_atmost(8);
137     /// assert_eq!(relative.position(), 0);
138     ///
139     /// assert_eq!(original.read_u8(8).unwrap(), 0);
140     /// assert_eq!(relative.read_u8(8).unwrap(), 0);
141     ///
142     /// assert_eq!(original.position(), 12);
143     /// assert_eq!(relative.position(), 8);
144     ///
145     /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{
146     ///    position: 8,
147     ///    length: 8,
148     ///    requested: 8
149     /// });
150     /// ```
relative_reader_atmost(&self, len: u64) -> BitReader<'a>151     pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> {
152         BitReader {
153             bytes: self.bytes,
154             position: self.position,
155             relative_offset: self.position,
156             length: min(self.length - self.position, len),
157         }
158     }
159 
160     /// Read at most 8 bits into a u8.
read_u8(&mut self, bit_count: u8) -> Result<u8>161     pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> {
162         let value = self.read_value(bit_count, 8)?;
163         Ok((value & 0xff) as u8)
164     }
165 
166     /// Read at most 8 bits into a u8, but without moving the cursor forward.
peek_u8(&self, bit_count: u8) -> Result<u8>167     pub fn peek_u8(&self, bit_count: u8) -> Result<u8> {
168         self.relative_reader().read_u8(bit_count)
169     }
170 
171     /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining
172     /// after the internal cursor's current position, the cursor won't be moved forward
173     /// and the contents of `output_bytes` won't be modified.
read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()>174     pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> {
175         let requested = output_bytes.len() as u64 * 8;
176         if requested > self.remaining() {
177             Err(BitReaderError::NotEnoughData {
178                 position: self.position(),
179                 length: self.length,
180                 requested,
181             })
182         } else {
183             for byte in output_bytes.iter_mut() {
184                 *byte = self.read_u8(8)?;
185             }
186             Ok(())
187         }
188     }
189 
190     /// Read at most 16 bits into a u16.
read_u16(&mut self, bit_count: u8) -> Result<u16>191     pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> {
192         let value = self.read_value(bit_count, 16)?;
193         Ok((value & 0xffff) as u16)
194     }
195 
196     /// Read at most 16 bits into a u16, but without moving the cursor forward.
peek_u16(&self, bit_count: u8) -> Result<u16>197     pub fn peek_u16(&self, bit_count: u8) -> Result<u16> {
198         self.relative_reader().read_u16(bit_count)
199     }
200 
201     /// Read at most 32 bits into a u32.
read_u32(&mut self, bit_count: u8) -> Result<u32>202     pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> {
203         let value = self.read_value(bit_count, 32)?;
204         Ok((value & 0xffffffff) as u32)
205     }
206 
207     /// Read at most 32 bits into a u32, but without moving the cursor forward.
peek_u32(&self, bit_count: u8) -> Result<u32>208     pub fn peek_u32(&self, bit_count: u8) -> Result<u32> {
209         self.relative_reader().read_u32(bit_count)
210     }
211 
212     /// Read at most 64 bits into a u64.
read_u64(&mut self, bit_count: u8) -> Result<u64>213     pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> {
214         let value = self.read_value(bit_count, 64)?;
215         Ok(value)
216     }
217 
218     /// Read at most 64 bits into a u64, but without moving the cursor forward.
peek_u64(&self, bit_count: u8) -> Result<u64>219     pub fn peek_u64(&self, bit_count: u8) -> Result<u64> {
220         self.relative_reader().read_u64(bit_count)
221     }
222 
223     /// Read at most 8 bits into a i8.
224     /// Assumes the bits are stored in two's complement format.
read_i8(&mut self, bit_count: u8) -> Result<i8>225     pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> {
226         let value = self.read_signed_value(bit_count, 8)?;
227         Ok((value & 0xff) as i8)
228     }
229 
230     /// Read at most 16 bits into a i16.
231     /// Assumes the bits are stored in two's complement format.
read_i16(&mut self, bit_count: u8) -> Result<i16>232     pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> {
233         let value = self.read_signed_value(bit_count, 16)?;
234         Ok((value & 0xffff) as i16)
235     }
236 
237     /// Read at most 32 bits into a i32.
238     /// Assumes the bits are stored in two's complement format.
read_i32(&mut self, bit_count: u8) -> Result<i32>239     pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> {
240         let value = self.read_signed_value(bit_count, 32)?;
241         Ok((value & 0xffffffff) as i32)
242     }
243 
244     /// Read at most 64 bits into a i64.
245     /// Assumes the bits are stored in two's complement format.
read_i64(&mut self, bit_count: u8) -> Result<i64>246     pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> {
247         let value = self.read_signed_value(bit_count, 64)?;
248         Ok(value)
249     }
250 
251     /// Read a single bit as a boolean value.
252     /// Interprets 1 as true and 0 as false.
read_bool(&mut self) -> Result<bool>253     pub fn read_bool(&mut self) -> Result<bool> {
254         match self.read_value(1, 1)? {
255             0 => Ok(false),
256             _ => Ok(true),
257         }
258     }
259 
260     /// Read a single bit as a boolean value, but without moving the cursor forward.
261     /// Interprets 1 as true and 0 as false.
peek_bool(&self) -> Result<bool>262     pub fn peek_bool(&self) -> Result<bool> {
263         self.relative_reader().read_bool()
264     }
265 
266     /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice.
skip(&mut self, bit_count: u64) -> Result<()>267     pub fn skip(&mut self, bit_count: u64) -> Result<()> {
268         let end_position = self.position + bit_count;
269         if end_position > (self.relative_offset + self.length) {
270             return Err(BitReaderError::NotEnoughData {
271                 position: self.position(),
272                 length: self.length,
273                 requested: bit_count,
274             });
275         }
276         self.position = end_position;
277         Ok(())
278     }
279 
280     /// Returns the position of the cursor, or how many bits have been read so far.
position(&self) -> u64281     pub fn position(&self) -> u64 {
282         self.position - self.relative_offset
283     }
284 
285     /// Returns the number of bits not yet read from the underlying slice.
remaining(&self) -> u64286     pub fn remaining(&self) -> u64 {
287         self.length - self.position
288     }
289 
290     /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific
291     /// multi-byte alignment position.
292     ///
293     /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been
294     /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte
295     /// sequences has been read.
296     ///
297     /// This function can be used to validate the data is being read properly, for example by
298     /// adding invocations wrapped into `debug_assert!()` to places where it is known the data
299     /// should be n-byte aligned.
is_aligned(&self, alignment_bytes: u32) -> bool300     pub fn is_aligned(&self, alignment_bytes: u32) -> bool {
301         self.position % (alignment_bytes as u64 * 8) == 0
302     }
303 
304     /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific
305     /// multi-byte alignment position.
306     ///
307     /// That is, `reader.align(n)` moves the cursor to the next position that
308     /// is a multiple of n * 8 bits, if it's not correctly aligned already.
align(&mut self, alignment_bytes: u32) -> Result<()>309     pub fn align(&mut self, alignment_bytes: u32) -> Result<()> {
310         let alignment_bits = alignment_bytes as u64 * 8;
311         let cur_alignment = self.position % alignment_bits;
312         let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits;
313         self.skip(bits_to_skip)
314     }
315 
read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64>316     fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> {
317         let unsigned = self.read_value(bit_count, maximum_count)?;
318         // Fill the bits above the requested bits with all ones or all zeros,
319         // depending on the sign bit.
320         let sign_bit = unsigned >> (bit_count - 1) & 1;
321         let high_bits = if sign_bit == 1 { -1 } else { 0 };
322         Ok(high_bits << bit_count | unsigned as i64)
323     }
324 
read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64>325     fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> {
326         if bit_count == 0 {
327             return Ok(0);
328         }
329         if bit_count > maximum_count {
330             return Err(BitReaderError::TooManyBitsForType {
331                 position: self.position,
332                 requested: bit_count,
333                 allowed: maximum_count,
334             });
335         }
336         let start_position = self.position;
337         let end_position = self.position + bit_count as u64;
338         if end_position > (self.relative_offset + self.length) {
339             return Err(BitReaderError::NotEnoughData {
340                 position: self.position(),
341                 length: self.length,
342                 requested: bit_count as u64,
343             });
344         }
345 
346         let mut value: u64 = 0;
347 
348         for i in start_position..end_position {
349             let byte_index = (i / 8) as usize;
350             let byte = self.bytes[byte_index];
351             let shift = 7 - (i % 8);
352             let bit = (byte >> shift) as u64 & 1;
353             value = (value << 1) | bit;
354         }
355 
356         self.position = end_position;
357         Ok(value)
358     }
359 }
360 
361 /// Result type for those BitReader operations that can fail.
362 pub type Result<T> = result::Result<T, BitReaderError>;
363 
364 /// Error enumeration of BitReader errors.
365 #[derive(Debug,PartialEq,Copy,Clone)]
366 pub enum BitReaderError {
367     /// Requested more bits than there are left in the byte slice at the current position.
368     NotEnoughData {
369         /// Current posititon in bits relative to the beginning of the reader.
370         position: u64,
371 
372         /// Total readable length in bits of the underlaying slice.
373         length: u64,
374 
375         /// Bits requested to be read.
376         requested: u64,
377     },
378     /// Requested more bits than the returned variable can hold, for example more than 8 bits when
379     /// reading into a u8.
380     TooManyBitsForType {
381         position: u64,
382         requested: u8,
383         allowed: u8,
384     }
385 }
386 
387 #[cfg(feature = "std")]
388 impl Error for BitReaderError {
description(&self) -> &str389     fn description(&self) -> &str {
390         match *self {
391             BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left",
392             BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold",
393         }
394     }
395 }
396 
397 impl fmt::Display for BitReaderError {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result398     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
399         //self.description().fmt(fmt)
400         match *self {
401             BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position),
402             BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position),
403         }
404     }
405 }
406 
407 /// Helper trait to allow reading bits into a variable without explicitly mentioning its type.
408 ///
409 /// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but
410 /// want to rely on type inference instead, you can use the ReadInto trait. The trait is
411 /// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned)
412 /// and the boolean type.
413 ///
414 /// ```
415 /// use bitreader::{BitReader,ReadInto};
416 ///
417 /// let slice_of_u8 = &[0b1110_0000];
418 /// let mut reader = BitReader::new(slice_of_u8);
419 ///
420 /// struct Foo {
421 ///     bar: u8,
422 ///     valid: bool,
423 /// }
424 ///
425 /// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar,
426 /// // and consequently the correct "overload" is used.
427 /// let bits = ReadInto::read(&mut reader, 2).unwrap();
428 /// let valid = ReadInto::read(&mut reader, 1).unwrap();
429 ///
430 /// let foo = Foo { bar: bits, valid: valid };
431 /// assert_eq!(foo.bar, 3);
432 /// assert!(foo.valid);
433 /// ```
434 pub trait ReadInto
435     where Self: Sized
436 {
read(reader: &mut BitReader, bits: u8) -> Result<Self>437     fn read(reader: &mut BitReader, bits: u8) -> Result<Self>;
438 }
439 
440 // There's eight almost identical implementations, let's make this easier.
441 macro_rules! impl_read_into {
442     ($T:ty, $method:ident) => (
443         impl ReadInto for $T {
444             fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
445                 reader.$method(bits)
446             }
447         }
448     )
449 }
450 
451 impl_read_into!(u8, read_u8);
452 impl_read_into!(u16, read_u16);
453 impl_read_into!(u32, read_u32);
454 impl_read_into!(u64, read_u64);
455 
456 impl_read_into!(i8, read_i8);
457 impl_read_into!(i16, read_i16);
458 impl_read_into!(i32, read_i32);
459 impl_read_into!(i64, read_i64);
460 
461 // We can't cast to bool, so this requires a separate method.
462 impl ReadInto for bool {
read(reader: &mut BitReader, bits: u8) -> Result<Self>463     fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
464         match reader.read_u8(bits)? {
465             0 => Ok(false),
466             _ => Ok(true),
467         }
468     }
469 }
470