1 // Copyright 2015 Ilkka Rauta 2 // 3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 // option. This file may not be copied, modified, or distributed 7 // except according to those terms. 8 9 //! BitReader is a helper type to extract strings of bits from a slice of bytes. 10 //! 11 //! Here is how you read first a single bit, then three bits and finally four bits from a byte 12 //! buffer: 13 //! 14 //! ``` 15 //! use bitreader::BitReader; 16 //! 17 //! let slice_of_u8 = &[0b1000_1111]; 18 //! let mut reader = BitReader::new(slice_of_u8); 19 //! 20 //! // You probably should use try! or some other error handling mechanism in real code if the 21 //! // length of the input is not known in advance. 22 //! let a_single_bit = reader.read_u8(1).unwrap(); 23 //! assert_eq!(a_single_bit, 1); 24 //! 25 //! let more_bits = reader.read_u8(3).unwrap(); 26 //! assert_eq!(more_bits, 0); 27 //! 28 //! let last_bits_of_byte = reader.read_u8(4).unwrap(); 29 //! assert_eq!(last_bits_of_byte, 0b1111); 30 //! ``` 31 //! You can naturally read bits from longer buffer of data than just a single byte. 32 //! 33 //! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big 34 //! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum 35 //! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the 36 //! moment. 37 //! 38 //! The reads do not need to be aligned in any particular way. 39 //! 40 //! Reading zero bits is a no-op. 41 //! 42 //! You can also skip over a number of bits, in which case there is no arbitrary small limits like 43 //! when reading the values to a variable. However, you can not seek past the end of the slice, 44 //! either when reading or when skipping bits. 45 //! 46 //! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but 47 //! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of 48 //! information one bit at a time. 49 #![no_std] 50 cfg_if::cfg_if!{ 51 if #[cfg(feature = "std")] { 52 extern crate std; 53 use std::cmp::min; 54 use std::prelude::v1::*; 55 use std::fmt; 56 use std::error::Error; 57 use std::result; 58 } else { 59 use core::result; 60 use core::fmt; 61 use core::cmp::min; 62 } 63 } 64 65 #[cfg(test)] 66 mod tests; 67 68 /// BitReader reads data from a byte slice at the granularity of a single bit. 69 pub struct BitReader<'a> { 70 bytes: &'a [u8], 71 /// Position from the start of the slice, counted as bits instead of bytes 72 position: u64, 73 relative_offset: u64, 74 75 /// Length this reader is allowed to read from the slice, counted as bits instead of bytes. 76 length: u64, 77 } 78 79 impl<'a> BitReader<'a> { 80 /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as 81 /// the slice given to is valid. new(bytes: &'a [u8]) -> BitReader<'a>82 pub fn new(bytes: &'a [u8]) -> BitReader<'a> { 83 BitReader { 84 bytes: bytes, 85 position: 0, 86 relative_offset: 0, 87 length: bytes.len() as u64 * 8, 88 } 89 } 90 91 /// Returns a copy of current BitReader, with the difference that its position() returns 92 /// positions relative to the position of the original BitReader at the construction time. 93 /// After construction, both readers are otherwise completely independent, except of course 94 /// for sharing the same source data. 95 /// 96 /// ``` 97 /// use bitreader::BitReader; 98 /// 99 /// let bytes = &[0b11110000, 0b00001111]; 100 /// let mut original = BitReader::new(bytes); 101 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 102 /// assert_eq!(original.position(), 4); 103 /// 104 /// let mut relative = original.relative_reader(); 105 /// assert_eq!(relative.position(), 0); 106 /// 107 /// assert_eq!(original.read_u8(8).unwrap(), 0); 108 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 109 /// 110 /// assert_eq!(original.position(), 12); 111 /// assert_eq!(relative.position(), 8); 112 /// ``` relative_reader(&self) -> BitReader<'a>113 pub fn relative_reader(&self) -> BitReader<'a> { 114 BitReader { 115 bytes: self.bytes, 116 position: self.position, 117 relative_offset: self.position, 118 length: self.length - self.position, 119 } 120 } 121 122 /// Returns a copy of current BitReader, with the difference that its position() returns 123 /// positions relative to the position of the original BitReader at the construction time, and 124 /// will not allow reading more than len bits. After construction, both readers are otherwise 125 // completely independent, except of course for sharing the same source data. 126 /// 127 /// ``` 128 /// use bitreader::BitReader; 129 /// use bitreader::BitReaderError; 130 /// 131 /// let bytes = &[0b11110000, 0b00001111]; 132 /// let mut original = BitReader::new(bytes); 133 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 134 /// assert_eq!(original.position(), 4); 135 /// 136 /// let mut relative = original.relative_reader_atmost(8); 137 /// assert_eq!(relative.position(), 0); 138 /// 139 /// assert_eq!(original.read_u8(8).unwrap(), 0); 140 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 141 /// 142 /// assert_eq!(original.position(), 12); 143 /// assert_eq!(relative.position(), 8); 144 /// 145 /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{ 146 /// position: 8, 147 /// length: 8, 148 /// requested: 8 149 /// }); 150 /// ``` relative_reader_atmost(&self, len: u64) -> BitReader<'a>151 pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> { 152 BitReader { 153 bytes: self.bytes, 154 position: self.position, 155 relative_offset: self.position, 156 length: min(self.length - self.position, len), 157 } 158 } 159 160 /// Read at most 8 bits into a u8. read_u8(&mut self, bit_count: u8) -> Result<u8>161 pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> { 162 let value = self.read_value(bit_count, 8)?; 163 Ok((value & 0xff) as u8) 164 } 165 166 /// Read at most 8 bits into a u8, but without moving the cursor forward. peek_u8(&self, bit_count: u8) -> Result<u8>167 pub fn peek_u8(&self, bit_count: u8) -> Result<u8> { 168 self.relative_reader().read_u8(bit_count) 169 } 170 171 /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining 172 /// after the internal cursor's current position, the cursor won't be moved forward 173 /// and the contents of `output_bytes` won't be modified. read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()>174 pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> { 175 let requested = output_bytes.len() as u64 * 8; 176 if requested > self.remaining() { 177 Err(BitReaderError::NotEnoughData { 178 position: self.position(), 179 length: self.length, 180 requested, 181 }) 182 } else { 183 for byte in output_bytes.iter_mut() { 184 *byte = self.read_u8(8)?; 185 } 186 Ok(()) 187 } 188 } 189 190 /// Read at most 16 bits into a u16. read_u16(&mut self, bit_count: u8) -> Result<u16>191 pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> { 192 let value = self.read_value(bit_count, 16)?; 193 Ok((value & 0xffff) as u16) 194 } 195 196 /// Read at most 16 bits into a u16, but without moving the cursor forward. peek_u16(&self, bit_count: u8) -> Result<u16>197 pub fn peek_u16(&self, bit_count: u8) -> Result<u16> { 198 self.relative_reader().read_u16(bit_count) 199 } 200 201 /// Read at most 32 bits into a u32. read_u32(&mut self, bit_count: u8) -> Result<u32>202 pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> { 203 let value = self.read_value(bit_count, 32)?; 204 Ok((value & 0xffffffff) as u32) 205 } 206 207 /// Read at most 32 bits into a u32, but without moving the cursor forward. peek_u32(&self, bit_count: u8) -> Result<u32>208 pub fn peek_u32(&self, bit_count: u8) -> Result<u32> { 209 self.relative_reader().read_u32(bit_count) 210 } 211 212 /// Read at most 64 bits into a u64. read_u64(&mut self, bit_count: u8) -> Result<u64>213 pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> { 214 let value = self.read_value(bit_count, 64)?; 215 Ok(value) 216 } 217 218 /// Read at most 64 bits into a u64, but without moving the cursor forward. peek_u64(&self, bit_count: u8) -> Result<u64>219 pub fn peek_u64(&self, bit_count: u8) -> Result<u64> { 220 self.relative_reader().read_u64(bit_count) 221 } 222 223 /// Read at most 8 bits into a i8. 224 /// Assumes the bits are stored in two's complement format. read_i8(&mut self, bit_count: u8) -> Result<i8>225 pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> { 226 let value = self.read_signed_value(bit_count, 8)?; 227 Ok((value & 0xff) as i8) 228 } 229 230 /// Read at most 16 bits into a i16. 231 /// Assumes the bits are stored in two's complement format. read_i16(&mut self, bit_count: u8) -> Result<i16>232 pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> { 233 let value = self.read_signed_value(bit_count, 16)?; 234 Ok((value & 0xffff) as i16) 235 } 236 237 /// Read at most 32 bits into a i32. 238 /// Assumes the bits are stored in two's complement format. read_i32(&mut self, bit_count: u8) -> Result<i32>239 pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> { 240 let value = self.read_signed_value(bit_count, 32)?; 241 Ok((value & 0xffffffff) as i32) 242 } 243 244 /// Read at most 64 bits into a i64. 245 /// Assumes the bits are stored in two's complement format. read_i64(&mut self, bit_count: u8) -> Result<i64>246 pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> { 247 let value = self.read_signed_value(bit_count, 64)?; 248 Ok(value) 249 } 250 251 /// Read a single bit as a boolean value. 252 /// Interprets 1 as true and 0 as false. read_bool(&mut self) -> Result<bool>253 pub fn read_bool(&mut self) -> Result<bool> { 254 match self.read_value(1, 1)? { 255 0 => Ok(false), 256 _ => Ok(true), 257 } 258 } 259 260 /// Read a single bit as a boolean value, but without moving the cursor forward. 261 /// Interprets 1 as true and 0 as false. peek_bool(&self) -> Result<bool>262 pub fn peek_bool(&self) -> Result<bool> { 263 self.relative_reader().read_bool() 264 } 265 266 /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice. skip(&mut self, bit_count: u64) -> Result<()>267 pub fn skip(&mut self, bit_count: u64) -> Result<()> { 268 let end_position = self.position + bit_count; 269 if end_position > (self.relative_offset + self.length) { 270 return Err(BitReaderError::NotEnoughData { 271 position: self.position(), 272 length: self.length, 273 requested: bit_count, 274 }); 275 } 276 self.position = end_position; 277 Ok(()) 278 } 279 280 /// Returns the position of the cursor, or how many bits have been read so far. position(&self) -> u64281 pub fn position(&self) -> u64 { 282 self.position - self.relative_offset 283 } 284 285 /// Returns the number of bits not yet read from the underlying slice. remaining(&self) -> u64286 pub fn remaining(&self) -> u64 { 287 self.length - self.position 288 } 289 290 /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific 291 /// multi-byte alignment position. 292 /// 293 /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been 294 /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte 295 /// sequences has been read. 296 /// 297 /// This function can be used to validate the data is being read properly, for example by 298 /// adding invocations wrapped into `debug_assert!()` to places where it is known the data 299 /// should be n-byte aligned. is_aligned(&self, alignment_bytes: u32) -> bool300 pub fn is_aligned(&self, alignment_bytes: u32) -> bool { 301 self.position % (alignment_bytes as u64 * 8) == 0 302 } 303 304 /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific 305 /// multi-byte alignment position. 306 /// 307 /// That is, `reader.align(n)` moves the cursor to the next position that 308 /// is a multiple of n * 8 bits, if it's not correctly aligned already. align(&mut self, alignment_bytes: u32) -> Result<()>309 pub fn align(&mut self, alignment_bytes: u32) -> Result<()> { 310 let alignment_bits = alignment_bytes as u64 * 8; 311 let cur_alignment = self.position % alignment_bits; 312 let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits; 313 self.skip(bits_to_skip) 314 } 315 read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64>316 fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> { 317 let unsigned = self.read_value(bit_count, maximum_count)?; 318 // Fill the bits above the requested bits with all ones or all zeros, 319 // depending on the sign bit. 320 let sign_bit = unsigned >> (bit_count - 1) & 1; 321 let high_bits = if sign_bit == 1 { -1 } else { 0 }; 322 Ok(high_bits << bit_count | unsigned as i64) 323 } 324 read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64>325 fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> { 326 if bit_count == 0 { 327 return Ok(0); 328 } 329 if bit_count > maximum_count { 330 return Err(BitReaderError::TooManyBitsForType { 331 position: self.position, 332 requested: bit_count, 333 allowed: maximum_count, 334 }); 335 } 336 let start_position = self.position; 337 let end_position = self.position + bit_count as u64; 338 if end_position > (self.relative_offset + self.length) { 339 return Err(BitReaderError::NotEnoughData { 340 position: self.position(), 341 length: self.length, 342 requested: bit_count as u64, 343 }); 344 } 345 346 let mut value: u64 = 0; 347 348 for i in start_position..end_position { 349 let byte_index = (i / 8) as usize; 350 let byte = self.bytes[byte_index]; 351 let shift = 7 - (i % 8); 352 let bit = (byte >> shift) as u64 & 1; 353 value = (value << 1) | bit; 354 } 355 356 self.position = end_position; 357 Ok(value) 358 } 359 } 360 361 /// Result type for those BitReader operations that can fail. 362 pub type Result<T> = result::Result<T, BitReaderError>; 363 364 /// Error enumeration of BitReader errors. 365 #[derive(Debug,PartialEq,Copy,Clone)] 366 pub enum BitReaderError { 367 /// Requested more bits than there are left in the byte slice at the current position. 368 NotEnoughData { 369 /// Current posititon in bits relative to the beginning of the reader. 370 position: u64, 371 372 /// Total readable length in bits of the underlaying slice. 373 length: u64, 374 375 /// Bits requested to be read. 376 requested: u64, 377 }, 378 /// Requested more bits than the returned variable can hold, for example more than 8 bits when 379 /// reading into a u8. 380 TooManyBitsForType { 381 position: u64, 382 requested: u8, 383 allowed: u8, 384 } 385 } 386 387 #[cfg(feature = "std")] 388 impl Error for BitReaderError { description(&self) -> &str389 fn description(&self) -> &str { 390 match *self { 391 BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left", 392 BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold", 393 } 394 } 395 } 396 397 impl fmt::Display for BitReaderError { fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result398 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 399 //self.description().fmt(fmt) 400 match *self { 401 BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position), 402 BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position), 403 } 404 } 405 } 406 407 /// Helper trait to allow reading bits into a variable without explicitly mentioning its type. 408 /// 409 /// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but 410 /// want to rely on type inference instead, you can use the ReadInto trait. The trait is 411 /// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned) 412 /// and the boolean type. 413 /// 414 /// ``` 415 /// use bitreader::{BitReader,ReadInto}; 416 /// 417 /// let slice_of_u8 = &[0b1110_0000]; 418 /// let mut reader = BitReader::new(slice_of_u8); 419 /// 420 /// struct Foo { 421 /// bar: u8, 422 /// valid: bool, 423 /// } 424 /// 425 /// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar, 426 /// // and consequently the correct "overload" is used. 427 /// let bits = ReadInto::read(&mut reader, 2).unwrap(); 428 /// let valid = ReadInto::read(&mut reader, 1).unwrap(); 429 /// 430 /// let foo = Foo { bar: bits, valid: valid }; 431 /// assert_eq!(foo.bar, 3); 432 /// assert!(foo.valid); 433 /// ``` 434 pub trait ReadInto 435 where Self: Sized 436 { read(reader: &mut BitReader, bits: u8) -> Result<Self>437 fn read(reader: &mut BitReader, bits: u8) -> Result<Self>; 438 } 439 440 // There's eight almost identical implementations, let's make this easier. 441 macro_rules! impl_read_into { 442 ($T:ty, $method:ident) => ( 443 impl ReadInto for $T { 444 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 445 reader.$method(bits) 446 } 447 } 448 ) 449 } 450 451 impl_read_into!(u8, read_u8); 452 impl_read_into!(u16, read_u16); 453 impl_read_into!(u32, read_u32); 454 impl_read_into!(u64, read_u64); 455 456 impl_read_into!(i8, read_i8); 457 impl_read_into!(i16, read_i16); 458 impl_read_into!(i32, read_i32); 459 impl_read_into!(i64, read_i64); 460 461 // We can't cast to bool, so this requires a separate method. 462 impl ReadInto for bool { read(reader: &mut BitReader, bits: u8) -> Result<Self>463 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 464 match reader.read_u8(bits)? { 465 0 => Ok(false), 466 _ => Ok(true), 467 } 468 } 469 } 470