1 // Copyright 2014-2015 The Servo Project Developers. See the COPYRIGHT 2 // file at the top-level directory of this distribution. 3 // 4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 7 // option. This file may not be copied, modified, or distributed 8 // except according to those terms. 9 10 //! A [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) decoder written in rust. 11 //! 12 //! This library provides functionality to decompress data compressed with the DEFLATE algorithm, 13 //! both with and without a [zlib](https://tools.ietf.org/html/rfc1950) header/trailer. 14 //! 15 //! # Examples 16 //! The easiest way to get `std::Vec<u8>` containing the decompressed bytes is to use either 17 //! `inflate::inflate_bytes` or `inflate::inflate_bytes_zlib` (depending on whether 18 //! the encoded data has zlib headers and trailers or not). The following example 19 //! decodes the DEFLATE encoded string "Hello, world" and prints it: 20 //! 21 //! ```rust 22 //! use inflate::inflate_bytes; 23 //! use std::str::from_utf8; 24 //! 25 //! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0]; 26 //! let decoded = inflate_bytes(&encoded).unwrap(); 27 //! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world" 28 //! ``` 29 //! 30 //! If you need more flexibility, then the library also provides an implementation 31 //! of `std::io::Writer` in `inflate::writer`. Below is an example using an 32 //! `inflate::writer::InflateWriter` to decode the DEFLATE encoded string "Hello, world": 33 //! 34 //! ```rust 35 //! use inflate::InflateWriter; 36 //! use std::io::Write; 37 //! use std::str::from_utf8; 38 //! 39 //! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0]; 40 //! let mut decoder = InflateWriter::new(Vec::new()); 41 //! decoder.write(&encoded).unwrap(); 42 //! let decoded = decoder.finish().unwrap(); 43 //! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world" 44 //! ``` 45 //! 46 //! Finally, if you need even more flexibility, or if you only want to depend on 47 //! `core`, you can use the `inflate::InflateStream` API. The below example 48 //! decodes an array of DEFLATE encoded bytes: 49 //! 50 //! ```rust 51 //! use inflate::InflateStream; 52 //! 53 //! let data = [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00]; 54 //! let mut inflater = InflateStream::new(); 55 //! let mut out = Vec::<u8>::new(); 56 //! let mut n = 0; 57 //! while n < data.len() { 58 //! let res = inflater.update(&data[n..]); 59 //! if let Ok((num_bytes_read, result)) = res { 60 //! n += num_bytes_read; 61 //! out.extend(result.iter().cloned()); 62 //! } else { 63 //! res.unwrap(); 64 //! } 65 //! } 66 //! ``` 67 68 extern crate adler32; 69 70 use std::cmp; 71 use std::slice; 72 73 mod checksum; 74 use checksum::{Checksum, adler32_from_bytes}; 75 76 mod writer; 77 pub use self::writer::{InflateWriter}; 78 79 mod utils; 80 pub use self::utils::{inflate_bytes, inflate_bytes_zlib, inflate_bytes_zlib_no_checksum}; 81 82 mod reader; 83 pub use self::reader::{DeflateDecoder, DeflateDecoderBuf}; 84 85 static BIT_REV_U8: [u8; 256] = [ 86 0b0000_0000, 0b1000_0000, 0b0100_0000, 0b1100_0000, 87 0b0010_0000, 0b1010_0000, 0b0110_0000, 0b1110_0000, 88 0b0001_0000, 0b1001_0000, 0b0101_0000, 0b1101_0000, 89 0b0011_0000, 0b1011_0000, 0b0111_0000, 0b1111_0000, 90 91 0b0000_1000, 0b1000_1000, 0b0100_1000, 0b1100_1000, 92 0b0010_1000, 0b1010_1000, 0b0110_1000, 0b1110_1000, 93 0b0001_1000, 0b1001_1000, 0b0101_1000, 0b1101_1000, 94 0b0011_1000, 0b1011_1000, 0b0111_1000, 0b1111_1000, 95 96 0b0000_0100, 0b1000_0100, 0b0100_0100, 0b1100_0100, 97 0b0010_0100, 0b1010_0100, 0b0110_0100, 0b1110_0100, 98 0b0001_0100, 0b1001_0100, 0b0101_0100, 0b1101_0100, 99 0b0011_0100, 0b1011_0100, 0b0111_0100, 0b1111_0100, 100 101 0b0000_1100, 0b1000_1100, 0b0100_1100, 0b1100_1100, 102 0b0010_1100, 0b1010_1100, 0b0110_1100, 0b1110_1100, 103 0b0001_1100, 0b1001_1100, 0b0101_1100, 0b1101_1100, 104 0b0011_1100, 0b1011_1100, 0b0111_1100, 0b1111_1100, 105 106 107 0b0000_0010, 0b1000_0010, 0b0100_0010, 0b1100_0010, 108 0b0010_0010, 0b1010_0010, 0b0110_0010, 0b1110_0010, 109 0b0001_0010, 0b1001_0010, 0b0101_0010, 0b1101_0010, 110 0b0011_0010, 0b1011_0010, 0b0111_0010, 0b1111_0010, 111 112 0b0000_1010, 0b1000_1010, 0b0100_1010, 0b1100_1010, 113 0b0010_1010, 0b1010_1010, 0b0110_1010, 0b1110_1010, 114 0b0001_1010, 0b1001_1010, 0b0101_1010, 0b1101_1010, 115 0b0011_1010, 0b1011_1010, 0b0111_1010, 0b1111_1010, 116 117 0b0000_0110, 0b1000_0110, 0b0100_0110, 0b1100_0110, 118 0b0010_0110, 0b1010_0110, 0b0110_0110, 0b1110_0110, 119 0b0001_0110, 0b1001_0110, 0b0101_0110, 0b1101_0110, 120 0b0011_0110, 0b1011_0110, 0b0111_0110, 0b1111_0110, 121 122 0b0000_1110, 0b1000_1110, 0b0100_1110, 0b1100_1110, 123 0b0010_1110, 0b1010_1110, 0b0110_1110, 0b1110_1110, 124 0b0001_1110, 0b1001_1110, 0b0101_1110, 0b1101_1110, 125 0b0011_1110, 0b1011_1110, 0b0111_1110, 0b1111_1110, 126 127 128 0b0000_0001, 0b1000_0001, 0b0100_0001, 0b1100_0001, 129 0b0010_0001, 0b1010_0001, 0b0110_0001, 0b1110_0001, 130 0b0001_0001, 0b1001_0001, 0b0101_0001, 0b1101_0001, 131 0b0011_0001, 0b1011_0001, 0b0111_0001, 0b1111_0001, 132 133 0b0000_1001, 0b1000_1001, 0b0100_1001, 0b1100_1001, 134 0b0010_1001, 0b1010_1001, 0b0110_1001, 0b1110_1001, 135 0b0001_1001, 0b1001_1001, 0b0101_1001, 0b1101_1001, 136 0b0011_1001, 0b1011_1001, 0b0111_1001, 0b1111_1001, 137 138 0b0000_0101, 0b1000_0101, 0b0100_0101, 0b1100_0101, 139 0b0010_0101, 0b1010_0101, 0b0110_0101, 0b1110_0101, 140 0b0001_0101, 0b1001_0101, 0b0101_0101, 0b1101_0101, 141 0b0011_0101, 0b1011_0101, 0b0111_0101, 0b1111_0101, 142 143 0b0000_1101, 0b1000_1101, 0b0100_1101, 0b1100_1101, 144 0b0010_1101, 0b1010_1101, 0b0110_1101, 0b1110_1101, 145 0b0001_1101, 0b1001_1101, 0b0101_1101, 0b1101_1101, 146 0b0011_1101, 0b1011_1101, 0b0111_1101, 0b1111_1101, 147 148 149 0b0000_0011, 0b1000_0011, 0b0100_0011, 0b1100_0011, 150 0b0010_0011, 0b1010_0011, 0b0110_0011, 0b1110_0011, 151 0b0001_0011, 0b1001_0011, 0b0101_0011, 0b1101_0011, 152 0b0011_0011, 0b1011_0011, 0b0111_0011, 0b1111_0011, 153 154 0b0000_1011, 0b1000_1011, 0b0100_1011, 0b1100_1011, 155 0b0010_1011, 0b1010_1011, 0b0110_1011, 0b1110_1011, 156 0b0001_1011, 0b1001_1011, 0b0101_1011, 0b1101_1011, 157 0b0011_1011, 0b1011_1011, 0b0111_1011, 0b1111_1011, 158 159 0b0000_0111, 0b1000_0111, 0b0100_0111, 0b1100_0111, 160 0b0010_0111, 0b1010_0111, 0b0110_0111, 0b1110_0111, 161 0b0001_0111, 0b1001_0111, 0b0101_0111, 0b1101_0111, 162 0b0011_0111, 0b1011_0111, 0b0111_0111, 0b1111_0111, 163 164 0b0000_1111, 0b1000_1111, 0b0100_1111, 0b1100_1111, 165 0b0010_1111, 0b1010_1111, 0b0110_1111, 0b1110_1111, 166 0b0001_1111, 0b1001_1111, 0b0101_1111, 0b1101_1111, 167 0b0011_1111, 0b1011_1111, 0b0111_1111, 0b1111_1111 168 ]; 169 170 #[derive(Clone, Copy)] 171 struct BitState { 172 n: u8, 173 v: u32, 174 } 175 176 #[derive(Clone)] 177 struct BitStream<'a> { 178 bytes: slice::Iter<'a, u8>, 179 used: usize, 180 state: BitState, 181 } 182 183 #[cfg(debug)] 184 macro_rules! debug { ($($x:tt)*) => (println!($($x)*)) } 185 #[cfg(not(debug))] 186 macro_rules! debug { ($($x:tt)*) => (()) } 187 188 impl<'a> BitStream<'a> { new(bytes: &'a [u8], state: BitState) -> BitStream<'a>189 fn new(bytes: &'a [u8], state: BitState) -> BitStream<'a> { 190 BitStream { 191 bytes: bytes.iter(), 192 used: 0, 193 state: state, 194 } 195 } 196 use_byte(&mut self) -> bool197 fn use_byte(&mut self) -> bool { 198 match self.bytes.next() { 199 Some(&b) => { 200 self.state.v |= (b as u32) << self.state.n; 201 self.state.n += 8; 202 self.used += 1; 203 true 204 } 205 None => false, 206 } 207 } 208 need(&mut self, n: u8) -> bool209 fn need(&mut self, n: u8) -> bool { 210 if self.state.n < n { 211 if !self.use_byte() { 212 return false; 213 } 214 if n > 8 && self.state.n < n { 215 assert!(n <= 16); 216 if !self.use_byte() { 217 return false; 218 } 219 } 220 } 221 true 222 } 223 take16(&mut self, n: u8) -> Option<u16>224 fn take16(&mut self, n: u8) -> Option<u16> { 225 if self.need(n) { 226 self.state.n -= n; 227 let v = self.state.v & ((1 << n) - 1); 228 self.state.v >>= n; 229 Some(v as u16) 230 } else { 231 None 232 } 233 } 234 take(&mut self, n: u8) -> Option<u8>235 fn take(&mut self, n: u8) -> Option<u8> { 236 assert!(n <= 8); 237 self.take16(n).map(|v: u16| v as u8) 238 } 239 fill(&mut self) -> BitState240 fn fill(&mut self) -> BitState { 241 while self.state.n + 8 <= 32 && self.use_byte() {} 242 self.state 243 } 244 align_byte(&mut self)245 fn align_byte(&mut self) { 246 if self.state.n > 0 { 247 let n = self.state.n % 8; 248 self.take(n); 249 } 250 } 251 trailing_bytes(&mut self) -> (u8, [u8; 4])252 fn trailing_bytes(&mut self) -> (u8, [u8; 4]) { 253 let mut len = 0; 254 let mut bytes = [0; 4]; 255 self.align_byte(); 256 while self.state.n >= 8 { 257 bytes[len as usize] = self.state.v as u8; 258 len += 1; 259 self.state.n -= 8; 260 self.state.v >>= 8; 261 } 262 (len, bytes) 263 } 264 } 265 266 /// Generate huffman codes from the given set of lengths and run `$cb` on them except the first 267 /// code for each length. 268 /// 269 /// See also the [deflate specification](http://www.gzip.org/zlib/rfc-deflate.html#huffman) 270 /// for an explanation of the algorithm. 271 macro_rules! with_codes (($clens:expr, $max_bits:expr => $code_ty:ty, $cb:expr) => ({ 272 // Count the number of codes for each bit length. 273 let mut bl_count = [0 as $code_ty; ($max_bits+1)]; 274 for &bits in $clens.iter() { 275 if bits != 0 { 276 // This should be safe from overflow as the number of lengths read from the input 277 // is bounded by the number of bits the number of lengths is represented by in the 278 // deflate compressed data. 279 bl_count[bits as usize] += 1; 280 } 281 } 282 283 // Compute the first code value for each bit length. 284 let mut next_code = [0 as $code_ty; ($max_bits+1)]; 285 let mut code = 0 as $code_ty; 286 // TODO use range_inclusive as soon as it is stable 287 //for bits in range_inclusive(1, $max_bits) { 288 for bits in 1..$max_bits + 1 { 289 code = try!( 290 code.checked_add(bl_count[bits as usize - 1]) 291 .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths") 292 ) << 1; 293 next_code[bits as usize] = code; 294 } 295 296 // Compute the rest of the codes 297 for (i, &bits) in $clens.iter().enumerate() { 298 if bits != 0 { 299 let code = next_code[bits as usize]; 300 // If there is an overflow here, the given set of code lengths won't allow enough 301 // unique codes to be generated. 302 let new_code = try!( 303 code.checked_add(1) 304 .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths!") 305 ); 306 next_code[bits as usize] = new_code; 307 match $cb(i as $code_ty, code, bits) { 308 Ok(()) => (), 309 Err(err) => return Err(err) 310 } 311 } 312 } 313 })); 314 315 struct CodeLengthReader { 316 patterns: Box<[u8; 128]>, 317 clens: Box<[u8; 19]>, 318 result: Vec<u8>, 319 num_lit: u16, 320 num_dist: u8, 321 } 322 323 impl CodeLengthReader { new(clens: Box<[u8; 19]>, num_lit: u16, num_dist: u8) -> Result<CodeLengthReader, String>324 fn new(clens: Box<[u8; 19]>, num_lit: u16, num_dist: u8) -> Result<CodeLengthReader, String> { 325 // Fill in the 7-bit patterns that match each code. 326 let mut patterns = Box::new([0xffu8; 128]); 327 with_codes!(clens, 7 => u8, |i: u8, code: u8, bits| -> _ { 328 /*let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) { 329 Some(&base) => base, 330 None => return Err("invalid length code".to_owned()) 331 }*/ 332 let base = BIT_REV_U8[(code << (8 - bits)) as usize]; 333 for rest in 0u8 .. 1u8 << (7 - bits) { 334 patterns[(base | (rest << bits)) as usize] = i; 335 } 336 Ok(()) 337 }); 338 339 Ok(CodeLengthReader { 340 patterns: patterns, 341 clens: clens, 342 result: Vec::with_capacity(num_lit as usize + num_dist as usize), 343 num_lit: num_lit, 344 num_dist: num_dist, 345 }) 346 } 347 read(&mut self, stream: &mut BitStream) -> Result<bool, String>348 fn read(&mut self, stream: &mut BitStream) -> Result<bool, String> { 349 let total_len = self.num_lit as usize + self.num_dist as usize; 350 while self.result.len() < total_len { 351 if !stream.need(7) { 352 return Ok(false); 353 } 354 let save = stream.clone(); 355 macro_rules! take (($n:expr) => (match stream.take($n) { 356 Some(v) => v, 357 None => { 358 *stream = save; 359 return Ok(false); 360 } 361 })); 362 let code = self.patterns[(stream.state.v & 0x7f) as usize]; 363 stream.take(match self.clens.get(code as usize) { 364 Some(&len) => len, 365 None => return Err("invalid length code".to_owned()), 366 }); 367 match code { 368 0...15 => self.result.push(code), 369 16 => { 370 let last = match self.result.last() { 371 Some(&v) => v, 372 // 16 appeared before anything else 373 None => return Err("invalid length code".to_owned()), 374 }; 375 for _ in 0..3 + take!(2) { 376 self.result.push(last); 377 } 378 } 379 17 => { 380 for _ in 0..3 + take!(3) { 381 self.result.push(0); 382 } 383 } 384 18 => { 385 for _ in 0..11 + take!(7) { 386 self.result.push(0); 387 } 388 } 389 _ => unreachable!(), 390 } 391 } 392 Ok(true) 393 } 394 to_lit_and_dist(&self) -> Result<(DynHuffman16, DynHuffman16), String>395 fn to_lit_and_dist(&self) -> Result<(DynHuffman16, DynHuffman16), String> { 396 let num_lit = self.num_lit as usize; 397 let lit = try!(DynHuffman16::new(&self.result[..num_lit])); 398 let dist = try!(DynHuffman16::new(&self.result[num_lit..])); 399 Ok((lit, dist)) 400 } 401 } 402 403 struct Trie8bit<T> { 404 data: [T; 16], 405 children: [Option<Box<[T; 16]>>; 16], 406 } 407 408 struct DynHuffman16 { 409 patterns: Box<[u16; 256]>, 410 rest: Vec<Trie8bit<u16>>, 411 } 412 413 impl DynHuffman16 { new(clens: &[u8]) -> Result<DynHuffman16, String>414 fn new(clens: &[u8]) -> Result<DynHuffman16, String> { 415 // Fill in the 8-bit patterns that match each code. 416 // Longer patterns go into the trie. 417 let mut patterns = Box::new([0xffffu16; 256]); 418 let mut rest = Vec::new(); 419 with_codes!(clens, 15 => u16, |i: u16, code: u16, bits: u8| -> _ { 420 let entry = i | ((bits as u16) << 12); 421 if bits <= 8 { 422 let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) { 423 Some(&v) => v, 424 None => return Err("invalid length code".to_owned()) 425 }; 426 for rest in 0u8 .. 1 << (8 - bits) { 427 patterns[(base | (rest << (bits & 7))) as usize] = entry; 428 } 429 } else { 430 let low = match BIT_REV_U8.get((code >> (bits - 8)) as usize) { 431 Some(&v) => v, 432 None => return Err("invalid length code".to_owned()) 433 }; 434 let high = BIT_REV_U8[((code << (16 - bits)) & 0xff) as usize]; 435 let (min_bits, idx) = if patterns[low as usize] != 0xffff { 436 let bits_prev = (patterns[low as usize] >> 12) as u8; 437 (cmp::min(bits_prev, bits), patterns[low as usize] & 0x7ff) 438 } else { 439 rest.push(Trie8bit { 440 data: [0xffff; 16], 441 children: [ 442 None, None, None, None, 443 None, None, None, None, 444 None, None, None, None, 445 None, None, None, None 446 ] 447 }); 448 (bits, (rest.len() - 1) as u16) 449 }; 450 patterns[low as usize] = idx | 0x800 | ((min_bits as u16) << 12); 451 let trie_entry = match rest.get_mut(idx as usize) { 452 Some(v) => v, 453 None => return Err("invalid huffman code".to_owned()) 454 }; 455 if bits <= 12 { 456 for rest in 0u8 .. 1 << (12 - bits) { 457 trie_entry.data[(high | (rest << (bits - 8))) as usize] = entry; 458 } 459 } else { 460 let child = &mut trie_entry.children[(high & 0xf) as usize]; 461 if child.is_none() { 462 *child = Some(Box::new([0xffff; 16])); 463 } 464 let child = &mut **child.as_mut().unwrap(); 465 let high_top = high >> 4; 466 for rest in 0u8 .. 1 << (16 - bits) { 467 child[(high_top | (rest << (bits - 12))) as usize] = entry; 468 } 469 } 470 } 471 Ok(()) 472 }); 473 debug!("=== DYN HUFFMAN ==="); 474 for _i in 0..256 { 475 debug!("{:08b} {:04x}", _i, patterns[BIT_REV_U8[_i] as usize]); 476 } 477 debug!("==================="); 478 Ok(DynHuffman16 { 479 patterns: patterns, 480 rest: rest, 481 }) 482 } 483 read<'a>(&self, stream: &mut BitStream<'a>) -> Result<Option<(BitStream<'a>, u16)>, String>484 fn read<'a>(&self, stream: &mut BitStream<'a>) -> Result<Option<(BitStream<'a>, u16)>, String> { 485 let has8 = stream.need(8); 486 let entry = self.patterns[(stream.state.v & 0xff) as usize]; 487 let bits = (entry >> 12) as u8; 488 489 Ok(if !has8 { 490 if bits <= stream.state.n { 491 let save = stream.clone(); 492 stream.state.n -= bits; 493 stream.state.v >>= bits; 494 Some((save, entry & 0xfff)) 495 } else { 496 None 497 } 498 } else if bits <= 8 { 499 let save = stream.clone(); 500 stream.state.n -= bits; 501 stream.state.v >>= bits; 502 Some((save, entry & 0xfff)) 503 } else { 504 let has16 = stream.need(16); 505 let trie = match self.rest.get((entry & 0x7ff) as usize) { 506 Some(trie) => trie, 507 None => return Err("invalid entry in stream".to_owned()), 508 }; 509 let idx = stream.state.v >> 8; 510 let trie_entry = match trie.children[(idx & 0xf) as usize] { 511 Some(ref child) => child[((idx >> 4) & 0xf) as usize], 512 None => trie.data[(idx & 0xf) as usize], 513 }; 514 let trie_bits = (trie_entry >> 12) as u8; 515 if has16 || trie_bits <= stream.state.n { 516 let save = stream.clone(); 517 stream.state.n -= trie_bits; 518 stream.state.v >>= trie_bits; 519 Some((save, trie_entry & 0xfff)) 520 } else { 521 None 522 } 523 }) 524 } 525 } 526 527 enum State { 528 ZlibMethodAndFlags, // CMF 529 ZlibFlags(/* CMF */ u8), // FLG, 530 Bits(BitsNext, BitState), 531 LenDist((BitsNext, BitState), /* len */ u16, /* dist */ u16), 532 Uncompressed(/* len */ u16), 533 CheckCRC(/* len */ u8, /* bytes */ [u8; 4]), 534 Finished 535 } 536 537 use self::State::*; 538 539 enum BitsNext { 540 BlockHeader, 541 BlockUncompressedLen, 542 BlockUncompressedNlen(/* len */ u16), 543 BlockDynHlit, 544 BlockDynHdist(/* hlit */ u8), 545 BlockDynHclen(/* hlit */ u8, /* hdist */ u8), 546 BlockDynClenCodeLengths(/* hlit */ u8, /* hdist */ u8, /* hclen */ u8, /* idx */ u8, /* clens */ Box<[u8; 19]>), 547 BlockDynCodeLengths(CodeLengthReader), 548 BlockDyn(/* lit/len */ DynHuffman16, /* dist */ DynHuffman16, /* prev_len */ u16) 549 } 550 551 use self::BitsNext::*; 552 553 pub struct InflateStream { 554 buffer: Vec<u8>, 555 pos: u16, 556 state: Option<State>, 557 final_block: bool, 558 checksum: Checksum, 559 read_checksum: Option<u32>, 560 } 561 562 impl InflateStream { 563 #[allow(dead_code)] 564 /// Create a new stream for decoding raw deflate encoded data. new() -> InflateStream565 pub fn new() -> InflateStream { 566 let state = Bits(BlockHeader, BitState { n: 0, v: 0 }); 567 let buffer = Vec::with_capacity(32 * 1024); 568 InflateStream::with_state_and_buffer(state, buffer, Checksum::none()) 569 } 570 571 /// Create a new stream for decoding deflate encoded data with a zlib header and footer from_zlib() -> InflateStream572 pub fn from_zlib() -> InflateStream { 573 InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::zlib()) 574 } 575 576 /// Create a new stream for decoding deflate encoded data with a zlib header and footer 577 /// 578 /// This version creates a decoder that does not checksum the data to validate it with the 579 /// checksum provided with the zlib wrapper. from_zlib_no_checksum() -> InflateStream580 pub fn from_zlib_no_checksum() -> InflateStream { 581 InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::none()) 582 } 583 reset(&mut self)584 pub fn reset(&mut self) { 585 self.buffer.clear(); 586 self.pos = 0; 587 self.state = Some(Bits(BlockHeader, BitState { n: 0, v: 0 })); 588 self.final_block = false; 589 } 590 reset_to_zlib(&mut self)591 pub fn reset_to_zlib(&mut self) { 592 self.reset(); 593 self.state = Some(ZlibMethodAndFlags); 594 } 595 with_state_and_buffer(state: State, buffer: Vec<u8>, checksum: Checksum) -> InflateStream596 fn with_state_and_buffer(state: State, buffer: Vec<u8>, checksum: Checksum) 597 -> InflateStream { 598 InflateStream { 599 buffer: buffer, 600 pos: 0, 601 state: Some(state), 602 final_block: false, 603 checksum: checksum, 604 read_checksum: None, 605 } 606 } 607 run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String>608 fn run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String> { 609 debug!("RLE -{}; {} (cap={} len={})", dist, len, 610 self.buffer.capacity(), self.buffer.len()); 611 if dist < 1 { 612 return Err("invalid run length in stream".to_owned()); 613 } 614 // `buffer_size` is used for validating `unsafe` below, handle with care 615 let buffer_size = self.buffer.capacity() as u16; 616 let len = if self.pos < dist { 617 // Handle copying from ahead, until we hit the end reading. 618 let pos_end = self.pos + len; 619 let (pos_end, left) = if pos_end < dist { 620 (pos_end, 0) 621 } else { 622 (dist, pos_end - dist) 623 }; 624 if dist > buffer_size { 625 return Err("run length distance is bigger than the window size".to_owned()); 626 } 627 let forward = buffer_size - dist; 628 if pos_end + forward > self.buffer.len() as u16 { 629 return Err("invalid run length in stream".to_owned()); 630 } 631 632 for i in self.pos as usize..pos_end as usize { 633 self.buffer[i] = self.buffer[i + forward as usize]; 634 } 635 self.pos = pos_end; 636 left 637 } else { 638 len 639 }; 640 // Handle copying from before, until we hit the end writing. 641 let pos_end = self.pos + len; 642 let (pos_end, left) = if pos_end <= buffer_size { 643 (pos_end, None) 644 } else { 645 (buffer_size, Some(pos_end - buffer_size)) 646 }; 647 648 if self.pos < dist && pos_end > self.pos { 649 return Err("invalid run length in stream".to_owned()); 650 } 651 652 if self.buffer.len() < pos_end as usize { 653 // ensure the buffer length will not exceed the amount of allocated memory 654 assert!(pos_end <= buffer_size); 655 // ensure that the uninitialized chunk of memory will be fully overwritten 656 assert!(self.pos as usize <= self.buffer.len()); 657 unsafe { 658 self.buffer.set_len(pos_end as usize); 659 } 660 } 661 assert!(dist > 0); // validation against reading uninitialized memory 662 for i in self.pos as usize..pos_end as usize { 663 self.buffer[i] = self.buffer[i - dist as usize]; 664 } 665 self.pos = pos_end; 666 Ok(left) 667 } 668 next_state(&mut self, data: &[u8]) -> Result<usize, String>669 fn next_state(&mut self, data: &[u8]) -> Result<usize, String> { 670 macro_rules! ok_bytes (($n:expr, $state:expr) => ({ 671 self.state = Some($state); 672 Ok($n) 673 })); 674 let debug_byte = |_i, _b| debug!("[{:04x}] {:02x}", _i, _b); 675 macro_rules! push_or (($b:expr, $ret:expr) => (if self.pos < self.buffer.capacity() as u16 { 676 let b = $b; 677 debug_byte(self.pos, b); 678 if (self.pos as usize) < self.buffer.len() { 679 self.buffer[self.pos as usize] = b; 680 } else { 681 assert_eq!(self.pos as usize, self.buffer.len()); 682 self.buffer.push(b); 683 } 684 self.pos += 1; 685 } else { 686 return $ret; 687 })); 688 macro_rules! run_len_dist (($len:expr, $dist:expr => ($bytes:expr, $next:expr, $state:expr)) => ({ 689 let dist = $dist; 690 let left = try!(self.run_len_dist($len, dist)); 691 if let Some(len) = left { 692 return ok_bytes!($bytes, LenDist(($next, $state), len, dist)); 693 } 694 })); 695 match self.state.take().unwrap() { 696 ZlibMethodAndFlags => { 697 let b = match data.get(0) { 698 Some(&x) => x, 699 None => { 700 self.state = Some(ZlibMethodAndFlags); 701 return Ok(0); 702 } 703 }; 704 let (method, info) = (b & 0xF, b >> 4); 705 debug!("ZLIB CM=0x{:x} CINFO=0x{:x}", method, info); 706 707 // CM = 8 (DEFLATE) is the only method defined by the ZLIB specification. 708 match method { 709 8 => {/* DEFLATE */} 710 _ => return Err(format!("unknown ZLIB method CM=0x{:x}", method)) 711 } 712 713 if info > 7 { 714 return Err(format!("invalid ZLIB info CINFO=0x{:x}", info)); 715 } 716 717 self.buffer = Vec::with_capacity(1 << (8 + info)); 718 719 ok_bytes!(1, ZlibFlags(b)) 720 } 721 ZlibFlags(cmf) => { 722 let b = match data.get(0) { 723 Some(&x) => x, 724 None => { 725 self.state = Some(ZlibFlags(cmf)); 726 return Ok(0); 727 } 728 }; 729 let (_check, dict, _level) = (b & 0x1F, (b & 0x20) != 0, b >> 6); 730 debug!("ZLIB FCHECK=0x{:x} FDICT={} FLEVEL=0x{:x}", _check, dict, _level); 731 732 if (((cmf as u16) << 8) | b as u16) % 31 != 0 { 733 return Err(format!("invalid ZLIB checksum CMF=0x{:x} FLG=0x{:x}", cmf, b)); 734 } 735 736 if dict { 737 return Err("unimplemented ZLIB FDICT=1".into()); 738 } 739 740 ok_bytes!(1, Bits(BlockHeader, BitState { n: 0, v: 0 })) 741 } 742 Bits(next, state) => { 743 let mut stream = BitStream::new(data, state); 744 macro_rules! ok_state (($state:expr) => ({self.state = Some($state); Ok(stream.used)})); 745 macro_rules! ok (($next:expr) => (ok_state!(Bits($next, stream.fill())))); 746 macro_rules! take ( 747 ($n:expr => $next:expr) => (match stream.take($n) { 748 Some(v) => v, 749 None => return ok!($next) 750 }); 751 ($n:expr) => (take!($n => next)) 752 ); 753 macro_rules! take16 ( 754 ($n:expr => $next:expr) => (match stream.take16($n) { 755 Some(v) => v, 756 None => return ok!($next) 757 }); 758 ($n:expr) => (take16!($n => next)) 759 ); 760 macro_rules! len_dist ( 761 ($len:expr, $code:expr, $bits:expr => $next_early:expr, $next:expr) => ({ 762 let dist = 1 + if $bits == 0 { 0 } else { // new_base 763 2 << $bits 764 } + (($code as u16 - if $bits == 0 { 0 } else { // old_base 765 $bits * 2 + 2 766 }) << $bits) + take16!($bits => $next_early) as u16; 767 run_len_dist!($len, dist => (stream.used, $next, stream.state)); 768 }); 769 ($len:expr, $code:expr, $bits:expr) => ( 770 len_dist!($len, $code, $bits => next, next) 771 ) 772 ); 773 match next { 774 BlockHeader => { 775 if self.final_block { 776 let (len, bytes) = stream.trailing_bytes(); 777 return ok_state!(CheckCRC(len, bytes)); 778 } 779 let h = take!(3); 780 let (final_, block_type) = ((h & 1) != 0, (h >> 1) & 0b11); 781 782 self.final_block = final_; 783 784 match block_type { 785 0 => { 786 // Skip to the next byte for an uncompressed block. 787 stream.align_byte(); 788 ok!(BlockUncompressedLen) 789 } 790 1 => { 791 // Unwrap is safe because the data is valid. 792 let lit = DynHuffman16::new(&[ 793 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0-15 794 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 16-31 795 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 32-47 796 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 48-63 797 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 64-79 798 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 80-95 799 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 96-101 800 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 112-127 801 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128-143 802 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 144-159 803 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 160-175 804 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 176-191 805 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 192-207 806 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 208-223 807 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 224-239 808 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 240-255 809 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 256-271 810 7, 7, 7, 7, 7, 7, 7, 7, // 272-279 811 8, 8, 8, 8, 8, 8, 8, 8, // 280-287 812 ]).unwrap(); 813 let dist = DynHuffman16::new(&[ 814 5, 5, 5, 5, 5, 5, 5, 5, 815 5, 5, 5, 5, 5, 5, 5, 5, 816 5, 5, 5, 5, 5, 5, 5, 5, 817 5, 5, 5, 5, 5, 5, 5, 5 818 ]).unwrap(); 819 ok!(BlockDyn(lit, dist, 0)) 820 } 821 2 => ok!(BlockDynHlit), 822 _ => { 823 Err(format!("unimplemented DEFLATE block type 0b{:?}", 824 block_type)) 825 } 826 } 827 } 828 BlockUncompressedLen => { 829 let len = take16!(16); 830 ok_state!(Bits(BlockUncompressedNlen(len), stream.state)) 831 } 832 BlockUncompressedNlen(len) => { 833 let nlen = take16!(16); 834 assert_eq!(stream.state.n, 0); 835 if !len != nlen { 836 return Err(format!("invalid uncompressed block len: LEN: {:04x} NLEN: {:04x}", len, nlen)); 837 } 838 ok_state!(Uncompressed(len)) 839 } 840 BlockDynHlit => ok!(BlockDynHdist(take!(5) + 1)), 841 BlockDynHdist(hlit) => ok!(BlockDynHclen(hlit, take!(5) + 1)), 842 BlockDynHclen(hlit, hdist) => { 843 ok!(BlockDynClenCodeLengths(hlit, hdist, take!(4) + 4, 0, Box::new([0; 19]))) 844 } 845 BlockDynClenCodeLengths(hlit, hdist, hclen, i, mut clens) => { 846 let v = 847 match stream.take(3) { 848 Some(v) => v, 849 None => return ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i, clens)), 850 }; 851 clens[[16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15][i as usize]] = v; 852 if i < hclen - 1 { 853 ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i + 1, clens)) 854 } else { 855 ok!(BlockDynCodeLengths(try!(CodeLengthReader::new(clens, hlit as u16 + 256, hdist)))) 856 } 857 } 858 BlockDynCodeLengths(mut reader) => { 859 let finished = try!(reader.read(&mut stream)); 860 if finished { 861 let (lit, dist) = try!(reader.to_lit_and_dist()); 862 ok!(BlockDyn(lit, dist, 0)) 863 } else { 864 ok!(BlockDynCodeLengths(reader)) 865 } 866 } 867 BlockDyn(huff_lit_len, huff_dist, mut prev_len) => { 868 macro_rules! next (($save_len:expr) => (BlockDyn(huff_lit_len, huff_dist, $save_len))); 869 loop { 870 let len = if prev_len != 0 { 871 let len = prev_len; 872 prev_len = 0; 873 len 874 } else { 875 let (save, code16) = match try!(huff_lit_len.read(&mut stream)) { 876 Some(data) => data, 877 None => return ok!(next!(0)), 878 }; 879 let code = code16 as u8; 880 debug!("{:09b}", code16); 881 match code16 { 882 0...255 => { 883 push_or!(code, ok!({stream = save; next!(0)})); 884 continue; 885 } 886 256...285 => {} 887 _ => return Err(format!("bad DEFLATE len code {}", code)), 888 } 889 890 macro_rules! len (($code:expr, $bits:expr) => ( 891 3 + if $bits == 0 { 0 } else { // new_base 892 4 << $bits 893 } + ((if $code == 29 { 894 256 895 } else { 896 $code as u16 897 } - if $bits == 0 { 0 } else { // old_base 898 $bits * 4 + 4 899 } - 1) << $bits) + take!($bits => {stream = save; next!(0)}) as u16 900 )); 901 match code { 902 0 => { 903 return if self.final_block { 904 let (len, bytes) = stream.trailing_bytes(); 905 ok_state!(CheckCRC(len, bytes)) 906 } else { 907 ok!(BlockHeader) 908 } 909 } 910 1...8 => len!(code, 0), 911 9...12 => len!(code, 1), 912 13...16 => len!(code, 2), 913 17...20 => len!(code, 3), 914 21...24 => len!(code, 4), 915 25...28 => len!(code, 5), 916 29 => len!(29, 0), 917 _ => return Err(format!("bad DEFLATE len code {}", code as u16 + 256)), 918 } 919 }; 920 921 let (save, dist_code) = match try!(huff_dist.read(&mut stream)) { 922 Some(data) => data, 923 None => return ok!(next!(len)), 924 }; 925 debug!(" {:05b}", dist_code); 926 macro_rules! len_dist_case (($bits:expr) => ( 927 len_dist!(len, dist_code, $bits => {stream = save; next!(len)}, next!(0)) 928 )); 929 match dist_code { 930 0...3 => len_dist_case!(0), 931 4...5 => len_dist_case!(1), 932 6...7 => len_dist_case!(2), 933 8...9 => len_dist_case!(3), 934 10...11 => len_dist_case!(4), 935 12...13 => len_dist_case!(5), 936 14...15 => len_dist_case!(6), 937 16...17 => len_dist_case!(7), 938 18...19 => len_dist_case!(8), 939 20...21 => len_dist_case!(9), 940 22...23 => len_dist_case!(10), 941 24...25 => len_dist_case!(11), 942 26...27 => len_dist_case!(12), 943 28...29 => len_dist_case!(13), 944 _ => return Err(format!("bad DEFLATE dist code {}", dist_code)), 945 } 946 } 947 } 948 } 949 } 950 LenDist((next, state), len, dist) => { 951 run_len_dist!(len, dist => (0, next, state)); 952 ok_bytes!(0, Bits(next, state)) 953 } 954 Uncompressed(mut len) => { 955 for (i, &b) in data.iter().enumerate() { 956 if len == 0 { 957 return ok_bytes!(i, Bits(BlockHeader, BitState { n: 0, v: 0 })); 958 } 959 push_or!(b, ok_bytes!(i, Uncompressed(len))); 960 len -= 1; 961 } 962 ok_bytes!(data.len(), Uncompressed(len)) 963 } 964 CheckCRC(mut len, mut bytes) => { 965 if self.checksum.is_none() { 966 // TODO: inform caller of unused bytes 967 return ok_bytes!(0, Finished); 968 } 969 970 // Get the checksum value from the end of the stream. 971 let mut used = 0; 972 while len < 4 && used < data.len() { 973 bytes[len as usize] = data[used]; 974 len += 1; 975 used += 1; 976 } 977 if len < 4 { 978 return ok_bytes!(used, CheckCRC(len, bytes)); 979 } 980 981 self.read_checksum = Some(adler32_from_bytes(&bytes)); 982 ok_bytes!(used, Finished) 983 } 984 Finished => { 985 // TODO: inform caller of unused bytes 986 ok_bytes!(data.len(), Finished) 987 } 988 } 989 } 990 991 /// Try to uncompress/decode the data in `data`. 992 /// 993 /// On success, returns how many bytes of the input data was decompressed, and a reference to 994 /// the buffer containing the decompressed data. 995 /// 996 /// This function may not uncompress all the provided data in one call, so it has to be called 997 /// repeatedly with the data that hasn't been decompressed yet as an input until the number of 998 /// bytes decoded returned is 0. (See the [top level crate documentation](index.html) 999 /// for an example.) 1000 /// 1001 /// # Errors 1002 /// If invalid input data is encountered, a string describing what went wrong is returned. update<'a>(&'a mut self, mut data: &[u8]) -> Result<(usize, &'a [u8]), String>1003 pub fn update<'a>(&'a mut self, mut data: &[u8]) -> Result<(usize, &'a [u8]), String> { 1004 let original_size = data.len(); 1005 let original_pos = self.pos as usize; 1006 let mut empty = false; 1007 while !empty && 1008 ((self.pos as usize) < self.buffer.capacity() || self.buffer.capacity() == 0) { 1009 // next_state must be called at least once after the data is empty. 1010 empty = data.is_empty(); 1011 match self.next_state(data) { 1012 Ok(n) => { 1013 data = &data[n..]; 1014 } 1015 Err(m) => return Err(m), 1016 } 1017 } 1018 let output = &self.buffer[original_pos..self.pos as usize]; 1019 if self.pos as usize >= self.buffer.capacity() { 1020 self.pos = 0; 1021 } 1022 1023 // Update the checksum.. 1024 self.checksum.update(output); 1025 // and validate if we are done decoding. 1026 if let Some(c) = self.read_checksum { 1027 try!(self.checksum.check(c)); 1028 } 1029 1030 Ok((original_size - data.len(), output)) 1031 } 1032 1033 /// Returns the calculated checksum value of the currently decoded data. 1034 /// 1035 /// Will return 0 for cases where the checksum is not validated. current_checksum(&self) -> u321036 pub fn current_checksum(&self) -> u32 { 1037 self.checksum.current_value() 1038 } 1039 } 1040