1 // Copyright 2014-2015 The Servo Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! A [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) decoder written in rust.
11 //!
12 //! This library provides functionality to decompress data compressed with the DEFLATE algorithm,
13 //! both with and without a [zlib](https://tools.ietf.org/html/rfc1950) header/trailer.
14 //!
15 //! # Examples
16 //! The easiest way to get `std::Vec<u8>` containing the decompressed bytes is to use either
17 //! `inflate::inflate_bytes` or `inflate::inflate_bytes_zlib` (depending on whether
18 //! the encoded data has zlib headers and trailers or not). The following example
19 //! decodes the DEFLATE encoded string "Hello, world" and prints it:
20 //!
21 //! ```rust
22 //! use inflate::inflate_bytes;
23 //! use std::str::from_utf8;
24 //!
25 //! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0];
26 //! let decoded = inflate_bytes(&encoded).unwrap();
27 //! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world"
28 //! ```
29 //!
30 //! If you need more flexibility, then the library also provides an implementation
31 //! of `std::io::Writer` in `inflate::writer`. Below is an example using an
32 //! `inflate::writer::InflateWriter` to decode the DEFLATE encoded string "Hello, world":
33 //!
34 //! ```rust
35 //! use inflate::InflateWriter;
36 //! use std::io::Write;
37 //! use std::str::from_utf8;
38 //!
39 //! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0];
40 //! let mut decoder = InflateWriter::new(Vec::new());
41 //! decoder.write(&encoded).unwrap();
42 //! let decoded = decoder.finish().unwrap();
43 //! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world"
44 //! ```
45 //!
46 //! Finally, if you need even more flexibility, or if you only want to depend on
47 //! `core`, you can use the `inflate::InflateStream` API. The below example
48 //! decodes an array of DEFLATE encoded bytes:
49 //!
50 //! ```rust
51 //! use inflate::InflateStream;
52 //!
53 //! let data = [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00];
54 //! let mut inflater = InflateStream::new();
55 //! let mut out = Vec::<u8>::new();
56 //! let mut n = 0;
57 //! while n < data.len() {
58 //!     let res = inflater.update(&data[n..]);
59 //!     if let Ok((num_bytes_read, result)) = res {
60 //!         n += num_bytes_read;
61 //!         out.extend(result.iter().cloned());
62 //!     } else {
63 //!         res.unwrap();
64 //!     }
65 //! }
66 //! ```
67 
68 extern crate adler32;
69 
70 use std::cmp;
71 use std::slice;
72 
73 mod checksum;
74 use checksum::{Checksum, adler32_from_bytes};
75 
76 mod writer;
77 pub use self::writer::{InflateWriter};
78 
79 mod utils;
80 pub use self::utils::{inflate_bytes, inflate_bytes_zlib, inflate_bytes_zlib_no_checksum};
81 
82 mod reader;
83 pub use self::reader::{DeflateDecoder, DeflateDecoderBuf};
84 
85 static BIT_REV_U8: [u8; 256] = [
86     0b0000_0000, 0b1000_0000, 0b0100_0000, 0b1100_0000,
87     0b0010_0000, 0b1010_0000, 0b0110_0000, 0b1110_0000,
88     0b0001_0000, 0b1001_0000, 0b0101_0000, 0b1101_0000,
89     0b0011_0000, 0b1011_0000, 0b0111_0000, 0b1111_0000,
90 
91     0b0000_1000, 0b1000_1000, 0b0100_1000, 0b1100_1000,
92     0b0010_1000, 0b1010_1000, 0b0110_1000, 0b1110_1000,
93     0b0001_1000, 0b1001_1000, 0b0101_1000, 0b1101_1000,
94     0b0011_1000, 0b1011_1000, 0b0111_1000, 0b1111_1000,
95 
96     0b0000_0100, 0b1000_0100, 0b0100_0100, 0b1100_0100,
97     0b0010_0100, 0b1010_0100, 0b0110_0100, 0b1110_0100,
98     0b0001_0100, 0b1001_0100, 0b0101_0100, 0b1101_0100,
99     0b0011_0100, 0b1011_0100, 0b0111_0100, 0b1111_0100,
100 
101     0b0000_1100, 0b1000_1100, 0b0100_1100, 0b1100_1100,
102     0b0010_1100, 0b1010_1100, 0b0110_1100, 0b1110_1100,
103     0b0001_1100, 0b1001_1100, 0b0101_1100, 0b1101_1100,
104     0b0011_1100, 0b1011_1100, 0b0111_1100, 0b1111_1100,
105 
106 
107     0b0000_0010, 0b1000_0010, 0b0100_0010, 0b1100_0010,
108     0b0010_0010, 0b1010_0010, 0b0110_0010, 0b1110_0010,
109     0b0001_0010, 0b1001_0010, 0b0101_0010, 0b1101_0010,
110     0b0011_0010, 0b1011_0010, 0b0111_0010, 0b1111_0010,
111 
112     0b0000_1010, 0b1000_1010, 0b0100_1010, 0b1100_1010,
113     0b0010_1010, 0b1010_1010, 0b0110_1010, 0b1110_1010,
114     0b0001_1010, 0b1001_1010, 0b0101_1010, 0b1101_1010,
115     0b0011_1010, 0b1011_1010, 0b0111_1010, 0b1111_1010,
116 
117     0b0000_0110, 0b1000_0110, 0b0100_0110, 0b1100_0110,
118     0b0010_0110, 0b1010_0110, 0b0110_0110, 0b1110_0110,
119     0b0001_0110, 0b1001_0110, 0b0101_0110, 0b1101_0110,
120     0b0011_0110, 0b1011_0110, 0b0111_0110, 0b1111_0110,
121 
122     0b0000_1110, 0b1000_1110, 0b0100_1110, 0b1100_1110,
123     0b0010_1110, 0b1010_1110, 0b0110_1110, 0b1110_1110,
124     0b0001_1110, 0b1001_1110, 0b0101_1110, 0b1101_1110,
125     0b0011_1110, 0b1011_1110, 0b0111_1110, 0b1111_1110,
126 
127 
128     0b0000_0001, 0b1000_0001, 0b0100_0001, 0b1100_0001,
129     0b0010_0001, 0b1010_0001, 0b0110_0001, 0b1110_0001,
130     0b0001_0001, 0b1001_0001, 0b0101_0001, 0b1101_0001,
131     0b0011_0001, 0b1011_0001, 0b0111_0001, 0b1111_0001,
132 
133     0b0000_1001, 0b1000_1001, 0b0100_1001, 0b1100_1001,
134     0b0010_1001, 0b1010_1001, 0b0110_1001, 0b1110_1001,
135     0b0001_1001, 0b1001_1001, 0b0101_1001, 0b1101_1001,
136     0b0011_1001, 0b1011_1001, 0b0111_1001, 0b1111_1001,
137 
138     0b0000_0101, 0b1000_0101, 0b0100_0101, 0b1100_0101,
139     0b0010_0101, 0b1010_0101, 0b0110_0101, 0b1110_0101,
140     0b0001_0101, 0b1001_0101, 0b0101_0101, 0b1101_0101,
141     0b0011_0101, 0b1011_0101, 0b0111_0101, 0b1111_0101,
142 
143     0b0000_1101, 0b1000_1101, 0b0100_1101, 0b1100_1101,
144     0b0010_1101, 0b1010_1101, 0b0110_1101, 0b1110_1101,
145     0b0001_1101, 0b1001_1101, 0b0101_1101, 0b1101_1101,
146     0b0011_1101, 0b1011_1101, 0b0111_1101, 0b1111_1101,
147 
148 
149     0b0000_0011, 0b1000_0011, 0b0100_0011, 0b1100_0011,
150     0b0010_0011, 0b1010_0011, 0b0110_0011, 0b1110_0011,
151     0b0001_0011, 0b1001_0011, 0b0101_0011, 0b1101_0011,
152     0b0011_0011, 0b1011_0011, 0b0111_0011, 0b1111_0011,
153 
154     0b0000_1011, 0b1000_1011, 0b0100_1011, 0b1100_1011,
155     0b0010_1011, 0b1010_1011, 0b0110_1011, 0b1110_1011,
156     0b0001_1011, 0b1001_1011, 0b0101_1011, 0b1101_1011,
157     0b0011_1011, 0b1011_1011, 0b0111_1011, 0b1111_1011,
158 
159     0b0000_0111, 0b1000_0111, 0b0100_0111, 0b1100_0111,
160     0b0010_0111, 0b1010_0111, 0b0110_0111, 0b1110_0111,
161     0b0001_0111, 0b1001_0111, 0b0101_0111, 0b1101_0111,
162     0b0011_0111, 0b1011_0111, 0b0111_0111, 0b1111_0111,
163 
164     0b0000_1111, 0b1000_1111, 0b0100_1111, 0b1100_1111,
165     0b0010_1111, 0b1010_1111, 0b0110_1111, 0b1110_1111,
166     0b0001_1111, 0b1001_1111, 0b0101_1111, 0b1101_1111,
167     0b0011_1111, 0b1011_1111, 0b0111_1111, 0b1111_1111
168 ];
169 
170 #[derive(Clone, Copy)]
171 struct BitState {
172     n: u8,
173     v: u32,
174 }
175 
176 #[derive(Clone)]
177 struct BitStream<'a> {
178     bytes: slice::Iter<'a, u8>,
179     used: usize,
180     state: BitState,
181 }
182 
183 #[cfg(debug)]
184 macro_rules! debug { ($($x:tt)*) => (println!($($x)*)) }
185 #[cfg(not(debug))]
186 macro_rules! debug { ($($x:tt)*) => (()) }
187 
188 impl<'a> BitStream<'a> {
new(bytes: &'a [u8], state: BitState) -> BitStream<'a>189     fn new(bytes: &'a [u8], state: BitState) -> BitStream<'a> {
190         BitStream {
191             bytes: bytes.iter(),
192             used: 0,
193             state: state,
194         }
195     }
196 
use_byte(&mut self) -> bool197     fn use_byte(&mut self) -> bool {
198         match self.bytes.next() {
199             Some(&b) => {
200                 self.state.v |= (b as u32) << self.state.n;
201                 self.state.n += 8;
202                 self.used += 1;
203                 true
204             }
205             None => false,
206         }
207     }
208 
need(&mut self, n: u8) -> bool209     fn need(&mut self, n: u8) -> bool {
210         if self.state.n < n {
211             if !self.use_byte() {
212                 return false;
213             }
214             if n > 8 && self.state.n < n {
215                 assert!(n <= 16);
216                 if !self.use_byte() {
217                     return false;
218                 }
219             }
220         }
221         true
222     }
223 
take16(&mut self, n: u8) -> Option<u16>224     fn take16(&mut self, n: u8) -> Option<u16> {
225         if self.need(n) {
226             self.state.n -= n;
227             let v = self.state.v & ((1 << n) - 1);
228             self.state.v >>= n;
229             Some(v as u16)
230         } else {
231             None
232         }
233     }
234 
take(&mut self, n: u8) -> Option<u8>235     fn take(&mut self, n: u8) -> Option<u8> {
236         assert!(n <= 8);
237         self.take16(n).map(|v: u16| v as u8)
238     }
239 
fill(&mut self) -> BitState240     fn fill(&mut self) -> BitState {
241         while self.state.n + 8 <= 32 && self.use_byte() {}
242         self.state
243     }
244 
align_byte(&mut self)245     fn align_byte(&mut self) {
246         if self.state.n > 0 {
247             let n = self.state.n % 8;
248             self.take(n);
249         }
250     }
251 
trailing_bytes(&mut self) -> (u8, [u8; 4])252     fn trailing_bytes(&mut self) -> (u8, [u8; 4]) {
253         let mut len = 0;
254         let mut bytes = [0; 4];
255         self.align_byte();
256         while self.state.n >= 8 {
257             bytes[len as usize] = self.state.v as u8;
258             len += 1;
259             self.state.n -= 8;
260             self.state.v >>= 8;
261         }
262         (len, bytes)
263     }
264 }
265 
266 /// Generate huffman codes from the given set of lengths and run `$cb` on them except the first
267 /// code for each length.
268 ///
269 /// See also the [deflate specification](http://www.gzip.org/zlib/rfc-deflate.html#huffman)
270 /// for an explanation of the algorithm.
271 macro_rules! with_codes (($clens:expr, $max_bits:expr => $code_ty:ty, $cb:expr) => ({
272     // Count the number of codes for each bit length.
273     let mut bl_count = [0 as $code_ty; ($max_bits+1)];
274     for &bits in $clens.iter() {
275         if bits != 0 {
276             // This should be safe from overflow as the number of lengths read from the input
277             // is bounded by the number of bits the number of lengths is represented by in the
278             // deflate compressed data.
279             bl_count[bits as usize] += 1;
280         }
281     }
282 
283     // Compute the first code value for each bit length.
284     let mut next_code = [0 as $code_ty; ($max_bits+1)];
285     let mut code = 0 as $code_ty;
286     // TODO use range_inclusive as soon as it is stable
287     //for bits in range_inclusive(1, $max_bits) {
288     for bits in 1..$max_bits + 1 {
289         code = try!(
290             code.checked_add(bl_count[bits as usize - 1])
291                 .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths")
292         ) << 1;
293         next_code[bits as usize] = code;
294     }
295 
296     // Compute the rest of the codes
297     for (i, &bits) in $clens.iter().enumerate() {
298         if bits != 0 {
299             let code = next_code[bits as usize];
300             // If there is an overflow here, the given set of code lengths won't allow enough
301             // unique codes to be generated.
302             let new_code = try!(
303                 code.checked_add(1)
304                     .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths!")
305             );
306             next_code[bits as usize] = new_code;
307             match $cb(i as $code_ty, code, bits) {
308                 Ok(()) => (),
309                 Err(err) => return Err(err)
310             }
311         }
312     }
313 }));
314 
315 struct CodeLengthReader {
316     patterns: Box<[u8; 128]>,
317     clens: Box<[u8; 19]>,
318     result: Vec<u8>,
319     num_lit: u16,
320     num_dist: u8,
321 }
322 
323 impl CodeLengthReader {
new(clens: Box<[u8; 19]>, num_lit: u16, num_dist: u8) -> Result<CodeLengthReader, String>324     fn new(clens: Box<[u8; 19]>, num_lit: u16, num_dist: u8) -> Result<CodeLengthReader, String> {
325         // Fill in the 7-bit patterns that match each code.
326         let mut patterns = Box::new([0xffu8; 128]);
327         with_codes!(clens, 7 => u8, |i: u8, code: u8, bits| -> _ {
328             /*let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) {
329                 Some(&base) => base,
330                 None => return Err("invalid length code".to_owned())
331             }*/
332             let base = BIT_REV_U8[(code << (8 - bits)) as usize];
333             for rest in 0u8 .. 1u8 << (7 - bits) {
334                 patterns[(base | (rest << bits)) as usize] = i;
335             }
336             Ok(())
337         });
338 
339         Ok(CodeLengthReader {
340             patterns: patterns,
341             clens: clens,
342             result: Vec::with_capacity(num_lit as usize + num_dist as usize),
343             num_lit: num_lit,
344             num_dist: num_dist,
345         })
346     }
347 
read(&mut self, stream: &mut BitStream) -> Result<bool, String>348     fn read(&mut self, stream: &mut BitStream) -> Result<bool, String> {
349         let total_len = self.num_lit as usize + self.num_dist as usize;
350         while self.result.len() < total_len {
351             if !stream.need(7) {
352                 return Ok(false);
353             }
354             let save = stream.clone();
355             macro_rules! take (($n:expr) => (match stream.take($n) {
356                 Some(v) => v,
357                 None => {
358                     *stream = save;
359                     return Ok(false);
360                 }
361             }));
362             let code = self.patterns[(stream.state.v & 0x7f) as usize];
363             stream.take(match self.clens.get(code as usize) {
364                 Some(&len) => len,
365                 None => return Err("invalid length code".to_owned()),
366             });
367             match code {
368                 0...15 => self.result.push(code),
369                 16 => {
370                     let last = match self.result.last() {
371                         Some(&v) => v,
372                         // 16 appeared before anything else
373                         None => return Err("invalid length code".to_owned()),
374                     };
375                     for _ in 0..3 + take!(2) {
376                         self.result.push(last);
377                     }
378                 }
379                 17 => {
380                     for _ in 0..3 + take!(3) {
381                         self.result.push(0);
382                     }
383                 }
384                 18 => {
385                     for _ in 0..11 + take!(7) {
386                         self.result.push(0);
387                     }
388                 }
389                 _ => unreachable!(),
390             }
391         }
392         Ok(true)
393     }
394 
to_lit_and_dist(&self) -> Result<(DynHuffman16, DynHuffman16), String>395     fn to_lit_and_dist(&self) -> Result<(DynHuffman16, DynHuffman16), String> {
396         let num_lit = self.num_lit as usize;
397         let lit = try!(DynHuffman16::new(&self.result[..num_lit]));
398         let dist = try!(DynHuffman16::new(&self.result[num_lit..]));
399         Ok((lit, dist))
400     }
401 }
402 
403 struct Trie8bit<T> {
404     data: [T; 16],
405     children: [Option<Box<[T; 16]>>; 16],
406 }
407 
408 struct DynHuffman16 {
409     patterns: Box<[u16; 256]>,
410     rest: Vec<Trie8bit<u16>>,
411 }
412 
413 impl DynHuffman16 {
new(clens: &[u8]) -> Result<DynHuffman16, String>414     fn new(clens: &[u8]) -> Result<DynHuffman16, String> {
415         // Fill in the 8-bit patterns that match each code.
416         // Longer patterns go into the trie.
417         let mut patterns = Box::new([0xffffu16; 256]);
418         let mut rest = Vec::new();
419         with_codes!(clens, 15 => u16, |i: u16, code: u16, bits: u8| -> _ {
420             let entry = i | ((bits as u16) << 12);
421             if bits <= 8 {
422                 let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) {
423                     Some(&v) => v,
424                     None => return Err("invalid length code".to_owned())
425                 };
426                 for rest in 0u8 .. 1 << (8 - bits) {
427                     patterns[(base | (rest << (bits & 7))) as usize] = entry;
428                 }
429             } else {
430                 let low = match BIT_REV_U8.get((code >> (bits - 8)) as usize) {
431                     Some(&v) => v,
432                     None => return Err("invalid length code".to_owned())
433                 };
434                 let high = BIT_REV_U8[((code << (16 - bits)) & 0xff) as usize];
435                 let (min_bits, idx) = if patterns[low as usize] != 0xffff {
436                     let bits_prev = (patterns[low as usize] >> 12) as u8;
437                     (cmp::min(bits_prev, bits), patterns[low as usize] & 0x7ff)
438                 } else {
439                     rest.push(Trie8bit {
440                         data: [0xffff; 16],
441                         children: [
442                             None, None, None, None,
443                             None, None, None, None,
444                             None, None, None, None,
445                             None, None, None, None
446                         ]
447                     });
448                     (bits, (rest.len() - 1) as u16)
449                 };
450                 patterns[low as usize] = idx | 0x800 | ((min_bits as u16) << 12);
451                 let trie_entry = match rest.get_mut(idx as usize) {
452                     Some(v) => v,
453                     None => return Err("invalid huffman code".to_owned())
454                 };
455                 if bits <= 12 {
456                     for rest in 0u8 .. 1 << (12 - bits) {
457                         trie_entry.data[(high | (rest << (bits - 8))) as usize] = entry;
458                     }
459                 } else {
460                     let child = &mut trie_entry.children[(high & 0xf) as usize];
461                     if child.is_none() {
462                         *child = Some(Box::new([0xffff; 16]));
463                     }
464                     let child = &mut **child.as_mut().unwrap();
465                     let high_top = high >> 4;
466                     for rest in 0u8 .. 1 << (16 - bits) {
467                         child[(high_top | (rest << (bits - 12))) as usize] = entry;
468                     }
469                 }
470             }
471             Ok(())
472         });
473         debug!("=== DYN HUFFMAN ===");
474         for _i in 0..256 {
475             debug!("{:08b} {:04x}", _i, patterns[BIT_REV_U8[_i] as usize]);
476         }
477         debug!("===================");
478         Ok(DynHuffman16 {
479             patterns: patterns,
480             rest: rest,
481         })
482     }
483 
read<'a>(&self, stream: &mut BitStream<'a>) -> Result<Option<(BitStream<'a>, u16)>, String>484     fn read<'a>(&self, stream: &mut BitStream<'a>) -> Result<Option<(BitStream<'a>, u16)>, String> {
485         let has8 = stream.need(8);
486         let entry = self.patterns[(stream.state.v & 0xff) as usize];
487         let bits = (entry >> 12) as u8;
488 
489         Ok(if !has8 {
490             if bits <= stream.state.n {
491                 let save = stream.clone();
492                 stream.state.n -= bits;
493                 stream.state.v >>= bits;
494                 Some((save, entry & 0xfff))
495             } else {
496                 None
497             }
498         } else if bits <= 8 {
499             let save = stream.clone();
500             stream.state.n -= bits;
501             stream.state.v >>= bits;
502             Some((save, entry & 0xfff))
503         } else {
504             let has16 = stream.need(16);
505             let trie = match self.rest.get((entry & 0x7ff) as usize) {
506                 Some(trie) => trie,
507                 None => return Err("invalid entry in stream".to_owned()),
508             };
509             let idx = stream.state.v >> 8;
510             let trie_entry = match trie.children[(idx & 0xf) as usize] {
511                 Some(ref child) => child[((idx >> 4) & 0xf) as usize],
512                 None => trie.data[(idx & 0xf) as usize],
513             };
514             let trie_bits = (trie_entry >> 12) as u8;
515             if has16 || trie_bits <= stream.state.n {
516                 let save = stream.clone();
517                 stream.state.n -= trie_bits;
518                 stream.state.v >>= trie_bits;
519                 Some((save, trie_entry & 0xfff))
520             } else {
521                 None
522             }
523         })
524     }
525 }
526 
527 enum State {
528     ZlibMethodAndFlags, // CMF
529     ZlibFlags(/* CMF */ u8), // FLG,
530     Bits(BitsNext, BitState),
531     LenDist((BitsNext, BitState), /* len */ u16, /* dist */ u16),
532     Uncompressed(/* len */ u16),
533     CheckCRC(/* len */ u8, /* bytes */ [u8; 4]),
534     Finished
535 }
536 
537 use self::State::*;
538 
539 enum BitsNext {
540     BlockHeader,
541     BlockUncompressedLen,
542     BlockUncompressedNlen(/* len */ u16),
543     BlockDynHlit,
544     BlockDynHdist(/* hlit */ u8),
545     BlockDynHclen(/* hlit */ u8, /* hdist */ u8),
546     BlockDynClenCodeLengths(/* hlit */ u8, /* hdist */ u8, /* hclen */ u8, /* idx */ u8, /* clens */ Box<[u8; 19]>),
547     BlockDynCodeLengths(CodeLengthReader),
548     BlockDyn(/* lit/len */ DynHuffman16, /* dist */ DynHuffman16, /* prev_len */ u16)
549 }
550 
551 use self::BitsNext::*;
552 
553 pub struct InflateStream {
554     buffer: Vec<u8>,
555     pos: u16,
556     state: Option<State>,
557     final_block: bool,
558     checksum: Checksum,
559     read_checksum: Option<u32>,
560 }
561 
562 impl InflateStream {
563     #[allow(dead_code)]
564     /// Create a new stream for decoding raw deflate encoded data.
new() -> InflateStream565     pub fn new() -> InflateStream {
566         let state = Bits(BlockHeader, BitState { n: 0, v: 0 });
567         let buffer = Vec::with_capacity(32 * 1024);
568         InflateStream::with_state_and_buffer(state, buffer, Checksum::none())
569     }
570 
571     /// Create a new stream for decoding deflate encoded data with a zlib header and footer
from_zlib() -> InflateStream572     pub fn from_zlib() -> InflateStream {
573         InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::zlib())
574     }
575 
576     /// Create a new stream for decoding deflate encoded data with a zlib header and footer
577     ///
578     /// This version creates a decoder that does not checksum the data to validate it with the
579     /// checksum provided with the zlib wrapper.
from_zlib_no_checksum() -> InflateStream580     pub fn from_zlib_no_checksum() -> InflateStream {
581         InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::none())
582     }
583 
reset(&mut self)584     pub fn reset(&mut self) {
585         self.buffer.clear();
586         self.pos = 0;
587         self.state = Some(Bits(BlockHeader, BitState { n: 0, v: 0 }));
588         self.final_block = false;
589     }
590 
reset_to_zlib(&mut self)591     pub fn reset_to_zlib(&mut self) {
592         self.reset();
593         self.state = Some(ZlibMethodAndFlags);
594     }
595 
with_state_and_buffer(state: State, buffer: Vec<u8>, checksum: Checksum) -> InflateStream596     fn with_state_and_buffer(state: State, buffer: Vec<u8>, checksum: Checksum)
597                              -> InflateStream {
598         InflateStream {
599             buffer: buffer,
600             pos: 0,
601             state: Some(state),
602             final_block: false,
603             checksum: checksum,
604             read_checksum: None,
605         }
606     }
607 
run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String>608     fn run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String> {
609         debug!("RLE -{}; {} (cap={} len={})", dist, len,
610                self.buffer.capacity(), self.buffer.len());
611         if dist < 1 {
612             return Err("invalid run length in stream".to_owned());
613         }
614         // `buffer_size` is used for validating `unsafe` below, handle with care
615         let buffer_size = self.buffer.capacity() as u16;
616         let len = if self.pos < dist {
617             // Handle copying from ahead, until we hit the end reading.
618             let pos_end = self.pos + len;
619             let (pos_end, left) = if pos_end < dist {
620                 (pos_end, 0)
621             } else {
622                 (dist, pos_end - dist)
623             };
624             if dist > buffer_size {
625                 return Err("run length distance is bigger than the window size".to_owned());
626             }
627             let forward = buffer_size - dist;
628             if pos_end + forward > self.buffer.len() as u16 {
629                 return Err("invalid run length in stream".to_owned());
630             }
631 
632             for i in self.pos as usize..pos_end as usize {
633                 self.buffer[i] = self.buffer[i + forward as usize];
634             }
635             self.pos = pos_end;
636             left
637         } else {
638             len
639         };
640         // Handle copying from before, until we hit the end writing.
641         let pos_end = self.pos + len;
642         let (pos_end, left) = if pos_end <= buffer_size {
643             (pos_end, None)
644         } else {
645             (buffer_size, Some(pos_end - buffer_size))
646         };
647 
648         if self.pos < dist && pos_end > self.pos {
649             return Err("invalid run length in stream".to_owned());
650         }
651 
652         if self.buffer.len() < pos_end as usize {
653             // ensure the buffer length will not exceed the amount of allocated memory
654             assert!(pos_end <= buffer_size);
655             // ensure that the uninitialized chunk of memory will be fully overwritten
656             assert!(self.pos as usize <= self.buffer.len());
657             unsafe {
658                 self.buffer.set_len(pos_end as usize);
659             }
660         }
661         assert!(dist > 0); // validation against reading uninitialized memory
662         for i in self.pos as usize..pos_end as usize {
663             self.buffer[i] = self.buffer[i - dist as usize];
664         }
665         self.pos = pos_end;
666         Ok(left)
667     }
668 
next_state(&mut self, data: &[u8]) -> Result<usize, String>669     fn next_state(&mut self, data: &[u8]) -> Result<usize, String> {
670         macro_rules! ok_bytes (($n:expr, $state:expr) => ({
671             self.state = Some($state);
672             Ok($n)
673         }));
674         let debug_byte = |_i, _b| debug!("[{:04x}] {:02x}", _i, _b);
675         macro_rules! push_or (($b:expr, $ret:expr) => (if self.pos < self.buffer.capacity() as u16 {
676             let b = $b;
677             debug_byte(self.pos, b);
678             if (self.pos as usize) < self.buffer.len() {
679                 self.buffer[self.pos as usize] = b;
680             } else {
681                 assert_eq!(self.pos as usize, self.buffer.len());
682                 self.buffer.push(b);
683             }
684             self.pos += 1;
685         } else {
686             return $ret;
687         }));
688         macro_rules! run_len_dist (($len:expr, $dist:expr => ($bytes:expr, $next:expr, $state:expr)) => ({
689             let dist = $dist;
690             let left = try!(self.run_len_dist($len, dist));
691             if let Some(len) = left {
692                 return ok_bytes!($bytes, LenDist(($next, $state), len, dist));
693             }
694         }));
695         match self.state.take().unwrap() {
696             ZlibMethodAndFlags => {
697                 let b = match data.get(0) {
698                     Some(&x) => x,
699                     None => {
700                         self.state = Some(ZlibMethodAndFlags);
701                         return Ok(0);
702                     }
703                 };
704                 let (method, info) = (b & 0xF, b >> 4);
705                 debug!("ZLIB CM=0x{:x} CINFO=0x{:x}", method, info);
706 
707                 // CM = 8 (DEFLATE) is the only method defined by the ZLIB specification.
708                 match method {
709                     8 => {/* DEFLATE */}
710                     _ => return Err(format!("unknown ZLIB method CM=0x{:x}", method))
711                 }
712 
713                 if info > 7 {
714                     return Err(format!("invalid ZLIB info CINFO=0x{:x}", info));
715                 }
716 
717                 self.buffer = Vec::with_capacity(1 << (8 + info));
718 
719                 ok_bytes!(1, ZlibFlags(b))
720             }
721             ZlibFlags(cmf) => {
722                 let b = match data.get(0) {
723                     Some(&x) => x,
724                     None => {
725                         self.state = Some(ZlibFlags(cmf));
726                         return Ok(0);
727                     }
728                 };
729                 let (_check, dict, _level) = (b & 0x1F, (b & 0x20) != 0, b >> 6);
730                 debug!("ZLIB FCHECK=0x{:x} FDICT={} FLEVEL=0x{:x}", _check, dict, _level);
731 
732                 if (((cmf as u16) << 8) | b as u16) % 31 != 0 {
733                     return Err(format!("invalid ZLIB checksum CMF=0x{:x} FLG=0x{:x}", cmf, b));
734                 }
735 
736                 if dict {
737                     return Err("unimplemented ZLIB FDICT=1".into());
738                 }
739 
740                 ok_bytes!(1, Bits(BlockHeader, BitState { n: 0, v: 0 }))
741             }
742             Bits(next, state) => {
743                 let mut stream = BitStream::new(data, state);
744                 macro_rules! ok_state (($state:expr) => ({self.state = Some($state); Ok(stream.used)}));
745                 macro_rules! ok (($next:expr) => (ok_state!(Bits($next, stream.fill()))));
746                 macro_rules! take (
747                     ($n:expr => $next:expr) => (match stream.take($n) {
748                         Some(v) => v,
749                         None => return ok!($next)
750                     });
751                     ($n:expr) => (take!($n => next))
752                 );
753                 macro_rules! take16 (
754                     ($n:expr => $next:expr) => (match stream.take16($n) {
755                         Some(v) => v,
756                         None => return ok!($next)
757                     });
758                     ($n:expr) => (take16!($n => next))
759                 );
760                 macro_rules! len_dist (
761                     ($len:expr, $code:expr, $bits:expr => $next_early:expr, $next:expr) => ({
762                         let dist = 1 + if $bits == 0 { 0 } else { // new_base
763                             2 << $bits
764                         } + (($code as u16 - if $bits == 0 { 0 } else { // old_base
765                             $bits * 2 + 2
766                         }) << $bits) + take16!($bits => $next_early) as u16;
767                         run_len_dist!($len, dist => (stream.used, $next, stream.state));
768                     });
769                     ($len:expr, $code:expr, $bits:expr) => (
770                         len_dist!($len, $code, $bits => next, next)
771                     )
772                 );
773                 match next {
774                     BlockHeader => {
775                         if self.final_block {
776                             let (len, bytes) = stream.trailing_bytes();
777                             return ok_state!(CheckCRC(len, bytes));
778                         }
779                         let h = take!(3);
780                         let (final_, block_type) = ((h & 1) != 0, (h >> 1) & 0b11);
781 
782                         self.final_block = final_;
783 
784                         match block_type {
785                             0 => {
786                                 // Skip to the next byte for an uncompressed block.
787                                 stream.align_byte();
788                                 ok!(BlockUncompressedLen)
789                             }
790                             1 => {
791                                 // Unwrap is safe because the data is valid.
792                                 let lit = DynHuffman16::new(&[
793                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0-15
794                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 16-31
795                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 32-47
796                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 48-63
797                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 64-79
798                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 80-95
799                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 96-101
800                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 112-127
801                                     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128-143
802                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 144-159
803                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 160-175
804                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 176-191
805                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 192-207
806                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 208-223
807                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 224-239
808                                     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 240-255
809                                     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 256-271
810                                     7, 7, 7, 7, 7, 7, 7, 7, // 272-279
811                                     8, 8, 8, 8, 8, 8, 8, 8, // 280-287
812                                 ]).unwrap();
813                                 let dist = DynHuffman16::new(&[
814                                     5, 5, 5, 5, 5, 5, 5, 5,
815                                     5, 5, 5, 5, 5, 5, 5, 5,
816                                     5, 5, 5, 5, 5, 5, 5, 5,
817                                     5, 5, 5, 5, 5, 5, 5, 5
818                                 ]).unwrap();
819                                 ok!(BlockDyn(lit, dist, 0))
820                             }
821                             2 => ok!(BlockDynHlit),
822                             _ => {
823                                 Err(format!("unimplemented DEFLATE block type 0b{:?}",
824                                              block_type))
825                             }
826                         }
827                     }
828                     BlockUncompressedLen => {
829                         let len = take16!(16);
830                         ok_state!(Bits(BlockUncompressedNlen(len), stream.state))
831                     }
832                     BlockUncompressedNlen(len) => {
833                         let nlen = take16!(16);
834                         assert_eq!(stream.state.n, 0);
835                         if !len != nlen {
836                             return Err(format!("invalid uncompressed block len: LEN: {:04x} NLEN: {:04x}", len, nlen));
837                         }
838                         ok_state!(Uncompressed(len))
839                     }
840                     BlockDynHlit => ok!(BlockDynHdist(take!(5) + 1)),
841                     BlockDynHdist(hlit) => ok!(BlockDynHclen(hlit, take!(5) + 1)),
842                     BlockDynHclen(hlit, hdist) => {
843                         ok!(BlockDynClenCodeLengths(hlit, hdist, take!(4) + 4, 0, Box::new([0; 19])))
844                     }
845                     BlockDynClenCodeLengths(hlit, hdist, hclen, i, mut clens) => {
846                         let v =
847                             match stream.take(3) {
848                                 Some(v) => v,
849                                 None => return ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i, clens)),
850                             };
851                         clens[[16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15][i as usize]] = v;
852                         if i < hclen - 1 {
853                             ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i + 1, clens))
854                         } else {
855                             ok!(BlockDynCodeLengths(try!(CodeLengthReader::new(clens, hlit as u16 + 256, hdist))))
856                         }
857                     }
858                     BlockDynCodeLengths(mut reader) => {
859                         let finished = try!(reader.read(&mut stream));
860                         if finished {
861                             let (lit, dist) = try!(reader.to_lit_and_dist());
862                             ok!(BlockDyn(lit, dist, 0))
863                         } else {
864                             ok!(BlockDynCodeLengths(reader))
865                         }
866                     }
867                     BlockDyn(huff_lit_len, huff_dist, mut prev_len) => {
868                         macro_rules! next (($save_len:expr) => (BlockDyn(huff_lit_len, huff_dist, $save_len)));
869                         loop {
870                             let len = if prev_len != 0 {
871                                 let len = prev_len;
872                                 prev_len = 0;
873                                 len
874                             } else {
875                                 let (save, code16) = match try!(huff_lit_len.read(&mut stream)) {
876                                     Some(data) => data,
877                                     None => return ok!(next!(0)),
878                                 };
879                                 let code = code16 as u8;
880                                 debug!("{:09b}", code16);
881                                 match code16 {
882                                     0...255 => {
883                                         push_or!(code, ok!({stream = save; next!(0)}));
884                                         continue;
885                                     }
886                                     256...285 => {}
887                                     _ => return Err(format!("bad DEFLATE len code {}", code)),
888                                 }
889 
890                                 macro_rules! len (($code:expr, $bits:expr) => (
891                                     3 + if $bits == 0 { 0 } else { // new_base
892                                         4 << $bits
893                                     } + ((if $code == 29 {
894                                         256
895                                     } else {
896                                         $code as u16
897                                     } - if $bits == 0 { 0 } else { // old_base
898                                         $bits * 4 + 4
899                                     } - 1) << $bits) + take!($bits => {stream = save; next!(0)}) as u16
900                                 ));
901                                 match code {
902                                     0 => {
903                                         return if self.final_block {
904                                             let (len, bytes) = stream.trailing_bytes();
905                                             ok_state!(CheckCRC(len, bytes))
906                                         } else {
907                                             ok!(BlockHeader)
908                                         }
909                                     }
910                                     1...8 => len!(code, 0),
911                                     9...12 => len!(code, 1),
912                                     13...16 => len!(code, 2),
913                                     17...20 => len!(code, 3),
914                                     21...24 => len!(code, 4),
915                                     25...28 => len!(code, 5),
916                                     29 => len!(29, 0),
917                                     _ => return Err(format!("bad DEFLATE len code {}", code as u16 + 256)),
918                                 }
919                             };
920 
921                             let (save, dist_code) = match try!(huff_dist.read(&mut stream)) {
922                                 Some(data) => data,
923                                 None => return ok!(next!(len)),
924                             };
925                             debug!("  {:05b}", dist_code);
926                             macro_rules! len_dist_case (($bits:expr) => (
927                                 len_dist!(len, dist_code, $bits => {stream = save; next!(len)}, next!(0))
928                             ));
929                             match dist_code {
930                                 0...3 => len_dist_case!(0),
931                                 4...5 => len_dist_case!(1),
932                                 6...7 => len_dist_case!(2),
933                                 8...9 => len_dist_case!(3),
934                                 10...11 => len_dist_case!(4),
935                                 12...13 => len_dist_case!(5),
936                                 14...15 => len_dist_case!(6),
937                                 16...17 => len_dist_case!(7),
938                                 18...19 => len_dist_case!(8),
939                                 20...21 => len_dist_case!(9),
940                                 22...23 => len_dist_case!(10),
941                                 24...25 => len_dist_case!(11),
942                                 26...27 => len_dist_case!(12),
943                                 28...29 => len_dist_case!(13),
944                                 _ => return Err(format!("bad DEFLATE dist code {}", dist_code)),
945                             }
946                         }
947                     }
948                 }
949             }
950             LenDist((next, state), len, dist) => {
951                 run_len_dist!(len, dist => (0, next, state));
952                 ok_bytes!(0, Bits(next, state))
953             }
954             Uncompressed(mut len) => {
955                 for (i, &b) in data.iter().enumerate() {
956                     if len == 0 {
957                         return ok_bytes!(i, Bits(BlockHeader, BitState { n: 0, v: 0 }));
958                     }
959                     push_or!(b, ok_bytes!(i, Uncompressed(len)));
960                     len -= 1;
961                 }
962                 ok_bytes!(data.len(), Uncompressed(len))
963             }
964             CheckCRC(mut len, mut bytes) => {
965                 if self.checksum.is_none() {
966                     // TODO: inform caller of unused bytes
967                     return ok_bytes!(0, Finished);
968                 }
969 
970                 // Get the checksum value from the end of the stream.
971                 let mut used = 0;
972                 while len < 4 && used < data.len() {
973                     bytes[len as usize] = data[used];
974                     len += 1;
975                     used += 1;
976                 }
977                 if len < 4 {
978                     return ok_bytes!(used, CheckCRC(len, bytes));
979                 }
980 
981                 self.read_checksum = Some(adler32_from_bytes(&bytes));
982                 ok_bytes!(used, Finished)
983             }
984             Finished => {
985                 // TODO: inform caller of unused bytes
986                 ok_bytes!(data.len(), Finished)
987             }
988         }
989     }
990 
991     /// Try to uncompress/decode the data in `data`.
992     ///
993     /// On success, returns how many bytes of the input data was decompressed, and a reference to
994     /// the buffer containing the decompressed data.
995     ///
996     /// This function may not uncompress all the provided data in one call, so it has to be called
997     /// repeatedly with the data that hasn't been decompressed yet as an input until the number of
998     /// bytes decoded returned is 0. (See the [top level crate documentation](index.html)
999     /// for an example.)
1000     ///
1001     /// # Errors
1002     /// If invalid input data is encountered, a string describing what went wrong is returned.
update<'a>(&'a mut self, mut data: &[u8]) -> Result<(usize, &'a [u8]), String>1003     pub fn update<'a>(&'a mut self, mut data: &[u8]) -> Result<(usize, &'a [u8]), String> {
1004         let original_size = data.len();
1005         let original_pos = self.pos as usize;
1006         let mut empty = false;
1007         while !empty &&
1008               ((self.pos as usize) < self.buffer.capacity() || self.buffer.capacity() == 0) {
1009             // next_state must be called at least once after the data is empty.
1010             empty = data.is_empty();
1011             match self.next_state(data) {
1012                 Ok(n) => {
1013                     data = &data[n..];
1014                 }
1015                 Err(m) => return Err(m),
1016             }
1017         }
1018         let output = &self.buffer[original_pos..self.pos as usize];
1019         if self.pos as usize >= self.buffer.capacity() {
1020             self.pos = 0;
1021         }
1022 
1023         // Update the checksum..
1024         self.checksum.update(output);
1025         // and validate if we are done decoding.
1026         if let Some(c) = self.read_checksum {
1027             try!(self.checksum.check(c));
1028         }
1029 
1030         Ok((original_size - data.len(), output))
1031     }
1032 
1033     /// Returns the calculated checksum value of the currently decoded data.
1034     ///
1035     /// Will return 0 for cases where the checksum is not validated.
current_checksum(&self) -> u321036     pub fn current_checksum(&self) -> u32 {
1037         self.checksum.current_value()
1038     }
1039 }
1040