1 use std::cmp;
2 use std::io::prelude::*;
3 use std::io;
4 use std::mem;
5 
6 use super::{Builder, Header};
7 use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
8 use Compression;
9 use crc::CrcReader;
10 use deflate;
11 
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is the read cursor into `from`; it is advanced by the number of
/// bytes copied, which is also the return value. Bytes of `into` beyond the
/// copied prefix are left untouched.
///
/// # Panics
///
/// Panics if `*pos` is greater than `from.len()`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let remaining = &from[*pos..];
    let n = cmp::min(into.len(), remaining.len());
    // Both sides are cut to exactly `n` bytes, which `copy_from_slice` requires.
    into[..n].copy_from_slice(&remaining[..n]);
    *pos += n;
    n
}
/// Builds the `InvalidInput` error used when a gzip checksum or length
/// check does not match.
fn corrupt() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(kind, message)
}
26 
/// Builds the `InvalidInput` error used when the fixed part of a gzip
/// header fails validation.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    let message = "invalid gzip header";
    io::Error::new(kind, message)
}
30 
/// Reads exactly two bytes from `r` and decodes them as a little-endian `u16`.
///
/// # Errors
///
/// Propagates any error from `read_exact`, including `UnexpectedEof` when
/// fewer than two bytes are available.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut bytes = [0u8; 2];
    r.read_exact(&mut bytes)?;
    // First byte is the low half, second byte the high half.
    Ok((bytes[0] as u16) | ((bytes[1] as u16) << 8))
}
36 
read_gz_header<R: Read>(r: &mut R) -> io::Result<Header>37 fn read_gz_header<R: Read>(r: &mut R) -> io::Result<Header> {
38     let mut crc_reader = CrcReader::new(r);
39     let mut header = [0; 10];
40     try!(crc_reader.read_exact(&mut header));
41 
42     let id1 = header[0];
43     let id2 = header[1];
44     if id1 != 0x1f || id2 != 0x8b {
45         return Err(bad_header());
46     }
47     let cm = header[2];
48     if cm != 8 {
49         return Err(bad_header());
50     }
51 
52     let flg = header[3];
53     let mtime = ((header[4] as u32) << 0) | ((header[5] as u32) << 8) | ((header[6] as u32) << 16) |
54         ((header[7] as u32) << 24);
55     let _xfl = header[8];
56     let _os = header[9];
57 
58     let extra = if flg & FEXTRA != 0 {
59         let xlen = try!(read_le_u16(&mut crc_reader));
60         let mut extra = vec![0; xlen as usize];
61         try!(crc_reader.read_exact(&mut extra));
62         Some(extra)
63     } else {
64         None
65     };
66     let filename = if flg & FNAME != 0 {
67         // wow this is slow
68         let mut b = Vec::new();
69         for byte in crc_reader.by_ref().bytes() {
70             let byte = try!(byte);
71             if byte == 0 {
72                 break;
73             }
74             b.push(byte);
75         }
76         Some(b)
77     } else {
78         None
79     };
80     let comment = if flg & FCOMMENT != 0 {
81         // wow this is slow
82         let mut b = Vec::new();
83         for byte in crc_reader.by_ref().bytes() {
84             let byte = try!(byte);
85             if byte == 0 {
86                 break;
87             }
88             b.push(byte);
89         }
90         Some(b)
91     } else {
92         None
93     };
94 
95     if flg & FHCRC != 0 {
96         let calced_crc = crc_reader.crc().sum() as u16;
97         let stored_crc = try!(read_le_u16(&mut crc_reader));
98         if calced_crc != stored_crc {
99             return Err(corrupt());
100         }
101     }
102 
103     Ok(Header {
104         extra: extra,
105         filename: filename,
106         comment: comment,
107         mtime: mtime,
108     })
109 }
110 
111 
/// A gzip streaming encoder
///
/// This structure implements [`Read`]: reading from it pulls uncompressed
/// data from the underlying [`BufRead`] and yields the compressed version of
/// that data, preceded by the header bytes and followed by an 8-byte footer.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::Fast);
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    /// Deflate compressor reading from the CRC-tracking wrapper around the
    /// source reader.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    /// Header bytes emitted ahead of the compressed data.
    header: Vec<u8>,
    /// Cursor into `header` while the header is being emitted; reset to 0
    /// when the compressed data ends and then reused as the cursor into the
    /// 8-byte footer.
    pos: usize,
    /// Set once `inner` has reported end of stream; from then on reads only
    /// produce footer bytes.
    eof: bool,
}
149 
gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R>150 pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression)
151     -> GzEncoder<R>
152 {
153     let crc = CrcReader::new(r);
154     GzEncoder {
155         inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
156         header: header,
157         pos: 0,
158         eof: false,
159     }
160 }
161 
162 impl<R: BufRead> GzEncoder<R> {
163     /// Creates a new encoder which will use the given compression level.
164     ///
165     /// The encoder is not configured specially for the emitted header. For
166     /// header configuration, see the `Builder` type.
167     ///
168     /// The data read from the stream `r` will be compressed and available
169     /// through the returned reader.
new(r: R, level: Compression) -> GzEncoder<R>170     pub fn new(r: R, level: Compression) -> GzEncoder<R> {
171         Builder::new().buf_read(r, level)
172     }
173 
read_footer(&mut self, into: &mut [u8]) -> io::Result<usize>174     fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
175         if self.pos == 8 {
176             return Ok(0);
177         }
178         let crc = self.inner.get_ref().crc();
179         let ref arr = [
180             (crc.sum() >> 0) as u8,
181             (crc.sum() >> 8) as u8,
182             (crc.sum() >> 16) as u8,
183             (crc.sum() >> 24) as u8,
184             (crc.amount() >> 0) as u8,
185             (crc.amount() >> 8) as u8,
186             (crc.amount() >> 16) as u8,
187             (crc.amount() >> 24) as u8,
188         ];
189         Ok(copy(into, arr, &mut self.pos))
190     }
191 }
192 
193 impl<R> GzEncoder<R> {
194     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R195     pub fn get_ref(&self) -> &R {
196         self.inner.get_ref().get_ref()
197     }
198 
199     /// Acquires a mutable reference to the underlying reader.
200     ///
201     /// Note that mutation of the reader may result in surprising results if
202     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R203     pub fn get_mut(&mut self) -> &mut R {
204         self.inner.get_mut().get_mut()
205     }
206 
207     /// Returns the underlying stream, consuming this encoder
into_inner(self) -> R208     pub fn into_inner(self) -> R {
209         self.inner.into_inner().into_inner()
210     }
211 }
212 
213 impl<R: BufRead> Read for GzEncoder<R> {
read(&mut self, mut into: &mut [u8]) -> io::Result<usize>214     fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
215         let mut amt = 0;
216         if self.eof {
217             return self.read_footer(into);
218         } else if self.pos < self.header.len() {
219             amt += copy(into, &self.header, &mut self.pos);
220             if amt == into.len() {
221                 return Ok(amt);
222             }
223             let tmp = into;
224             into = &mut tmp[amt..];
225         }
226         match try!(self.inner.read(into)) {
227             0 => {
228                 self.eof = true;
229                 self.pos = 0;
230                 self.read_footer(into)
231             }
232             n => Ok(amt + n),
233         }
234     }
235 }
236 
237 impl<R: BufRead + Write> Write for GzEncoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>238     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
239         self.get_mut().write(buf)
240     }
241 
flush(&mut self) -> io::Result<()>242     fn flush(&mut self) -> io::Result<()> {
243         self.get_mut().flush()
244     }
245 }
246 
247 
/// A gzip streaming decoder
///
/// This structure implements [`Read`]: reading from it consumes compressed
/// data from the underlying [`BufRead`] and emits the uncompressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::Default);
/// #   e.write(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..])?;
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    /// CRC-tracking wrapper around the deflate decompressor, so the checksum
    /// and byte count cover the decompressed output.
    inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    /// Header parsed when the decoder was created.
    header: Header,
    /// Set once the 8-byte trailer has been read and verified.
    finished: bool,
}
287 
288 
289 impl<R: BufRead> GzDecoder<R> {
290     /// Creates a new decoder from the given reader, immediately parsing the
291     /// gzip header.
292     ///
293     /// # Errors
294     ///
295     /// If an error is encountered when parsing the gzip header, an error is
296     /// returned.
new(mut r: R) -> io::Result<GzDecoder<R>>297     pub fn new(mut r: R) -> io::Result<GzDecoder<R>> {
298         let header = try!(read_gz_header(&mut r));
299 
300         let flate = deflate::bufread::DeflateDecoder::new(r);
301         return Ok(GzDecoder {
302             inner: CrcReader::new(flate),
303             header: header,
304             finished: false,
305         });
306     }
307 
finish(&mut self) -> io::Result<()>308     fn finish(&mut self) -> io::Result<()> {
309         if self.finished {
310             return Ok(());
311         }
312         let ref mut buf = [0u8; 8];
313         {
314             let mut len = 0;
315 
316             while len < buf.len() {
317                 match try!(self.inner.get_mut().get_mut().read(&mut buf[len..])) {
318                     0 => return Err(corrupt()),
319                     n => len += n,
320                 }
321             }
322         }
323 
324         let crc = ((buf[0] as u32) << 0) | ((buf[1] as u32) << 8) | ((buf[2] as u32) << 16) |
325             ((buf[3] as u32) << 24);
326         let amt = ((buf[4] as u32) << 0) | ((buf[5] as u32) << 8) | ((buf[6] as u32) << 16) |
327             ((buf[7] as u32) << 24);
328         if crc != self.inner.crc().sum() as u32 {
329             return Err(corrupt());
330         }
331         if amt != self.inner.crc().amount() {
332             return Err(corrupt());
333         }
334         self.finished = true;
335         Ok(())
336     }
337 }
338 
339 impl<R> GzDecoder<R> {
340     /// Returns the header associated with this stream.
header(&self) -> &Header341     pub fn header(&self) -> &Header {
342         &self.header
343     }
344 
345     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R346     pub fn get_ref(&self) -> &R {
347         self.inner.get_ref().get_ref()
348     }
349 
350     /// Acquires a mutable reference to the underlying stream.
351     ///
352     /// Note that mutation of the stream may result in surprising results if
353     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R354     pub fn get_mut(&mut self) -> &mut R {
355         self.inner.get_mut().get_mut()
356     }
357 
358     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R359     pub fn into_inner(self) -> R {
360         self.inner.into_inner().into_inner()
361     }
362 }
363 
364 impl<R: BufRead> Read for GzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>365     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
366         match try!(self.inner.read(into)) {
367             0 => {
368                 try!(self.finish());
369                 Ok(0)
370             }
371             n => Ok(n),
372         }
373     }
374 }
375 
376 impl<R: BufRead + Write> Write for GzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>377     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
378         self.get_mut().write(buf)
379     }
380 
flush(&mut self) -> io::Result<()>381     fn flush(&mut self) -> io::Result<()> {
382         self.get_mut().flush()
383     }
384 }
385 
386 
387 
/// A gzip streaming decoder that decodes all members of a multistream
///
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
/// decode all consecutive members while `GzDecoder` will only decompress
/// the first gzip member. The multistream format is commonly used in
/// bioinformatics, for example when using the BGZF compressed data.
///
/// This structure implements [`Read`]: reading from it consumes all gzip
/// members from the underlying [`BufRead`] and emits the uncompressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::Default);
/// #   e.write(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..])?;
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct MultiGzDecoder<R> {
    /// CRC-tracking wrapper around the deflate decompressor; both are reset
    /// when a new member begins.
    inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    /// Header of the member currently being decoded; replaced each time a
    /// following member's header is parsed.
    header: Header,
    /// Set once a trailer has been verified and no further input follows it.
    finished: bool,
}
434 
435 
436 impl<R: BufRead> MultiGzDecoder<R> {
437     /// Creates a new decoder from the given reader, immediately parsing the
438     /// (first) gzip header. If the gzip stream contains multiple members all will
439     /// be decoded.
440     ///
441     /// # Errors
442     ///
443     /// If an error is encountered when parsing the gzip header, an error is
444     /// returned.
new(mut r: R) -> io::Result<MultiGzDecoder<R>>445     pub fn new(mut r: R) -> io::Result<MultiGzDecoder<R>> {
446         let header = try!(read_gz_header(&mut r));
447 
448         let flate = deflate::bufread::DeflateDecoder::new(r);
449         return Ok(MultiGzDecoder {
450             inner: CrcReader::new(flate),
451             header: header,
452             finished: false,
453         });
454     }
455 
finish_member(&mut self) -> io::Result<usize>456     fn finish_member(&mut self) -> io::Result<usize> {
457         if self.finished {
458             return Ok(0);
459         }
460         let ref mut buf = [0u8; 8];
461         {
462             let mut len = 0;
463 
464             while len < buf.len() {
465                 match try!(self.inner.get_mut().get_mut().read(&mut buf[len..])) {
466                     0 => return Err(corrupt()),
467                     n => len += n,
468                 }
469             }
470         }
471 
472         let crc = ((buf[0] as u32) << 0) | ((buf[1] as u32) << 8) | ((buf[2] as u32) << 16) |
473             ((buf[3] as u32) << 24);
474         let amt = ((buf[4] as u32) << 0) | ((buf[5] as u32) << 8) | ((buf[6] as u32) << 16) |
475             ((buf[7] as u32) << 24);
476         if crc != self.inner.crc().sum() as u32 {
477             return Err(corrupt());
478         }
479         if amt != self.inner.crc().amount() {
480             return Err(corrupt());
481         }
482         let remaining = match self.inner.get_mut().get_mut().fill_buf() {
483             Ok(b) => if b.is_empty() {
484                 self.finished = true;
485                 return Ok(0);
486             } else {
487                 b.len()
488             },
489             Err(e) => return Err(e),
490         };
491 
492         let next_header = try!(read_gz_header(self.inner.get_mut().get_mut()));
493         mem::replace(&mut self.header, next_header);
494         self.inner.reset();
495         self.inner.get_mut().reset_data();
496 
497         Ok(remaining)
498     }
499 }
500 
501 impl<R> MultiGzDecoder<R> {
502     /// Returns the current header associated with this stream.
header(&self) -> &Header503     pub fn header(&self) -> &Header {
504         &self.header
505     }
506 
507     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R508     pub fn get_ref(&self) -> &R {
509         self.inner.get_ref().get_ref()
510     }
511 
512     /// Acquires a mutable reference to the underlying stream.
513     ///
514     /// Note that mutation of the stream may result in surprising results if
515     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R516     pub fn get_mut(&mut self) -> &mut R {
517         self.inner.get_mut().get_mut()
518     }
519 
520     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R521     pub fn into_inner(self) -> R {
522         self.inner.into_inner().into_inner()
523     }
524 }
525 
526 impl<R: BufRead> Read for MultiGzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>527     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
528         match try!(self.inner.read(into)) {
529             0 => match self.finish_member() {
530                 Ok(0) => Ok(0),
531                 Ok(_) => self.read(into),
532                 Err(e) => Err(e),
533             },
534             n => Ok(n),
535         }
536     }
537 }
538 
539 impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>540     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
541         self.get_mut().write(buf)
542     }
543 
flush(&mut self) -> io::Result<()>544     fn flush(&mut self) -> io::Result<()> {
545         self.get_mut().flush()
546     }
547 }
548