1 use std::cmp;
2 use std::io;
3 use std::io::prelude::*;
4 use std::mem;
5 
6 #[cfg(feature = "tokio")]
7 use futures::Poll;
8 #[cfg(feature = "tokio")]
9 use tokio_io::{AsyncRead, AsyncWrite};
10 
11 use super::{GzBuilder, GzHeader};
12 use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
13 use crate::crc::CrcReader;
14 use crate::deflate;
15 use crate::Compression;
16 
/// Copies as many bytes as will fit from `from[*pos..]` into the front of
/// `into`.
///
/// `pos` is advanced past the bytes that were copied and the number of bytes
/// copied is returned.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let count = cmp::min(into.len(), from.len() - *pos);
    let start = *pos;
    into[..count].copy_from_slice(&from[start..start + count]);
    *pos = start + count;
    count
}
25 
/// Builds the `InvalidInput` error used to report a gzip checksum mismatch.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
32 
/// Builds the `InvalidInput` error used to report a malformed gzip header.
fn bad_header() -> io::Error {
    let message = "invalid gzip header";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
36 
/// Reads exactly two bytes from `r` and decodes them as a little-endian `u16`.
///
/// Any error from `read_exact` (including a short read) is returned as is.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut raw = [0u8; 2];
    r.read_exact(&mut raw)?;
    Ok(u16::from_le_bytes(raw))
}
42 
read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader>43 pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
44     let mut crc_reader = CrcReader::new(r);
45     let mut header = [0; 10];
46     crc_reader.read_exact(&mut header)?;
47 
48     let id1 = header[0];
49     let id2 = header[1];
50     if id1 != 0x1f || id2 != 0x8b {
51         return Err(bad_header());
52     }
53     let cm = header[2];
54     if cm != 8 {
55         return Err(bad_header());
56     }
57 
58     let flg = header[3];
59     let mtime = ((header[4] as u32) << 0)
60         | ((header[5] as u32) << 8)
61         | ((header[6] as u32) << 16)
62         | ((header[7] as u32) << 24);
63     let _xfl = header[8];
64     let os = header[9];
65 
66     let extra = if flg & FEXTRA != 0 {
67         let xlen = read_le_u16(&mut crc_reader)?;
68         let mut extra = vec![0; xlen as usize];
69         crc_reader.read_exact(&mut extra)?;
70         Some(extra)
71     } else {
72         None
73     };
74     let filename = if flg & FNAME != 0 {
75         // wow this is slow
76         let mut b = Vec::new();
77         for byte in crc_reader.by_ref().bytes() {
78             let byte = byte?;
79             if byte == 0 {
80                 break;
81             }
82             b.push(byte);
83         }
84         Some(b)
85     } else {
86         None
87     };
88     let comment = if flg & FCOMMENT != 0 {
89         // wow this is slow
90         let mut b = Vec::new();
91         for byte in crc_reader.by_ref().bytes() {
92             let byte = byte?;
93             if byte == 0 {
94                 break;
95             }
96             b.push(byte);
97         }
98         Some(b)
99     } else {
100         None
101     };
102 
103     if flg & FHCRC != 0 {
104         let calced_crc = crc_reader.crc().sum() as u16;
105         let stored_crc = read_le_u16(&mut crc_reader)?;
106         if calced_crc != stored_crc {
107             return Err(corrupt());
108         }
109     }
110 
111     Ok(GzHeader {
112         extra: extra,
113         filename: filename,
114         comment: comment,
115         operating_system: os,
116         mtime: mtime,
117     })
118 }
119 
/// A gzip streaming encoder
///
/// This structure consumes a [`BufRead`] interface, reading uncompressed data
/// from the underlying reader, and exposes the compressed gzip stream through
/// its [`Read`] implementation.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from a CRC-tracking wrapper around the input;
    // the wrapper's checksum and byte count are what the footer is built from.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Header bytes that are emitted before any compressed data.
    header: Vec<u8>,
    // Cursor into `header` while the header is being emitted; reset to zero
    // and reused as the cursor into the 8-byte footer once `eof` is set.
    pos: usize,
    // Set once the deflate stream has reported end of data; from then on reads
    // are served from the footer.
    eof: bool,
}
157 
gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R>158 pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
159     let crc = CrcReader::new(r);
160     GzEncoder {
161         inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
162         header: header,
163         pos: 0,
164         eof: false,
165     }
166 }
167 
168 impl<R: BufRead> GzEncoder<R> {
169     /// Creates a new encoder which will use the given compression level.
170     ///
171     /// The encoder is not configured specially for the emitted header. For
172     /// header configuration, see the `GzBuilder` type.
173     ///
174     /// The data read from the stream `r` will be compressed and available
175     /// through the returned reader.
new(r: R, level: Compression) -> GzEncoder<R>176     pub fn new(r: R, level: Compression) -> GzEncoder<R> {
177         GzBuilder::new().buf_read(r, level)
178     }
179 
read_footer(&mut self, into: &mut [u8]) -> io::Result<usize>180     fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
181         if self.pos == 8 {
182             return Ok(0);
183         }
184         let crc = self.inner.get_ref().crc();
185         let ref arr = [
186             (crc.sum() >> 0) as u8,
187             (crc.sum() >> 8) as u8,
188             (crc.sum() >> 16) as u8,
189             (crc.sum() >> 24) as u8,
190             (crc.amount() >> 0) as u8,
191             (crc.amount() >> 8) as u8,
192             (crc.amount() >> 16) as u8,
193             (crc.amount() >> 24) as u8,
194         ];
195         Ok(copy(into, arr, &mut self.pos))
196     }
197 }
198 
199 impl<R> GzEncoder<R> {
200     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R201     pub fn get_ref(&self) -> &R {
202         self.inner.get_ref().get_ref()
203     }
204 
205     /// Acquires a mutable reference to the underlying reader.
206     ///
207     /// Note that mutation of the reader may result in surprising results if
208     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R209     pub fn get_mut(&mut self) -> &mut R {
210         self.inner.get_mut().get_mut()
211     }
212 
213     /// Returns the underlying stream, consuming this encoder
into_inner(self) -> R214     pub fn into_inner(self) -> R {
215         self.inner.into_inner().into_inner()
216     }
217 }
218 
/// Splits an 8-byte gzip trailer into `(crc, amount)`.
///
/// The first four bytes are decoded as a little-endian `u32` checksum and the
/// last four as a little-endian `u32` byte count.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    let crc = u32::from_le_bytes([c0, c1, c2, c3]);
    let amt = u32::from_le_bytes([a0, a1, a2, a3]);
    (crc, amt)
}
231 
232 impl<R: BufRead> Read for GzEncoder<R> {
read(&mut self, mut into: &mut [u8]) -> io::Result<usize>233     fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
234         let mut amt = 0;
235         if self.eof {
236             return self.read_footer(into);
237         } else if self.pos < self.header.len() {
238             amt += copy(into, &self.header, &mut self.pos);
239             if amt == into.len() {
240                 return Ok(amt);
241             }
242             let tmp = into;
243             into = &mut tmp[amt..];
244         }
245         match self.inner.read(into)? {
246             0 => {
247                 self.eof = true;
248                 self.pos = 0;
249                 self.read_footer(into)
250             }
251             n => Ok(amt + n),
252         }
253     }
254 }
255 
256 impl<R: BufRead + Write> Write for GzEncoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>257     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
258         self.get_mut().write(buf)
259     }
260 
flush(&mut self) -> io::Result<()>261     fn flush(&mut self) -> io::Result<()> {
262         self.get_mut().flush()
263     }
264 }
265 
/// A gzip streaming decoder
///
/// This structure consumes a [`BufRead`] interface, reading compressed data
/// from the underlying reader, and emitting uncompressed data.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Where decoding currently stands (header, body, trailer, ...).
    inner: GzState,
    // The parsed header of the current member; `None` until a header has been
    // parsed successfully.
    header: Option<GzHeader>,
    // Deflate decoder over the input, wrapped so that a CRC and byte count of
    // the decompressed output are tracked for the trailer check.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When set, decoding continues with the next member after a trailer has
    // been verified and more input is available.
    multi: bool,
}
306 
// Decoding progress of a `GzDecoder`.
#[derive(Debug)]
enum GzState {
    // The member header has not been fully parsed yet. The vector keeps the
    // header bytes read so far so that parsing can be restarted from them
    // after a `WouldBlock`.
    Header(Vec<u8>),
    // The header is parsed; compressed data is being decoded.
    Body,
    // The compressed data has ended and the 8-byte trailer is being read.
    // The `usize` is how many trailer bytes are already in the array.
    Finished(usize, [u8; 8]),
    // Header parsing failed in `GzDecoder::new`; the stored error is returned
    // by a later `read`.
    Err(io::Error),
    // Nothing more to produce; reads return `Ok(0)`.
    End,
}
315 
/// A small adapter that first replays the bytes already held in `buf` and,
/// once those are used up, reads from `reader` instead. Everything obtained
/// from `reader` is also appended to `buf` so it can be replayed by a later
/// `Buffer` built over the same vector.
struct Buffer<'a, T: 'a> {
    buf: &'a mut Vec<u8>,
    buf_cur: usize,
    buf_max: usize,
    reader: &'a mut T,
}

impl<'a, T> Buffer<'a, T> {
    /// Wraps `buf` and `reader`; the bytes currently in `buf` form the replay
    /// window.
    fn new(buf: &'a mut Vec<u8>, reader: &'a mut T) -> Buffer<'a, T> {
        let buf_max = buf.len();
        Buffer {
            buf,
            buf_cur: 0,
            buf_max,
            reader,
        }
    }
}

impl<'a, T: Read> Read for Buffer<'a, T> {
    /// Serves replayed bytes while any remain, otherwise reads from the
    /// wrapped reader and records what was read.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.buf_cur != self.buf_max {
            // Still inside the replay window: copy out as much as fits.
            let pending = &self.buf[self.buf_cur..self.buf_max];
            let len = cmp::min(pending.len(), buf.len());
            buf[..len].copy_from_slice(&pending[..len]);
            self.buf_cur += len;
            return Ok(len);
        }

        // Replay window exhausted: pull fresh data and remember it.
        let len = self.reader.read(buf)?;
        self.buf.extend_from_slice(&buf[..len]);
        Ok(len)
    }
}
350 
351 impl<R: BufRead> GzDecoder<R> {
352     /// Creates a new decoder from the given reader, immediately parsing the
353     /// gzip header.
new(mut r: R) -> GzDecoder<R>354     pub fn new(mut r: R) -> GzDecoder<R> {
355         let mut buf = Vec::with_capacity(10); // minimum header length
356         let mut header = None;
357 
358         let result = {
359             let mut reader = Buffer::new(&mut buf, &mut r);
360             read_gz_header(&mut reader)
361         };
362 
363         let state = match result {
364             Ok(hdr) => {
365                 header = Some(hdr);
366                 GzState::Body
367             }
368             Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(buf),
369             Err(err) => GzState::Err(err),
370         };
371 
372         GzDecoder {
373             inner: state,
374             reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
375             multi: false,
376             header,
377         }
378     }
379 
multi(mut self, flag: bool) -> GzDecoder<R>380     fn multi(mut self, flag: bool) -> GzDecoder<R> {
381         self.multi = flag;
382         self
383     }
384 }
385 
386 impl<R> GzDecoder<R> {
387     /// Returns the header associated with this stream, if it was valid
header(&self) -> Option<&GzHeader>388     pub fn header(&self) -> Option<&GzHeader> {
389         self.header.as_ref()
390     }
391 
392     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R393     pub fn get_ref(&self) -> &R {
394         self.reader.get_ref().get_ref()
395     }
396 
397     /// Acquires a mutable reference to the underlying stream.
398     ///
399     /// Note that mutation of the stream may result in surprising results if
400     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R401     pub fn get_mut(&mut self) -> &mut R {
402         self.reader.get_mut().get_mut()
403     }
404 
405     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R406     pub fn into_inner(self) -> R {
407         self.reader.into_inner().into_inner()
408     }
409 }
410 
411 impl<R: BufRead> Read for GzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>412     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
413         let GzDecoder {
414             inner,
415             header,
416             reader,
417             multi,
418         } = self;
419 
420         loop {
421             *inner = match mem::replace(inner, GzState::End) {
422                 GzState::Header(mut buf) => {
423                     let result = {
424                         let mut reader = Buffer::new(&mut buf, reader.get_mut().get_mut());
425                         read_gz_header(&mut reader)
426                     };
427                     let hdr = result.map_err(|err| {
428                         if io::ErrorKind::WouldBlock == err.kind() {
429                             *inner = GzState::Header(buf);
430                         }
431 
432                         err
433                     })?;
434                     *header = Some(hdr);
435                     GzState::Body
436                 }
437                 GzState::Body => {
438                     if into.is_empty() {
439                         *inner = GzState::Body;
440                         return Ok(0);
441                     }
442 
443                     let n = reader.read(into).map_err(|err| {
444                         if io::ErrorKind::WouldBlock == err.kind() {
445                             *inner = GzState::Body;
446                         }
447 
448                         err
449                     })?;
450 
451                     match n {
452                         0 => GzState::Finished(0, [0; 8]),
453                         n => {
454                             *inner = GzState::Body;
455                             return Ok(n);
456                         }
457                     }
458                 }
459                 GzState::Finished(pos, mut buf) => {
460                     if pos < buf.len() {
461                         let n = reader
462                             .get_mut()
463                             .get_mut()
464                             .read(&mut buf[pos..])
465                             .and_then(|n| {
466                                 if n == 0 {
467                                     Err(io::ErrorKind::UnexpectedEof.into())
468                                 } else {
469                                     Ok(n)
470                                 }
471                             })
472                             .map_err(|err| {
473                                 if io::ErrorKind::WouldBlock == err.kind() {
474                                     *inner = GzState::Finished(pos, buf);
475                                 }
476 
477                                 err
478                             })?;
479 
480                         GzState::Finished(pos + n, buf)
481                     } else {
482                         let (crc, amt) = finish(&buf);
483 
484                         if crc != reader.crc().sum() || amt != reader.crc().amount() {
485                             return Err(corrupt());
486                         } else if *multi {
487                             let is_eof = reader
488                                 .get_mut()
489                                 .get_mut()
490                                 .fill_buf()
491                                 .map(|buf| buf.is_empty())
492                                 .map_err(|err| {
493                                     if io::ErrorKind::WouldBlock == err.kind() {
494                                         *inner = GzState::Finished(pos, buf);
495                                     }
496 
497                                     err
498                                 })?;
499 
500                             if is_eof {
501                                 GzState::End
502                             } else {
503                                 reader.reset();
504                                 reader.get_mut().reset_data();
505                                 header.take();
506                                 GzState::Header(Vec::with_capacity(10))
507                             }
508                         } else {
509                             GzState::End
510                         }
511                     }
512                 }
513                 GzState::Err(err) => return Err(err),
514                 GzState::End => return Ok(0),
515             };
516         }
517     }
518 }
519 
// Only compiled with the `tokio` feature. The impl body is empty, so no
// `AsyncRead` method is overridden and the trait's provided behaviour is used
// as is.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for GzDecoder<R> {}
522 
523 impl<R: BufRead + Write> Write for GzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>524     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
525         self.get_mut().write(buf)
526     }
527 
flush(&mut self) -> io::Result<()>528     fn flush(&mut self) -> io::Result<()> {
529         self.get_mut().flush()
530     }
531 }
532 
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for GzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        let underlying = self.get_mut();
        underlying.shutdown()
    }
}
539 
/// A gzip streaming decoder that decodes all members of a multistream
///
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
/// decode all consecutive members while `GzDecoder` will only decompress
/// the first gzip member. The multistream format is commonly used in
/// bioinformatics, for example when using the BGZF compressed data.
///
/// This structure consumes a [`BufRead`] interface, reading all gzip members
/// from the underlying reader, and emits the uncompressed data through its
/// [`Read`] implementation.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
// Thin wrapper around a `GzDecoder` that has its multi-member flag enabled.
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
582 
583 impl<R: BufRead> MultiGzDecoder<R> {
584     /// Creates a new decoder from the given reader, immediately parsing the
585     /// (first) gzip header. If the gzip stream contains multiple members all will
586     /// be decoded.
new(r: R) -> MultiGzDecoder<R>587     pub fn new(r: R) -> MultiGzDecoder<R> {
588         MultiGzDecoder(GzDecoder::new(r).multi(true))
589     }
590 }
591 
592 impl<R> MultiGzDecoder<R> {
593     /// Returns the current header associated with this stream, if it's valid
header(&self) -> Option<&GzHeader>594     pub fn header(&self) -> Option<&GzHeader> {
595         self.0.header()
596     }
597 
598     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R599     pub fn get_ref(&self) -> &R {
600         self.0.get_ref()
601     }
602 
603     /// Acquires a mutable reference to the underlying stream.
604     ///
605     /// Note that mutation of the stream may result in surprising results if
606     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R607     pub fn get_mut(&mut self) -> &mut R {
608         self.0.get_mut()
609     }
610 
611     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R612     pub fn into_inner(self) -> R {
613         self.0.into_inner()
614     }
615 }
616 
617 impl<R: BufRead> Read for MultiGzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>618     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
619         self.0.read(into)
620     }
621 }
622 
// Only compiled with the `tokio` feature. The impl body is empty, so no
// `AsyncRead` method is overridden and the trait's provided behaviour is used
// as is.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for MultiGzDecoder<R> {}
625 
626 impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>627     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
628         self.get_mut().write(buf)
629     }
630 
flush(&mut self) -> io::Result<()>631     fn flush(&mut self) -> io::Result<()> {
632         self.get_mut().flush()
633     }
634 }
635 
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for MultiGzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        let underlying = self.get_mut();
        underlying.shutdown()
    }
}
642