1 use std::cmp;
2 use std::io;
3 use std::io::prelude::*;
4 use std::mem;
5
6 #[cfg(feature = "tokio")]
7 use futures::Poll;
8 #[cfg(feature = "tokio")]
9 use tokio_io::{AsyncRead, AsyncWrite};
10
11 use super::{GzBuilder, GzHeader};
12 use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
13 use crate::crc::CrcReader;
14 use crate::deflate;
15 use crate::Compression;
16
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is advanced past the bytes that were copied and the number of bytes
/// copied is returned.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let count = cmp::min(into.len(), from.len() - *pos);
    let start = *pos;
    into[..count].copy_from_slice(&from[start..start + count]);
    *pos = start + count;
    count
}
25
/// Builds the `InvalidInput` error used to report a gzip stream whose
/// checksum does not match.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
32
/// Builds the `InvalidInput` error used to report a malformed gzip header.
fn bad_header() -> io::Error {
    let message = "invalid gzip header";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
36
/// Reads exactly two bytes from `r` and interprets them as a little-endian
/// `u16`.
///
/// Any error from the underlying `read_exact` call is returned unchanged.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut raw = [0u8; 2];
    r.read_exact(&mut raw)?;
    let (low, high) = (u16::from(raw[0]), u16::from(raw[1]));
    Ok(low | (high << 8))
}
42
read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader>43 pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
44 let mut crc_reader = CrcReader::new(r);
45 let mut header = [0; 10];
46 crc_reader.read_exact(&mut header)?;
47
48 let id1 = header[0];
49 let id2 = header[1];
50 if id1 != 0x1f || id2 != 0x8b {
51 return Err(bad_header());
52 }
53 let cm = header[2];
54 if cm != 8 {
55 return Err(bad_header());
56 }
57
58 let flg = header[3];
59 let mtime = ((header[4] as u32) << 0)
60 | ((header[5] as u32) << 8)
61 | ((header[6] as u32) << 16)
62 | ((header[7] as u32) << 24);
63 let _xfl = header[8];
64 let os = header[9];
65
66 let extra = if flg & FEXTRA != 0 {
67 let xlen = read_le_u16(&mut crc_reader)?;
68 let mut extra = vec![0; xlen as usize];
69 crc_reader.read_exact(&mut extra)?;
70 Some(extra)
71 } else {
72 None
73 };
74 let filename = if flg & FNAME != 0 {
75 // wow this is slow
76 let mut b = Vec::new();
77 for byte in crc_reader.by_ref().bytes() {
78 let byte = byte?;
79 if byte == 0 {
80 break;
81 }
82 b.push(byte);
83 }
84 Some(b)
85 } else {
86 None
87 };
88 let comment = if flg & FCOMMENT != 0 {
89 // wow this is slow
90 let mut b = Vec::new();
91 for byte in crc_reader.by_ref().bytes() {
92 let byte = byte?;
93 if byte == 0 {
94 break;
95 }
96 b.push(byte);
97 }
98 Some(b)
99 } else {
100 None
101 };
102
103 if flg & FHCRC != 0 {
104 let calced_crc = crc_reader.crc().sum() as u16;
105 let stored_crc = read_le_u16(&mut crc_reader)?;
106 if calced_crc != stored_crc {
107 return Err(corrupt());
108 }
109 }
110
111 Ok(GzHeader {
112 extra: extra,
113 filename: filename,
114 comment: comment,
115 operating_system: os,
116 mtime: mtime,
117 })
118 }
119
/// A gzip streaming encoder
///
/// This structure consumes a [`BufRead`] interface, reading uncompressed data
/// from the underlying reader, and emitting the compressed version through
/// its [`Read`] implementation.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from a CRC-tracking wrapper around the source.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Header bytes that are emitted ahead of the compressed data.
    header: Vec<u8>,
    // Offset into `header` while the header is being emitted; reset to 0 and
    // reused as the offset into the 8-byte footer once `eof` is set.
    pos: usize,
    // Set once `inner` has reported end of stream, i.e. only the footer is left.
    eof: bool,
}
157
gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R>158 pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
159 let crc = CrcReader::new(r);
160 GzEncoder {
161 inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
162 header: header,
163 pos: 0,
164 eof: false,
165 }
166 }
167
168 impl<R: BufRead> GzEncoder<R> {
169 /// Creates a new encoder which will use the given compression level.
170 ///
171 /// The encoder is not configured specially for the emitted header. For
172 /// header configuration, see the `GzBuilder` type.
173 ///
174 /// The data read from the stream `r` will be compressed and available
175 /// through the returned reader.
new(r: R, level: Compression) -> GzEncoder<R>176 pub fn new(r: R, level: Compression) -> GzEncoder<R> {
177 GzBuilder::new().buf_read(r, level)
178 }
179
read_footer(&mut self, into: &mut [u8]) -> io::Result<usize>180 fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
181 if self.pos == 8 {
182 return Ok(0);
183 }
184 let crc = self.inner.get_ref().crc();
185 let ref arr = [
186 (crc.sum() >> 0) as u8,
187 (crc.sum() >> 8) as u8,
188 (crc.sum() >> 16) as u8,
189 (crc.sum() >> 24) as u8,
190 (crc.amount() >> 0) as u8,
191 (crc.amount() >> 8) as u8,
192 (crc.amount() >> 16) as u8,
193 (crc.amount() >> 24) as u8,
194 ];
195 Ok(copy(into, arr, &mut self.pos))
196 }
197 }
198
199 impl<R> GzEncoder<R> {
200 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R201 pub fn get_ref(&self) -> &R {
202 self.inner.get_ref().get_ref()
203 }
204
205 /// Acquires a mutable reference to the underlying reader.
206 ///
207 /// Note that mutation of the reader may result in surprising results if
208 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R209 pub fn get_mut(&mut self) -> &mut R {
210 self.inner.get_mut().get_mut()
211 }
212
213 /// Returns the underlying stream, consuming this encoder
into_inner(self) -> R214 pub fn into_inner(self) -> R {
215 self.inner.into_inner().into_inner()
216 }
217 }
218
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`, decoded from bytes `0..4` and `4..8` respectively.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    // Folding from the last byte down to the first rebuilds a little-endian
    // value: each step shifts the accumulator up and appends the next byte.
    let le_u32 = |bytes: &[u8]| {
        bytes
            .iter()
            .rev()
            .fold(0u32, |acc, &b| (acc << 8) | u32::from(b))
    };
    (le_u32(&buf[..4]), le_u32(&buf[4..]))
}
231
232 impl<R: BufRead> Read for GzEncoder<R> {
read(&mut self, mut into: &mut [u8]) -> io::Result<usize>233 fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
234 let mut amt = 0;
235 if self.eof {
236 return self.read_footer(into);
237 } else if self.pos < self.header.len() {
238 amt += copy(into, &self.header, &mut self.pos);
239 if amt == into.len() {
240 return Ok(amt);
241 }
242 let tmp = into;
243 into = &mut tmp[amt..];
244 }
245 match self.inner.read(into)? {
246 0 => {
247 self.eof = true;
248 self.pos = 0;
249 self.read_footer(into)
250 }
251 n => Ok(amt + n),
252 }
253 }
254 }
255
256 impl<R: BufRead + Write> Write for GzEncoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>257 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
258 self.get_mut().write(buf)
259 }
260
flush(&mut self) -> io::Result<()>261 fn flush(&mut self) -> io::Result<()> {
262 self.get_mut().flush()
263 }
264 }
265
/// A gzip streaming decoder
///
/// This structure consumes a [`BufRead`] interface, reading compressed data
/// from the underlying reader, and emitting uncompressed data.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Current position in the decoding state machine driven by `read`.
    inner: GzState,
    // Parsed header of the current member; `None` until a header has been
    // read successfully, and cleared again when a further member is started.
    header: Option<GzHeader>,
    // Deflate decoder over the source, wrapped so a CRC of the decompressed
    // output is tracked for comparison against the trailer.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When true, decoding continues with the next member after one finishes.
    multi: bool,
}
306
/// Phases the gzip decoder moves through while a member is being read.
#[derive(Debug)]
enum GzState {
    // Header not fully parsed yet; holds the header bytes read so far so that
    // parsing can be restarted from them after a `WouldBlock`.
    Header(Vec<u8>),
    // Header parsed; decompressed data is being produced.
    Body,
    // Compressed data exhausted; the 8-byte trailer is being collected. The
    // `usize` is the number of trailer bytes already stored in the array.
    Finished(usize, [u8; 8]),
    // Header parsing failed during construction; surfaced by the next read.
    Err(io::Error),
    // Nothing left to decode; reads return `Ok(0)`.
    End,
}
315
/// A small adapter which first replays the bytes already stored in `buf` and
/// then pulls all further data from `reader`. Everything read from `reader`
/// is also appended to `buf` so it can be replayed on a later attempt.
struct Buffer<'a, T: 'a> {
    /// Stash of previously read bytes; grows as new data arrives from `reader`.
    buf: &'a mut Vec<u8>,
    /// Offset of the next stashed byte to replay.
    buf_cur: usize,
    /// Length of `buf` at construction time; replaying stops here.
    buf_max: usize,
    /// Source of fresh data once the stash has been replayed.
    reader: &'a mut T,
}

impl<'a, T> Buffer<'a, T> {
    /// Wraps `reader`, replaying whatever `buf` currently holds first.
    fn new(buf: &'a mut Vec<u8>, reader: &'a mut T) -> Buffer<'a, T> {
        let buf_max = buf.len();
        Buffer {
            buf,
            buf_cur: 0,
            buf_max,
            reader,
        }
    }
}

impl<'a, T: Read> Read for Buffer<'a, T> {
    /// Serves stashed bytes while any remain, otherwise reads from `reader`
    /// and records what was read in the stash.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.buf_cur != self.buf_max {
            // Replay phase: hand out as much of the stash as fits.
            let pending = &self.buf[self.buf_cur..self.buf_max];
            let count = pending.len().min(buf.len());
            buf[..count].copy_from_slice(&pending[..count]);
            self.buf_cur += count;
            return Ok(count);
        }

        // Live phase: fetch fresh data and remember it for a future replay.
        let count = self.reader.read(buf)?;
        self.buf.extend_from_slice(&buf[..count]);
        Ok(count)
    }
}
350
351 impl<R: BufRead> GzDecoder<R> {
352 /// Creates a new decoder from the given reader, immediately parsing the
353 /// gzip header.
new(mut r: R) -> GzDecoder<R>354 pub fn new(mut r: R) -> GzDecoder<R> {
355 let mut buf = Vec::with_capacity(10); // minimum header length
356 let mut header = None;
357
358 let result = {
359 let mut reader = Buffer::new(&mut buf, &mut r);
360 read_gz_header(&mut reader)
361 };
362
363 let state = match result {
364 Ok(hdr) => {
365 header = Some(hdr);
366 GzState::Body
367 }
368 Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(buf),
369 Err(err) => GzState::Err(err),
370 };
371
372 GzDecoder {
373 inner: state,
374 reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
375 multi: false,
376 header,
377 }
378 }
379
multi(mut self, flag: bool) -> GzDecoder<R>380 fn multi(mut self, flag: bool) -> GzDecoder<R> {
381 self.multi = flag;
382 self
383 }
384 }
385
386 impl<R> GzDecoder<R> {
387 /// Returns the header associated with this stream, if it was valid
header(&self) -> Option<&GzHeader>388 pub fn header(&self) -> Option<&GzHeader> {
389 self.header.as_ref()
390 }
391
392 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R393 pub fn get_ref(&self) -> &R {
394 self.reader.get_ref().get_ref()
395 }
396
397 /// Acquires a mutable reference to the underlying stream.
398 ///
399 /// Note that mutation of the stream may result in surprising results if
400 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R401 pub fn get_mut(&mut self) -> &mut R {
402 self.reader.get_mut().get_mut()
403 }
404
405 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R406 pub fn into_inner(self) -> R {
407 self.reader.into_inner().into_inner()
408 }
409 }
410
411 impl<R: BufRead> Read for GzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>412 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
413 let GzDecoder {
414 inner,
415 header,
416 reader,
417 multi,
418 } = self;
419
420 loop {
421 *inner = match mem::replace(inner, GzState::End) {
422 GzState::Header(mut buf) => {
423 let result = {
424 let mut reader = Buffer::new(&mut buf, reader.get_mut().get_mut());
425 read_gz_header(&mut reader)
426 };
427 let hdr = result.map_err(|err| {
428 if io::ErrorKind::WouldBlock == err.kind() {
429 *inner = GzState::Header(buf);
430 }
431
432 err
433 })?;
434 *header = Some(hdr);
435 GzState::Body
436 }
437 GzState::Body => {
438 if into.is_empty() {
439 *inner = GzState::Body;
440 return Ok(0);
441 }
442
443 let n = reader.read(into).map_err(|err| {
444 if io::ErrorKind::WouldBlock == err.kind() {
445 *inner = GzState::Body;
446 }
447
448 err
449 })?;
450
451 match n {
452 0 => GzState::Finished(0, [0; 8]),
453 n => {
454 *inner = GzState::Body;
455 return Ok(n);
456 }
457 }
458 }
459 GzState::Finished(pos, mut buf) => {
460 if pos < buf.len() {
461 let n = reader
462 .get_mut()
463 .get_mut()
464 .read(&mut buf[pos..])
465 .and_then(|n| {
466 if n == 0 {
467 Err(io::ErrorKind::UnexpectedEof.into())
468 } else {
469 Ok(n)
470 }
471 })
472 .map_err(|err| {
473 if io::ErrorKind::WouldBlock == err.kind() {
474 *inner = GzState::Finished(pos, buf);
475 }
476
477 err
478 })?;
479
480 GzState::Finished(pos + n, buf)
481 } else {
482 let (crc, amt) = finish(&buf);
483
484 if crc != reader.crc().sum() || amt != reader.crc().amount() {
485 return Err(corrupt());
486 } else if *multi {
487 let is_eof = reader
488 .get_mut()
489 .get_mut()
490 .fill_buf()
491 .map(|buf| buf.is_empty())
492 .map_err(|err| {
493 if io::ErrorKind::WouldBlock == err.kind() {
494 *inner = GzState::Finished(pos, buf);
495 }
496
497 err
498 })?;
499
500 if is_eof {
501 GzState::End
502 } else {
503 reader.reset();
504 reader.get_mut().reset_data();
505 header.take();
506 GzState::Header(Vec::with_capacity(10))
507 }
508 } else {
509 GzState::End
510 }
511 }
512 }
513 GzState::Err(err) => return Err(err),
514 GzState::End => return Ok(0),
515 };
516 }
517 }
518 }
519
// Only compiled with the `tokio` feature. The impl body is empty, so every
// `AsyncRead` method comes from the trait's defaults.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for GzDecoder<R> {}
522
523 impl<R: BufRead + Write> Write for GzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>524 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
525 self.get_mut().write(buf)
526 }
527
flush(&mut self) -> io::Result<()>528 fn flush(&mut self) -> io::Result<()> {
529 self.get_mut().flush()
530 }
531 }
532
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for GzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        let stream = self.get_mut();
        stream.shutdown()
    }
}
539
/// A gzip streaming decoder that decodes all members of a multistream
///
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
/// decode all consecutive members while `GzDecoder` will only decompress
/// the first gzip member. The multistream format is commonly used in
/// bioinformatics, for example when using the BGZF compressed data.
///
/// This structure consumes a [`BufRead`] interface, reading all gzip members
/// from the underlying reader and emitting uncompressed data.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
// Thin wrapper around a `GzDecoder` that has multi-member mode switched on.
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
582
583 impl<R: BufRead> MultiGzDecoder<R> {
584 /// Creates a new decoder from the given reader, immediately parsing the
585 /// (first) gzip header. If the gzip stream contains multiple members all will
586 /// be decoded.
new(r: R) -> MultiGzDecoder<R>587 pub fn new(r: R) -> MultiGzDecoder<R> {
588 MultiGzDecoder(GzDecoder::new(r).multi(true))
589 }
590 }
591
592 impl<R> MultiGzDecoder<R> {
593 /// Returns the current header associated with this stream, if it's valid
header(&self) -> Option<&GzHeader>594 pub fn header(&self) -> Option<&GzHeader> {
595 self.0.header()
596 }
597
598 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R599 pub fn get_ref(&self) -> &R {
600 self.0.get_ref()
601 }
602
603 /// Acquires a mutable reference to the underlying stream.
604 ///
605 /// Note that mutation of the stream may result in surprising results if
606 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R607 pub fn get_mut(&mut self) -> &mut R {
608 self.0.get_mut()
609 }
610
611 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R612 pub fn into_inner(self) -> R {
613 self.0.into_inner()
614 }
615 }
616
617 impl<R: BufRead> Read for MultiGzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>618 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
619 self.0.read(into)
620 }
621 }
622
// Only compiled with the `tokio` feature. The impl body is empty, so every
// `AsyncRead` method comes from the trait's defaults.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for MultiGzDecoder<R> {}
625
626 impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>627 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
628 self.get_mut().write(buf)
629 }
630
flush(&mut self) -> io::Result<()>631 fn flush(&mut self) -> io::Result<()> {
632 self.get_mut().flush()
633 }
634 }
635
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for MultiGzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        let stream = self.get_mut();
        stream.shutdown()
    }
}
642