1 use std::cmp;
2 use std::io::prelude::*;
3 use std::io;
4 use std::mem;
5
6 use super::{Builder, Header};
7 use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
8 use Compression;
9 use crc::CrcReader;
10 use deflate;
11
/// Copies bytes from `from`, starting at offset `*pos`, into the front of
/// `into`.
///
/// The number of bytes copied is the smaller of `into.len()` and the number
/// of bytes left in `from` after `*pos`. `*pos` is advanced by that amount,
/// which is also the return value.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let start = *pos;
    let count = cmp::min(into.len(), from.len() - start);
    into[..count].copy_from_slice(&from[start..start + count]);
    *pos = start + count;
    count
}
/// Builds the error returned when a gzip checksum or length check fails.
fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
26
/// Builds the error returned when the fixed part of a gzip header is invalid.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    io::Error::new(kind, "invalid gzip header")
}
30
/// Reads exactly two bytes from `r` and decodes them as a little-endian `u16`.
///
/// Any error from `read_exact` (including running out of input) is returned
/// unchanged.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut bytes = [0u8; 2];
    r.read_exact(&mut bytes)?;
    let low = u16::from(bytes[0]);
    let high = u16::from(bytes[1]);
    Ok((high << 8) | low)
}
36
read_gz_header<R: Read>(r: &mut R) -> io::Result<Header>37 fn read_gz_header<R: Read>(r: &mut R) -> io::Result<Header> {
38 let mut crc_reader = CrcReader::new(r);
39 let mut header = [0; 10];
40 try!(crc_reader.read_exact(&mut header));
41
42 let id1 = header[0];
43 let id2 = header[1];
44 if id1 != 0x1f || id2 != 0x8b {
45 return Err(bad_header());
46 }
47 let cm = header[2];
48 if cm != 8 {
49 return Err(bad_header());
50 }
51
52 let flg = header[3];
53 let mtime = ((header[4] as u32) << 0) | ((header[5] as u32) << 8) | ((header[6] as u32) << 16) |
54 ((header[7] as u32) << 24);
55 let _xfl = header[8];
56 let _os = header[9];
57
58 let extra = if flg & FEXTRA != 0 {
59 let xlen = try!(read_le_u16(&mut crc_reader));
60 let mut extra = vec![0; xlen as usize];
61 try!(crc_reader.read_exact(&mut extra));
62 Some(extra)
63 } else {
64 None
65 };
66 let filename = if flg & FNAME != 0 {
67 // wow this is slow
68 let mut b = Vec::new();
69 for byte in crc_reader.by_ref().bytes() {
70 let byte = try!(byte);
71 if byte == 0 {
72 break;
73 }
74 b.push(byte);
75 }
76 Some(b)
77 } else {
78 None
79 };
80 let comment = if flg & FCOMMENT != 0 {
81 // wow this is slow
82 let mut b = Vec::new();
83 for byte in crc_reader.by_ref().bytes() {
84 let byte = try!(byte);
85 if byte == 0 {
86 break;
87 }
88 b.push(byte);
89 }
90 Some(b)
91 } else {
92 None
93 };
94
95 if flg & FHCRC != 0 {
96 let calced_crc = crc_reader.crc().sum() as u16;
97 let stored_crc = try!(read_le_u16(&mut crc_reader));
98 if calced_crc != stored_crc {
99 return Err(corrupt());
100 }
101 }
102
103 Ok(Header {
104 extra: extra,
105 filename: filename,
106 comment: comment,
107 mtime: mtime,
108 })
109 }
110
111
/// A gzip streaming encoder
///
/// This structure wraps a [`BufRead`] source of uncompressed data and exposes
/// the gzip-compressed version of that data through the [`Read`] interface.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::Fast);
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from the source through a CRC-tracking wrapper;
    // the wrapper's checksum and byte count are used to build the footer.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Gzip header bytes, emitted before any compressed data.
    header: Vec<u8>,
    // Cursor into `header` while the header is being emitted; reset to 0 and
    // reused as the cursor into the 8-byte footer once `eof` is set.
    pos: usize,
    // Set once `inner` has reported end of stream; after that only the
    // footer is emitted.
    eof: bool,
}
149
gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R>150 pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression)
151 -> GzEncoder<R>
152 {
153 let crc = CrcReader::new(r);
154 GzEncoder {
155 inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
156 header: header,
157 pos: 0,
158 eof: false,
159 }
160 }
161
162 impl<R: BufRead> GzEncoder<R> {
163 /// Creates a new encoder which will use the given compression level.
164 ///
165 /// The encoder is not configured specially for the emitted header. For
166 /// header configuration, see the `Builder` type.
167 ///
168 /// The data read from the stream `r` will be compressed and available
169 /// through the returned reader.
new(r: R, level: Compression) -> GzEncoder<R>170 pub fn new(r: R, level: Compression) -> GzEncoder<R> {
171 Builder::new().buf_read(r, level)
172 }
173
read_footer(&mut self, into: &mut [u8]) -> io::Result<usize>174 fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
175 if self.pos == 8 {
176 return Ok(0);
177 }
178 let crc = self.inner.get_ref().crc();
179 let ref arr = [
180 (crc.sum() >> 0) as u8,
181 (crc.sum() >> 8) as u8,
182 (crc.sum() >> 16) as u8,
183 (crc.sum() >> 24) as u8,
184 (crc.amount() >> 0) as u8,
185 (crc.amount() >> 8) as u8,
186 (crc.amount() >> 16) as u8,
187 (crc.amount() >> 24) as u8,
188 ];
189 Ok(copy(into, arr, &mut self.pos))
190 }
191 }
192
193 impl<R> GzEncoder<R> {
194 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R195 pub fn get_ref(&self) -> &R {
196 self.inner.get_ref().get_ref()
197 }
198
199 /// Acquires a mutable reference to the underlying reader.
200 ///
201 /// Note that mutation of the reader may result in surprising results if
202 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R203 pub fn get_mut(&mut self) -> &mut R {
204 self.inner.get_mut().get_mut()
205 }
206
207 /// Returns the underlying stream, consuming this encoder
into_inner(self) -> R208 pub fn into_inner(self) -> R {
209 self.inner.into_inner().into_inner()
210 }
211 }
212
213 impl<R: BufRead> Read for GzEncoder<R> {
read(&mut self, mut into: &mut [u8]) -> io::Result<usize>214 fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
215 let mut amt = 0;
216 if self.eof {
217 return self.read_footer(into);
218 } else if self.pos < self.header.len() {
219 amt += copy(into, &self.header, &mut self.pos);
220 if amt == into.len() {
221 return Ok(amt);
222 }
223 let tmp = into;
224 into = &mut tmp[amt..];
225 }
226 match try!(self.inner.read(into)) {
227 0 => {
228 self.eof = true;
229 self.pos = 0;
230 self.read_footer(into)
231 }
232 n => Ok(amt + n),
233 }
234 }
235 }
236
237 impl<R: BufRead + Write> Write for GzEncoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>238 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
239 self.get_mut().write(buf)
240 }
241
flush(&mut self) -> io::Result<()>242 fn flush(&mut self) -> io::Result<()> {
243 self.get_mut().flush()
244 }
245 }
246
247
/// A gzip streaming decoder
///
/// This structure consumes a [`BufRead`] interface, reading compressed data
/// from the underlying reader, and emits uncompressed data through the
/// [`Read`] interface.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::Default);
/// #   e.write(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..])?;
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Deflate decoder wrapped in a CRC tracker; its checksum and byte count
    // are compared against the stream's 8-byte trailer.
    inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // Header parsed when the decoder was constructed.
    header: Header,
    // Set once the trailer has been read and verified, so it is not read
    // from the underlying stream a second time.
    finished: bool,
}
287
288
289 impl<R: BufRead> GzDecoder<R> {
290 /// Creates a new decoder from the given reader, immediately parsing the
291 /// gzip header.
292 ///
293 /// # Errors
294 ///
295 /// If an error is encountered when parsing the gzip header, an error is
296 /// returned.
new(mut r: R) -> io::Result<GzDecoder<R>>297 pub fn new(mut r: R) -> io::Result<GzDecoder<R>> {
298 let header = try!(read_gz_header(&mut r));
299
300 let flate = deflate::bufread::DeflateDecoder::new(r);
301 return Ok(GzDecoder {
302 inner: CrcReader::new(flate),
303 header: header,
304 finished: false,
305 });
306 }
307
finish(&mut self) -> io::Result<()>308 fn finish(&mut self) -> io::Result<()> {
309 if self.finished {
310 return Ok(());
311 }
312 let ref mut buf = [0u8; 8];
313 {
314 let mut len = 0;
315
316 while len < buf.len() {
317 match try!(self.inner.get_mut().get_mut().read(&mut buf[len..])) {
318 0 => return Err(corrupt()),
319 n => len += n,
320 }
321 }
322 }
323
324 let crc = ((buf[0] as u32) << 0) | ((buf[1] as u32) << 8) | ((buf[2] as u32) << 16) |
325 ((buf[3] as u32) << 24);
326 let amt = ((buf[4] as u32) << 0) | ((buf[5] as u32) << 8) | ((buf[6] as u32) << 16) |
327 ((buf[7] as u32) << 24);
328 if crc != self.inner.crc().sum() as u32 {
329 return Err(corrupt());
330 }
331 if amt != self.inner.crc().amount() {
332 return Err(corrupt());
333 }
334 self.finished = true;
335 Ok(())
336 }
337 }
338
339 impl<R> GzDecoder<R> {
340 /// Returns the header associated with this stream.
header(&self) -> &Header341 pub fn header(&self) -> &Header {
342 &self.header
343 }
344
345 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R346 pub fn get_ref(&self) -> &R {
347 self.inner.get_ref().get_ref()
348 }
349
350 /// Acquires a mutable reference to the underlying stream.
351 ///
352 /// Note that mutation of the stream may result in surprising results if
353 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R354 pub fn get_mut(&mut self) -> &mut R {
355 self.inner.get_mut().get_mut()
356 }
357
358 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R359 pub fn into_inner(self) -> R {
360 self.inner.into_inner().into_inner()
361 }
362 }
363
364 impl<R: BufRead> Read for GzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>365 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
366 match try!(self.inner.read(into)) {
367 0 => {
368 try!(self.finish());
369 Ok(0)
370 }
371 n => Ok(n),
372 }
373 }
374 }
375
376 impl<R: BufRead + Write> Write for GzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>377 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
378 self.get_mut().write(buf)
379 }
380
flush(&mut self) -> io::Result<()>381 fn flush(&mut self) -> io::Result<()> {
382 self.get_mut().flush()
383 }
384 }
385
386
387
/// A gzip streaming decoder that decodes all members of a multistream
///
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
/// decode all consecutive members while `GzDecoder` will only decompress
/// the first gzip member. The multistream format is commonly used in
/// bioinformatics, for example when using the BGZF compressed data.
///
/// This structure consumes a [`BufRead`] interface, reading all gzip members
/// from the underlying reader, and emits the uncompressed data through the
/// [`Read`] interface.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::Default);
/// #   e.write(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..])?;
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct MultiGzDecoder<R> {
    // Deflate decoder wrapped in a CRC tracker; both are reset between
    // members.
    inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // Header of the member currently being decoded; replaced each time a
    // following member's header is parsed.
    header: Header,
    // Set once a trailer has been verified and the underlying reader has no
    // more data after it.
    finished: bool,
}
434
435
436 impl<R: BufRead> MultiGzDecoder<R> {
437 /// Creates a new decoder from the given reader, immediately parsing the
438 /// (first) gzip header. If the gzip stream contains multiple members all will
439 /// be decoded.
440 ///
441 /// # Errors
442 ///
443 /// If an error is encountered when parsing the gzip header, an error is
444 /// returned.
new(mut r: R) -> io::Result<MultiGzDecoder<R>>445 pub fn new(mut r: R) -> io::Result<MultiGzDecoder<R>> {
446 let header = try!(read_gz_header(&mut r));
447
448 let flate = deflate::bufread::DeflateDecoder::new(r);
449 return Ok(MultiGzDecoder {
450 inner: CrcReader::new(flate),
451 header: header,
452 finished: false,
453 });
454 }
455
finish_member(&mut self) -> io::Result<usize>456 fn finish_member(&mut self) -> io::Result<usize> {
457 if self.finished {
458 return Ok(0);
459 }
460 let ref mut buf = [0u8; 8];
461 {
462 let mut len = 0;
463
464 while len < buf.len() {
465 match try!(self.inner.get_mut().get_mut().read(&mut buf[len..])) {
466 0 => return Err(corrupt()),
467 n => len += n,
468 }
469 }
470 }
471
472 let crc = ((buf[0] as u32) << 0) | ((buf[1] as u32) << 8) | ((buf[2] as u32) << 16) |
473 ((buf[3] as u32) << 24);
474 let amt = ((buf[4] as u32) << 0) | ((buf[5] as u32) << 8) | ((buf[6] as u32) << 16) |
475 ((buf[7] as u32) << 24);
476 if crc != self.inner.crc().sum() as u32 {
477 return Err(corrupt());
478 }
479 if amt != self.inner.crc().amount() {
480 return Err(corrupt());
481 }
482 let remaining = match self.inner.get_mut().get_mut().fill_buf() {
483 Ok(b) => if b.is_empty() {
484 self.finished = true;
485 return Ok(0);
486 } else {
487 b.len()
488 },
489 Err(e) => return Err(e),
490 };
491
492 let next_header = try!(read_gz_header(self.inner.get_mut().get_mut()));
493 mem::replace(&mut self.header, next_header);
494 self.inner.reset();
495 self.inner.get_mut().reset_data();
496
497 Ok(remaining)
498 }
499 }
500
501 impl<R> MultiGzDecoder<R> {
502 /// Returns the current header associated with this stream.
header(&self) -> &Header503 pub fn header(&self) -> &Header {
504 &self.header
505 }
506
507 /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R508 pub fn get_ref(&self) -> &R {
509 self.inner.get_ref().get_ref()
510 }
511
512 /// Acquires a mutable reference to the underlying stream.
513 ///
514 /// Note that mutation of the stream may result in surprising results if
515 /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R516 pub fn get_mut(&mut self) -> &mut R {
517 self.inner.get_mut().get_mut()
518 }
519
520 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R521 pub fn into_inner(self) -> R {
522 self.inner.into_inner().into_inner()
523 }
524 }
525
526 impl<R: BufRead> Read for MultiGzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>527 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
528 match try!(self.inner.read(into)) {
529 0 => match self.finish_member() {
530 Ok(0) => Ok(0),
531 Ok(_) => self.read(into),
532 Err(e) => Err(e),
533 },
534 n => Ok(n),
535 }
536 }
537 }
538
539 impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>540 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
541 self.get_mut().write(buf)
542 }
543
flush(&mut self) -> io::Result<()>544 fn flush(&mut self) -> io::Result<()> {
545 self.get_mut().flush()
546 }
547 }
548