//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP compression, in the `.png` image format, by the unix `gzip` program and commonly
//! in `.zip` files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data,
//! containing some extra metadata and a checksum to validate the integrity of the raw data.
14 //!
//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
//! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default,
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49 
50 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
51 
52 #[cfg(all(test, feature = "benchmarks"))]
53 extern crate test as test_std;
54 
55 #[cfg(test)]
56 extern crate flate2;
57 // #[cfg(test)]
58 // extern crate inflate;
59 
60 extern crate adler32;
61 extern crate byteorder;
62 #[cfg(feature = "gzip")]
63 extern crate gzip_header;
64 
65 mod compression_options;
66 mod huffman_table;
67 mod lz77;
68 mod lzvalue;
69 mod chained_hash_table;
70 mod length_encode;
71 mod output_writer;
72 mod stored_block;
73 mod huffman_lengths;
74 mod zlib;
75 mod checksum;
76 mod bit_reverse;
77 mod bitstream;
78 mod encoder_state;
79 mod matching;
80 mod input_buffer;
81 mod deflate_state;
82 mod compress;
83 mod rle;
84 mod writer;
85 #[cfg(test)]
86 mod test_utils;
87 
88 use std::io::Write;
89 use std::io;
90 
91 use byteorder::BigEndian;
92 #[cfg(feature = "gzip")]
93 use gzip_header::GzBuilder;
94 #[cfg(feature = "gzip")]
95 use gzip_header::Crc;
96 #[cfg(feature = "gzip")]
97 use byteorder::LittleEndian;
98 
99 use checksum::RollingChecksum;
100 use deflate_state::DeflateState;
101 
102 pub use compression_options::{CompressionOptions, SpecialOptions, Compression};
103 use compress::Flush;
104 pub use lz77::MatchingType;
105 
106 use writer::compress_until_done;
107 
108 /// Encoders implementing a `Write` interface.
109 pub mod write {
110     pub use writer::{DeflateEncoder, ZlibEncoder};
111     #[cfg(feature = "gzip")]
112     pub use writer::gzip::GzEncoder;
113 }
114 
115 
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>116 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
117     input: &[u8],
118     writer: &mut W,
119     mut checksum: RC,
120     compression_options: CompressionOptions,
121 ) -> io::Result<()> {
122     checksum.update_from_slice(input);
123     // We use a box here to avoid putting the buffers on the stack
124     // It's done here rather than in the structs themselves for now to
125     // keep the data close in memory.
126     let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
127     compress_until_done(input, &mut deflate_state, Flush::Finish)
128 }
129 
130 /// Compress the given slice of bytes with DEFLATE compression.
131 ///
132 /// Returns a `Vec<u8>` of the compressed data.
133 ///
134 /// # Examples
135 ///
136 /// ```
137 /// use deflate::{deflate_bytes_conf, Compression};
138 ///
139 /// let data = b"This is some test data";
140 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
141 /// # let _ = compressed_data;
142 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>143 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
144     let mut writer = Vec::with_capacity(input.len() / 3);
145     compress_data_dynamic(
146         input,
147         &mut writer,
148         checksum::NoChecksum::new(),
149         options.into(),
150     ).expect("Write error!");
151     writer
152 }
153 
154 /// Compress the given slice of bytes with DEFLATE compression using the default compression
155 /// level.
156 ///
157 /// Returns a `Vec<u8>` of the compressed data.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use deflate::deflate_bytes;
163 ///
164 /// let data = b"This is some test data";
165 /// let compressed_data = deflate_bytes(data);
166 /// # let _ = compressed_data;
167 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>168 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
169     deflate_bytes_conf(input, Compression::Default)
170 }
171 
172 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
173 ///
174 /// Returns a `Vec<u8>` of the compressed data.
175 ///
176 /// Zlib dictionaries are not yet suppored.
177 ///
178 /// # Examples
179 ///
180 /// ```
181 /// use deflate::{deflate_bytes_zlib_conf, Compression};
182 ///
183 /// let data = b"This is some test data";
184 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
185 /// # let _ = compressed_data;
186 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>187 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
188     use byteorder::WriteBytesExt;
189     let mut writer = Vec::with_capacity(input.len() / 3);
190     // Write header
191     zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
192         .expect("Write error when writing zlib header!");
193 
194     let mut checksum = checksum::Adler32Checksum::new();
195     compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
196         .expect("Write error when writing compressed data!");
197 
198     let hash = checksum.current_hash();
199 
200     writer
201         .write_u32::<BigEndian>(hash)
202         .expect("Write error when writing checksum!");
203     writer
204 }
205 
206 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
207 /// using the default compression level.
208 ///
209 /// Returns a Vec<u8> of the compressed data.
210 ///
211 /// Zlib dictionaries are not yet suppored.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// use deflate::deflate_bytes_zlib;
217 ///
218 /// let data = b"This is some test data";
219 /// let compressed_data = deflate_bytes_zlib(data);
220 /// # let _ = compressed_data;
221 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>222 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
223     deflate_bytes_zlib_conf(input, Compression::Default)
224 }
225 
/// Compress a byte slice with DEFLATE and wrap the result in a gzip header and trailer,
/// using the supplied gzip header builder and compression options.
///
/// Only available when the `gzip` feature is enabled.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header, as produced by the builder.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Compressed body. The CRC is computed separately below, so the compressor
    // itself runs with a no-op checksum.
    let mut no_checksum = checksum::NoChecksum::new();
    let body = compress_data_dynamic(input, &mut output, &mut no_checksum, options.into());
    body.expect("Write error when writing compressed data!");

    // Trailer: two little-endian `u32` values taken from a CRC over the whole input.
    let mut crc = Crc::new();
    crc.update(input);
    let crc_sum = crc.sum();
    let crc_amt = crc.amt_as_u32();

    output
        .write_u32::<LittleEndian>(crc_sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(crc_amt)
        .expect("Write error when writing amt!");

    output
}
275 
/// Compress a byte slice with DEFLATE at the default compression level, wrapped in a gzip
/// header and trailer, using a gzip header with default values.
///
/// This is a shorthand for calling `deflate_bytes_gzip_conf` with `Compression::Default`
/// and `GzBuilder::new()`. Only available when the `gzip` feature is enabled.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let level = Compression::Default;
    let header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, level, header)
}
294 
295 #[cfg(test)]
296 mod test {
297     use super::*;
298     use std::io::Write;
299 
300     use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
301     #[cfg(feature = "gzip")]
302     use test_utils::decompress_gzip;
303 
304     type CO = CompressionOptions;
305 
306     /// Write data to the writer in chunks of chunk_size.
chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize)307     fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
308         for chunk in data.chunks(chunk_size) {
309             writer.write_all(&chunk).unwrap();
310         }
311     }
312 
313     #[test]
dynamic_string_mem()314     fn dynamic_string_mem() {
315         let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
316         let compressed = deflate_bytes(&test_data);
317 
318         assert!(compressed.len() < test_data.len());
319 
320         let result = decompress_to_end(&compressed);
321         assert_eq!(test_data, result);
322     }
323 
324     #[test]
dynamic_string_file()325     fn dynamic_string_file() {
326         let input = get_test_data();
327         let compressed = deflate_bytes(&input);
328 
329         let result = decompress_to_end(&compressed);
330         for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
331             if a != b {
332                 println!("First difference at {}, input: {}, output: {}", n, a, b);
333                 println!(
334                     "input: {:?}, output: {:?}",
335                     &input[n - 3..n + 3],
336                     &result[n - 3..n + 3]
337                 );
338                 break;
339             }
340         }
341         // Not using assert_eq here deliberately to avoid massive amounts of output spam
342         assert!(input == result);
343         // Check that we actually managed to compress the input
344         assert!(compressed.len() < input.len());
345     }
346 
347     #[test]
file_rle()348     fn file_rle() {
349         let input = get_test_data();
350         let compressed = deflate_bytes_conf(&input, CO::rle());
351 
352         let result = decompress_to_end(&compressed);
353         assert!(input == result);
354     }
355 
356     #[test]
file_zlib()357     fn file_zlib() {
358         let test_data = get_test_data();
359 
360         let compressed = deflate_bytes_zlib(&test_data);
361         // {
362         //     use std::fs::File;
363         //     use std::io::Write;
364         //     let mut f = File::create("out.zlib").unwrap();
365         //     f.write_all(&compressed).unwrap();
366         // }
367 
368         println!("file_zlib compressed(default) length: {}", compressed.len());
369 
370         let result = decompress_zlib(&compressed);
371 
372         assert!(&test_data == &result);
373         assert!(compressed.len() < test_data.len());
374     }
375 
376     #[test]
zlib_short()377     fn zlib_short() {
378         let test_data = [10, 10, 10, 10, 10, 55];
379         roundtrip_zlib(&test_data, CO::default());
380     }
381 
382     #[test]
zlib_last_block()383     fn zlib_last_block() {
384         let mut test_data = vec![22; 32768];
385         test_data.extend(&[5, 2, 55, 11, 12]);
386         roundtrip_zlib(&test_data, CO::default());
387     }
388 
389     #[test]
deflate_short()390     fn deflate_short() {
391         let test_data = [10, 10, 10, 10, 10, 55];
392         let compressed = deflate_bytes(&test_data);
393 
394         let result = decompress_to_end(&compressed);
395         assert_eq!(&test_data, result.as_slice());
396         // If block type and compression is selected correctly, this should only take 5 bytes.
397         assert_eq!(compressed.len(), 5);
398     }
399 
400     #[cfg(feature = "gzip")]
401     #[test]
gzip()402     fn gzip() {
403         let data = get_test_data();
404         let comment = b"Test";
405         let compressed = deflate_bytes_gzip_conf(
406             &data,
407             Compression::Default,
408             GzBuilder::new().comment(&comment[..]),
409         );
410         let (dec, decompressed) = decompress_gzip(&compressed);
411         assert_eq!(dec.header().comment().unwrap(), comment);
412         assert!(data == decompressed);
413     }
414 
chunk_test(chunk_size: usize, level: CompressionOptions)415     fn chunk_test(chunk_size: usize, level: CompressionOptions) {
416         let mut compressed = Vec::with_capacity(32000);
417         let data = get_test_data();
418         {
419             let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
420             chunked_write(&mut compressor, &data, chunk_size);
421             compressor.finish().unwrap();
422         }
423         let compressed2 = deflate_bytes_zlib_conf(&data, level);
424         let res = decompress_zlib(&compressed);
425         assert!(res == data);
426         assert_eq!(compressed.len(), compressed2.len());
427         assert!(compressed == compressed2);
428     }
429 
writer_chunks_level(level: CompressionOptions)430     fn writer_chunks_level(level: CompressionOptions) {
431         use input_buffer::BUFFER_SIZE;
432         let ct = |n| chunk_test(n, level);
433         ct(1);
434         ct(50);
435         ct(400);
436         ct(32768);
437         ct(BUFFER_SIZE);
438         ct(50000);
439         ct((32768 * 2) + 258);
440     }
441 
442     #[ignore]
443     #[test]
444     /// Test the writer by inputing data in one chunk at the time.
zlib_writer_chunks()445     fn zlib_writer_chunks() {
446         writer_chunks_level(CompressionOptions::default());
447         writer_chunks_level(CompressionOptions::fast());
448         writer_chunks_level(CompressionOptions::rle());
449     }
450 
451     /// Check that the frequency values don't overflow.
452     #[test]
frequency_overflow()453     fn frequency_overflow() {
454         let _ = deflate_bytes_conf(
455             &vec![5; 100000],
456             compression_options::CompressionOptions::default(),
457         );
458     }
459 
roundtrip_zlib(data: &[u8], level: CompressionOptions)460     fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
461         let compressed = deflate_bytes_zlib_conf(data, level);
462         let res = decompress_zlib(&compressed);
463         if data.len() <= 32 {
464             assert_eq!(res, data, "Failed with level: {:?}", level);
465         } else {
466             assert!(res == data, "Failed with level: {:?}", level);
467         }
468     }
469 
check_zero(level: CompressionOptions)470     fn check_zero(level: CompressionOptions) {
471         roundtrip_zlib(&[], level);
472     }
473 
474     /// Compress with an empty slice.
475     #[test]
empty_input()476     fn empty_input() {
477         check_zero(CompressionOptions::default());
478         check_zero(CompressionOptions::fast());
479         check_zero(CompressionOptions::rle());
480     }
481 
482     #[test]
one_and_two_values()483     fn one_and_two_values() {
484         let one = &[1][..];
485         roundtrip_zlib(one, CO::rle());
486         roundtrip_zlib(one, CO::fast());
487         roundtrip_zlib(one, CO::default());
488         let two = &[5, 6, 7, 8][..];
489         roundtrip_zlib(two, CO::rle());
490         roundtrip_zlib(two, CO::fast());
491         roundtrip_zlib(two, CO::default());
492     }
493 
494 
495 }
496