//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP compression, in the `.png` image format, by the Unix `gzip` program and
//! commonly in `.zip` files. The `zlib` and `gzip` formats are wrappers around
//! DEFLATE-compressed data, containing some extra metadata and a checksum to validate the
//! integrity of the raw data.
14 //!
15 //! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16 //! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17 //! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49 
50 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
51 
52 #[cfg(all(test, feature = "benchmarks"))]
53 extern crate test as test_std;
54 
55 #[cfg(test)]
56 extern crate miniz_oxide;
57 
58 extern crate adler32;
59 extern crate byteorder;
60 #[cfg(feature = "gzip")]
61 extern crate gzip_header;
62 
63 mod bit_reverse;
64 mod bitstream;
65 mod chained_hash_table;
66 mod checksum;
67 mod compress;
68 mod compression_options;
69 mod deflate_state;
70 mod encoder_state;
71 mod huffman_lengths;
72 mod huffman_table;
73 mod input_buffer;
74 mod length_encode;
75 mod lz77;
76 mod lzvalue;
77 mod matching;
78 mod output_writer;
79 mod rle;
80 mod stored_block;
81 #[cfg(test)]
82 mod test_utils;
83 mod writer;
84 mod zlib;
85 
86 use std::io;
87 use std::io::Write;
88 
89 use byteorder::BigEndian;
90 #[cfg(feature = "gzip")]
91 use byteorder::LittleEndian;
92 #[cfg(feature = "gzip")]
93 use gzip_header::Crc;
94 #[cfg(feature = "gzip")]
95 use gzip_header::GzBuilder;
96 
97 use crate::checksum::RollingChecksum;
98 use crate::deflate_state::DeflateState;
99 
100 use crate::compress::Flush;
101 pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
102 pub use lz77::MatchingType;
103 
104 use crate::writer::compress_until_done;
105 
106 /// Encoders implementing a `Write` interface.
107 pub mod write {
108     #[cfg(feature = "gzip")]
109     pub use crate::writer::gzip::GzEncoder;
110     pub use crate::writer::{DeflateEncoder, ZlibEncoder};
111 }
112 
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>113 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
114     input: &[u8],
115     writer: &mut W,
116     mut checksum: RC,
117     compression_options: CompressionOptions,
118 ) -> io::Result<()> {
119     checksum.update_from_slice(input);
120     // We use a box here to avoid putting the buffers on the stack
121     // It's done here rather than in the structs themselves for now to
122     // keep the data close in memory.
123     let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
124     compress_until_done(input, &mut deflate_state, Flush::Finish)
125 }
126 
127 /// Compress the given slice of bytes with DEFLATE compression.
128 ///
129 /// Returns a `Vec<u8>` of the compressed data.
130 ///
131 /// # Examples
132 ///
133 /// ```
134 /// use deflate::{deflate_bytes_conf, Compression};
135 ///
136 /// let data = b"This is some test data";
137 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
138 /// # let _ = compressed_data;
139 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>140 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
141     let mut writer = Vec::with_capacity(input.len() / 3);
142     compress_data_dynamic(
143         input,
144         &mut writer,
145         checksum::NoChecksum::new(),
146         options.into(),
147     )
148     .expect("Write error!");
149     writer
150 }
151 
152 /// Compress the given slice of bytes with DEFLATE compression using the default compression
153 /// level.
154 ///
155 /// Returns a `Vec<u8>` of the compressed data.
156 ///
157 /// # Examples
158 ///
159 /// ```
160 /// use deflate::deflate_bytes;
161 ///
162 /// let data = b"This is some test data";
163 /// let compressed_data = deflate_bytes(data);
164 /// # let _ = compressed_data;
165 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>166 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
167     deflate_bytes_conf(input, Compression::Default)
168 }
169 
170 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
171 ///
172 /// Returns a `Vec<u8>` of the compressed data.
173 ///
174 /// Zlib dictionaries are not yet suppored.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// use deflate::{deflate_bytes_zlib_conf, Compression};
180 ///
181 /// let data = b"This is some test data";
182 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
183 /// # let _ = compressed_data;
184 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>185 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
186     use byteorder::WriteBytesExt;
187     let mut writer = Vec::with_capacity(input.len() / 3);
188     // Write header
189     zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
190         .expect("Write error when writing zlib header!");
191 
192     let mut checksum = checksum::Adler32Checksum::new();
193     compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
194         .expect("Write error when writing compressed data!");
195 
196     let hash = checksum.current_hash();
197 
198     writer
199         .write_u32::<BigEndian>(hash)
200         .expect("Write error when writing checksum!");
201     writer
202 }
203 
204 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
205 /// using the default compression level.
206 ///
207 /// Returns a Vec<u8> of the compressed data.
208 ///
209 /// Zlib dictionaries are not yet suppored.
210 ///
211 /// # Examples
212 ///
213 /// ```
214 /// use deflate::deflate_bytes_zlib;
215 ///
216 /// let data = b"This is some test data";
217 /// let compressed_data = deflate_bytes_zlib(data);
218 /// # let _ = compressed_data;
219 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>220 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
221     deflate_bytes_zlib_conf(input, Compression::Default)
222 }
223 
/// Compress a slice of bytes with DEFLATE and wrap the result in a gzip header and trailer,
/// using the supplied gzip header builder and compression options.
///
/// The trailer consists of two little-endian `u32` values taken from a `Crc` that was updated
/// with `input`: its `sum()` followed by its `amt_as_u32()`.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Compressed data. No checksum is tracked during compression;
    // the CRC is computed over the input separately below.
    let mut no_checksum = checksum::NoChecksum::new();
    let body_result = compress_data_dynamic(input, &mut output, &mut no_checksum, options.into());
    body_result.expect("Write error when writing compressed data!");

    // Trailer.
    let mut crc = Crc::new();
    crc.update(input);
    let (sum, amount) = (crc.sum(), crc.amt_as_u32());

    output
        .write_u32::<LittleEndian>(sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(amount)
        .expect("Write error when writing amt!");

    output
}
273 
/// Compress a slice of bytes with DEFLATE at the default compression level, wrapped in a gzip
/// header and trailer. The gzip header is built from `GzBuilder::new()`.
///
/// This is [`deflate_bytes_gzip_conf`] called with `Compression::Default`.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let default_header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, default_header)
}
292 
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    #[cfg(feature = "gzip")]
    use crate::test_utils::decompress_gzip;
    use crate::test_utils::{decompress_to_end, decompress_zlib, get_test_data};

    /// Short alias to keep the test bodies compact.
    type CO = CompressionOptions;

    /// Write `data` to `writer` in chunks of at most `chunk_size` bytes.
    ///
    /// Panics if any write fails.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    /// Compress `data` with the zlib wrapper at `level` and check that decompressing the
    /// result gives back `data`.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        // Only use assert_eq (which prints both sides on failure) for short inputs.
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    /// Round-trip an empty slice at the given level.
    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress the test data through a `ZlibEncoder` fed in chunks of `chunk_size` bytes and
    /// check that the output round-trips and is identical to the one-shot function's output.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` at `level` for a range of chunk sizes.
    fn writer_chunks_level(level: CompressionOptions) {
        use crate::input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);

        // Print some context around the first mismatch before the assertion below fails.
        // The context window is clamped to the bounds of both buffers so that a mismatch
        // close to either end cannot panic here and hide the actual failure.
        if let Some(n) = input.iter().zip(result.iter()).position(|(a, b)| a != b) {
            println!("First difference at {}, input: {}, output: {}", n, input[n], result[n]);
            let start = n.saturating_sub(3);
            let end = (n + 3).min(input.len()).min(result.len());
            println!(
                "input: {:?}, output: {:?}",
                &input[start..end],
                &result[start..end]
            );
        } else if input.len() != result.len() {
            // The common prefix matches, so the buffers can only differ in length.
            println!(
                "Length mismatch, input: {}, output: {}",
                input.len(),
                result.len()
            );
        }

        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // Uncomment to dump the compressed output to a file for manual inspection:
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Test the writer by inputting data one chunk at a time.
    #[ignore]
    #[test]
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(&vec![5; 100000], CO::default());
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    /// Round-trip very short inputs at each compression level.
    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
492