//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
2 //! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP content encoding, the `.png` image format, the Unix `gzip` program and
//! commonly in `.zip` files. The `zlib` and `gzip` formats are wrappers around
//! DEFLATE-compressed data, containing some extra metadata and a checksum to validate the
//! integrity of the raw data.
14 //!
15 //! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16 //! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17 //! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49 
50 #![forbid(unsafe_code)]
51 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
52 
53 #[cfg(all(test, feature = "benchmarks"))]
54 extern crate test as test_std;
55 
56 #[cfg(test)]
57 extern crate miniz_oxide;
58 
59 extern crate adler32;
60 extern crate byteorder;
61 #[cfg(feature = "gzip")]
62 extern crate gzip_header;
63 
64 mod bit_reverse;
65 mod bitstream;
66 mod chained_hash_table;
67 mod checksum;
68 mod compress;
69 mod compression_options;
70 mod deflate_state;
71 mod encoder_state;
72 mod huffman_lengths;
73 mod huffman_table;
74 mod input_buffer;
75 mod length_encode;
76 mod lz77;
77 mod lzvalue;
78 mod matching;
79 mod output_writer;
80 mod rle;
81 mod stored_block;
82 #[cfg(test)]
83 mod test_utils;
84 mod writer;
85 mod zlib;
86 
87 use std::io;
88 use std::io::Write;
89 
90 use byteorder::BigEndian;
91 #[cfg(feature = "gzip")]
92 use byteorder::LittleEndian;
93 #[cfg(feature = "gzip")]
94 use gzip_header::Crc;
95 #[cfg(feature = "gzip")]
96 use gzip_header::GzBuilder;
97 
98 use crate::checksum::RollingChecksum;
99 use crate::deflate_state::DeflateState;
100 
101 use crate::compress::Flush;
102 pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
103 pub use lz77::MatchingType;
104 
105 use crate::writer::compress_until_done;
106 
107 /// Encoders implementing a `Write` interface.
108 pub mod write {
109     #[cfg(feature = "gzip")]
110     pub use crate::writer::gzip::GzEncoder;
111     pub use crate::writer::{DeflateEncoder, ZlibEncoder};
112 }
113 
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>114 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
115     input: &[u8],
116     writer: &mut W,
117     mut checksum: RC,
118     compression_options: CompressionOptions,
119 ) -> io::Result<()> {
120     checksum.update_from_slice(input);
121     // We use a box here to avoid putting the buffers on the stack
122     // It's done here rather than in the structs themselves for now to
123     // keep the data close in memory.
124     let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
125     compress_until_done(input, &mut deflate_state, Flush::Finish)
126 }
127 
128 /// Compress the given slice of bytes with DEFLATE compression.
129 ///
130 /// Returns a `Vec<u8>` of the compressed data.
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// use deflate::{deflate_bytes_conf, Compression};
136 ///
137 /// let data = b"This is some test data";
138 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
139 /// # let _ = compressed_data;
140 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>141 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
142     let mut writer = Vec::with_capacity(input.len() / 3);
143     compress_data_dynamic(
144         input,
145         &mut writer,
146         checksum::NoChecksum::new(),
147         options.into(),
148     )
149     .expect("Write error!");
150     writer
151 }
152 
153 /// Compress the given slice of bytes with DEFLATE compression using the default compression
154 /// level.
155 ///
156 /// Returns a `Vec<u8>` of the compressed data.
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use deflate::deflate_bytes;
162 ///
163 /// let data = b"This is some test data";
164 /// let compressed_data = deflate_bytes(data);
165 /// # let _ = compressed_data;
166 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>167 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
168     deflate_bytes_conf(input, Compression::Default)
169 }
170 
171 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
172 ///
173 /// Returns a `Vec<u8>` of the compressed data.
174 ///
175 /// Zlib dictionaries are not yet suppored.
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// use deflate::{deflate_bytes_zlib_conf, Compression};
181 ///
182 /// let data = b"This is some test data";
183 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
184 /// # let _ = compressed_data;
185 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>186 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
187     use byteorder::WriteBytesExt;
188     let mut writer = Vec::with_capacity(input.len() / 3);
189     // Write header
190     zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
191         .expect("Write error when writing zlib header!");
192 
193     let mut checksum = checksum::Adler32Checksum::new();
194     compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
195         .expect("Write error when writing compressed data!");
196 
197     let hash = checksum.current_hash();
198 
199     writer
200         .write_u32::<BigEndian>(hash)
201         .expect("Write error when writing checksum!");
202     writer
203 }
204 
205 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
206 /// using the default compression level.
207 ///
208 /// Returns a Vec<u8> of the compressed data.
209 ///
210 /// Zlib dictionaries are not yet suppored.
211 ///
212 /// # Examples
213 ///
214 /// ```
215 /// use deflate::deflate_bytes_zlib;
216 ///
217 /// let data = b"This is some test data";
218 /// let compressed_data = deflate_bytes_zlib(data);
219 /// # let _ = compressed_data;
220 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>221 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
222     deflate_bytes_zlib_conf(input, Compression::Default)
223 }
224 
/// Compress a byte slice with DEFLATE and wrap the result in a gzip header and trailer,
/// using the given gzip header builder and compression options.
///
/// The output consists of the header produced by `gzip_header`, the compressed data, and
/// two little-endian `u32` values: the CRC sum of the uncompressed input followed by the
/// input amount reported by the CRC helper.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Body. No rolling checksum is kept here; the CRC is computed separately below.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    // Trailer: CRC sum, then the amount of input, both little-endian.
    let mut crc = Crc::new();
    crc.update(input);
    let sum = crc.sum();
    let amount = crc.amt_as_u32();

    output
        .write_u32::<LittleEndian>(sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(amount)
        .expect("Write error when writing amt!");

    output
}
274 
/// Compress a byte slice with DEFLATE at the default compression level, wrapped in a gzip
/// header and trailer, using a gzip header with default values.
///
/// Shorthand for [`deflate_bytes_gzip_conf`] called with [`Compression::Default`] and
/// `GzBuilder::new()`.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let default_header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, default_header)
}
293 
/// Round-trip tests for the one-shot compression functions and the `Write`-based zlib encoder.
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;
    use test_utils::{decompress_to_end, decompress_zlib, get_test_data};

    type CO = CompressionOptions;

    /// Write `data` to `writer` in chunks of at most `chunk_size` bytes.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    /// A short string round-trips through `deflate_bytes` and gets smaller.
    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    /// The shared test data round-trips through `deflate_bytes` and gets smaller.
    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);

        // Report the first differing byte with a little context. `zip` stops at the shorter
        // of the two buffers, so a pure length mismatch is only caught by the assert below.
        if let Some(n) = input.iter().zip(result.iter()).position(|(a, b)| a != b) {
            println!("First difference at {}, input: {}, output: {}", n, input[n], result[n]);
            // Clamp the context window to the buffers: a mismatch within three bytes of
            // either end must still be reported rather than panic on the slice bounds.
            let start = n.saturating_sub(3);
            println!(
                "input: {:?}, output: {:?}",
                &input[start..(n + 3).min(input.len())],
                &result[start..(n + 3).min(result.len())]
            );
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    /// The shared test data round-trips when compressed with the RLE options.
    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    /// The shared test data round-trips through `deflate_bytes_zlib` and gets smaller.
    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // Uncomment to dump the compressed output to a file for manual inspection.
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    /// A six-byte input round-trips through the zlib wrapper.
    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    /// An input of 32768 identical bytes followed by five more bytes round-trips.
    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    /// A six-byte input round-trips through `deflate_bytes` and compresses to five bytes.
    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    /// The gzip wrapper round-trips both the data and a header comment.
    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress the test data through `ZlibEncoder` in `chunk_size`-byte writes, then check
    /// that the output decompresses to the input and is byte-identical to the output of the
    /// one-shot `deflate_bytes_zlib_conf`.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            // Scoped so the encoder's mutable borrow of `compressed` ends before it is read.
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` at `level` over a range of chunk sizes.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    /// Test the writer by inputting data in one chunk at the time.
    // NOTE(review): ignored by default; the reason is not recorded in this file.
    #[ignore]
    #[test]
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    /// Compress `data` with the zlib wrapper at `level` and assert that it decompresses
    /// back to `data`.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        if data.len() <= 32 {
            // Small inputs: let `assert_eq!` print both sides on failure.
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            // Larger inputs: avoid dumping the whole buffers into the failure message.
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    /// Round-trip an empty slice at `level`.
    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    /// Very short inputs round-trip with each set of compression options.
    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        // NOTE(review): despite the name, this slice holds four values — confirm intent.
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
493