//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
2 //! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP content encoding, the `.png` image format, the Unix `gzip` program and commonly in `.zip`
12 //! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
13 //! some extra metadata and a checksum to validate the integrity of the raw data.
14 //!
15 //! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16 //! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17 //! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49
50 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
51
52 #[cfg(all(test, feature = "benchmarks"))]
53 extern crate test as test_std;
54
55 #[cfg(test)]
56 extern crate miniz_oxide;
57
58 extern crate adler32;
59 extern crate byteorder;
60 #[cfg(feature = "gzip")]
61 extern crate gzip_header;
62
63 mod bit_reverse;
64 mod bitstream;
65 mod chained_hash_table;
66 mod checksum;
67 mod compress;
68 mod compression_options;
69 mod deflate_state;
70 mod encoder_state;
71 mod huffman_lengths;
72 mod huffman_table;
73 mod input_buffer;
74 mod length_encode;
75 mod lz77;
76 mod lzvalue;
77 mod matching;
78 mod output_writer;
79 mod rle;
80 mod stored_block;
81 #[cfg(test)]
82 mod test_utils;
83 mod writer;
84 mod zlib;
85
86 use std::io;
87 use std::io::Write;
88
89 use byteorder::BigEndian;
90 #[cfg(feature = "gzip")]
91 use byteorder::LittleEndian;
92 #[cfg(feature = "gzip")]
93 use gzip_header::Crc;
94 #[cfg(feature = "gzip")]
95 use gzip_header::GzBuilder;
96
97 use crate::checksum::RollingChecksum;
98 use crate::deflate_state::DeflateState;
99
100 use crate::compress::Flush;
101 pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
102 pub use lz77::MatchingType;
103
104 use crate::writer::compress_until_done;
105
/// Encoders implementing a `Write` interface.
///
/// This module only re-exports the encoder types defined in the crate-internal `writer`
/// module, so that they can be reached as `deflate::write::<Encoder>`.
pub mod write {
    // `GzEncoder` is only available when the crate is built with the `gzip` feature.
    #[cfg(feature = "gzip")]
    pub use crate::writer::gzip::GzEncoder;
    pub use crate::writer::{DeflateEncoder, ZlibEncoder};
}
112
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>113 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
114 input: &[u8],
115 writer: &mut W,
116 mut checksum: RC,
117 compression_options: CompressionOptions,
118 ) -> io::Result<()> {
119 checksum.update_from_slice(input);
120 // We use a box here to avoid putting the buffers on the stack
121 // It's done here rather than in the structs themselves for now to
122 // keep the data close in memory.
123 let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
124 compress_until_done(input, &mut deflate_state, Flush::Finish)
125 }
126
127 /// Compress the given slice of bytes with DEFLATE compression.
128 ///
129 /// Returns a `Vec<u8>` of the compressed data.
130 ///
131 /// # Examples
132 ///
133 /// ```
134 /// use deflate::{deflate_bytes_conf, Compression};
135 ///
136 /// let data = b"This is some test data";
137 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
138 /// # let _ = compressed_data;
139 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>140 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
141 let mut writer = Vec::with_capacity(input.len() / 3);
142 compress_data_dynamic(
143 input,
144 &mut writer,
145 checksum::NoChecksum::new(),
146 options.into(),
147 )
148 .expect("Write error!");
149 writer
150 }
151
152 /// Compress the given slice of bytes with DEFLATE compression using the default compression
153 /// level.
154 ///
155 /// Returns a `Vec<u8>` of the compressed data.
156 ///
157 /// # Examples
158 ///
159 /// ```
160 /// use deflate::deflate_bytes;
161 ///
162 /// let data = b"This is some test data";
163 /// let compressed_data = deflate_bytes(data);
164 /// # let _ = compressed_data;
165 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>166 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
167 deflate_bytes_conf(input, Compression::Default)
168 }
169
170 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
171 ///
172 /// Returns a `Vec<u8>` of the compressed data.
173 ///
174 /// Zlib dictionaries are not yet suppored.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// use deflate::{deflate_bytes_zlib_conf, Compression};
180 ///
181 /// let data = b"This is some test data";
182 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
183 /// # let _ = compressed_data;
184 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>185 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
186 use byteorder::WriteBytesExt;
187 let mut writer = Vec::with_capacity(input.len() / 3);
188 // Write header
189 zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
190 .expect("Write error when writing zlib header!");
191
192 let mut checksum = checksum::Adler32Checksum::new();
193 compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
194 .expect("Write error when writing compressed data!");
195
196 let hash = checksum.current_hash();
197
198 writer
199 .write_u32::<BigEndian>(hash)
200 .expect("Write error when writing checksum!");
201 writer
202 }
203
204 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
205 /// using the default compression level.
206 ///
207 /// Returns a Vec<u8> of the compressed data.
208 ///
209 /// Zlib dictionaries are not yet suppored.
210 ///
211 /// # Examples
212 ///
213 /// ```
214 /// use deflate::deflate_bytes_zlib;
215 ///
216 /// let data = b"This is some test data";
217 /// let compressed_data = deflate_bytes_zlib(data);
218 /// # let _ = compressed_data;
219 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>220 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
221 deflate_bytes_zlib_conf(input, Compression::Default)
222 }
223
/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer
/// using the given gzip header and compression options.
///
/// The output consists of the header produced by `gzip_header`, the DEFLATE-compressed data,
/// and a trailer holding the checksum of the input followed by the input amount, both as
/// little-endian `u32` values.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Panics
///
/// Panics if writing any part of the output to the output vector reports an error.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header, as configured by the caller.
    let header = gzip_header.into_header();
    output
        .write_all(&header)
        .expect("Write error when writing header!");

    // Compressed data. The trailer checksum is computed separately below, so the
    // compressor itself gets a no-op checksum.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    // Trailer: checksum of the input, then the input amount.
    let mut crc = Crc::new();
    crc.update(input);
    let sum = crc.sum();
    let amount = crc.amt_as_u32();

    output
        .write_u32::<LittleEndian>(sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(amount)
        .expect("Write error when writing amt!");

    output
}
273
/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer,
/// using the default compression level, and a gzip header with default values.
///
/// This is a shorthand for calling `deflate_bytes_gzip_conf` with `Compression::Default` and
/// `GzBuilder::new()`.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let default_level = Compression::Default;
    let default_header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, default_level, default_header)
}
292
/// Round-trip tests for the one-shot compression functions and the `Write`-based encoders.
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;
    use test_utils::{decompress_to_end, decompress_zlib, get_test_data};

    type CO = CompressionOptions;

    /// Write `data` to `writer` in chunks of at most `chunk_size` bytes.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    /// A short string should round-trip and come out smaller than the input.
    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    /// The test file should round-trip and come out smaller than the input.
    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                // Clamp the context window to the bounds of both buffers so that a
                // mismatch close to the start or the end is reported instead of
                // causing an unrelated slicing panic.
                let start = n.saturating_sub(3);
                println!(
                    "input: {:?}, output: {:?}",
                    &input[start..(n + 3).min(input.len())],
                    &result[start..(n + 3).min(result.len())]
                );
                break;
            }
        }
        // `zip` stops at the shorter buffer, so a pure length mismatch is reported separately.
        if input.len() != result.len() {
            println!(
                "Length mismatch, input: {}, output: {}",
                input.len(),
                result.len()
            );
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    /// The test file should round-trip when using the RLE compression options.
    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    /// The test file should round-trip through the zlib wrapper and come out smaller.
    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // Debugging aid: uncomment to dump the compressed output to a file.
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    /// Input consisting of 32768 identical bytes followed by a few extra bytes.
    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    /// The test file should round-trip through the gzip wrapper, preserving the header comment.
    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress the test file through `ZlibEncoder` in chunks of `chunk_size` and check that
    /// the output round-trips and is identical to the output of `deflate_bytes_zlib_conf`.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` with a range of chunk sizes for the given compression options.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    /// Test the writer by inputting data one chunk at a time.
    #[ignore]
    #[test]
    fn zlib_writer_chunks() {
        writer_chunks_level(CO::default());
        writer_chunks_level(CO::fast());
        writer_chunks_level(CO::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(&vec![5; 100000], CO::default());
    }

    /// Compress `data` with the zlib wrapper and check that decompressing gives `data` back.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        // Only use `assert_eq!` (which prints both sides on failure) for short inputs.
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    /// Round-trip an empty slice with the given compression options.
    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CO::default());
        check_zero(CO::fast());
        check_zero(CO::rle());
    }

    /// Round-trip very short inputs with each set of compression options.
    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
492