//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP compression, in the `.png` image format, by the Unix `gzip` program and commonly
//! in `.zip` files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data,
//! containing some extra metadata and a checksum to validate the integrity of the raw data.
14 //!
15 //! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16 //! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17 //! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49
50 #![forbid(unsafe_code)]
51 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
52
53 #[cfg(all(test, feature = "benchmarks"))]
54 extern crate test as test_std;
55
56 #[cfg(test)]
57 extern crate miniz_oxide;
58
59 extern crate adler32;
60 extern crate byteorder;
61 #[cfg(feature = "gzip")]
62 extern crate gzip_header;
63
64 mod bit_reverse;
65 mod bitstream;
66 mod chained_hash_table;
67 mod checksum;
68 mod compress;
69 mod compression_options;
70 mod deflate_state;
71 mod encoder_state;
72 mod huffman_lengths;
73 mod huffman_table;
74 mod input_buffer;
75 mod length_encode;
76 mod lz77;
77 mod lzvalue;
78 mod matching;
79 mod output_writer;
80 mod rle;
81 mod stored_block;
82 #[cfg(test)]
83 mod test_utils;
84 mod writer;
85 mod zlib;
86
87 use std::io;
88 use std::io::Write;
89
90 use byteorder::BigEndian;
91 #[cfg(feature = "gzip")]
92 use byteorder::LittleEndian;
93 #[cfg(feature = "gzip")]
94 use gzip_header::Crc;
95 #[cfg(feature = "gzip")]
96 use gzip_header::GzBuilder;
97
98 use crate::checksum::RollingChecksum;
99 use crate::deflate_state::DeflateState;
100
101 use crate::compress::Flush;
102 pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
103 pub use lz77::MatchingType;
104
105 use crate::writer::compress_until_done;
106
107 /// Encoders implementing a `Write` interface.
108 pub mod write {
109 #[cfg(feature = "gzip")]
110 pub use crate::writer::gzip::GzEncoder;
111 pub use crate::writer::{DeflateEncoder, ZlibEncoder};
112 }
113
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>114 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
115 input: &[u8],
116 writer: &mut W,
117 mut checksum: RC,
118 compression_options: CompressionOptions,
119 ) -> io::Result<()> {
120 checksum.update_from_slice(input);
121 // We use a box here to avoid putting the buffers on the stack
122 // It's done here rather than in the structs themselves for now to
123 // keep the data close in memory.
124 let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
125 compress_until_done(input, &mut deflate_state, Flush::Finish)
126 }
127
128 /// Compress the given slice of bytes with DEFLATE compression.
129 ///
130 /// Returns a `Vec<u8>` of the compressed data.
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// use deflate::{deflate_bytes_conf, Compression};
136 ///
137 /// let data = b"This is some test data";
138 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
139 /// # let _ = compressed_data;
140 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>141 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
142 let mut writer = Vec::with_capacity(input.len() / 3);
143 compress_data_dynamic(
144 input,
145 &mut writer,
146 checksum::NoChecksum::new(),
147 options.into(),
148 )
149 .expect("Write error!");
150 writer
151 }
152
153 /// Compress the given slice of bytes with DEFLATE compression using the default compression
154 /// level.
155 ///
156 /// Returns a `Vec<u8>` of the compressed data.
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use deflate::deflate_bytes;
162 ///
163 /// let data = b"This is some test data";
164 /// let compressed_data = deflate_bytes(data);
165 /// # let _ = compressed_data;
166 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>167 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
168 deflate_bytes_conf(input, Compression::Default)
169 }
170
171 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
172 ///
173 /// Returns a `Vec<u8>` of the compressed data.
174 ///
175 /// Zlib dictionaries are not yet suppored.
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// use deflate::{deflate_bytes_zlib_conf, Compression};
181 ///
182 /// let data = b"This is some test data";
183 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
184 /// # let _ = compressed_data;
185 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>186 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
187 use byteorder::WriteBytesExt;
188 let mut writer = Vec::with_capacity(input.len() / 3);
189 // Write header
190 zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
191 .expect("Write error when writing zlib header!");
192
193 let mut checksum = checksum::Adler32Checksum::new();
194 compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
195 .expect("Write error when writing compressed data!");
196
197 let hash = checksum.current_hash();
198
199 writer
200 .write_u32::<BigEndian>(hash)
201 .expect("Write error when writing checksum!");
202 writer
203 }
204
205 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
206 /// using the default compression level.
207 ///
208 /// Returns a Vec<u8> of the compressed data.
209 ///
210 /// Zlib dictionaries are not yet suppored.
211 ///
212 /// # Examples
213 ///
214 /// ```
215 /// use deflate::deflate_bytes_zlib;
216 ///
217 /// let data = b"This is some test data";
218 /// let compressed_data = deflate_bytes_zlib(data);
219 /// # let _ = compressed_data;
220 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>221 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
222 deflate_bytes_zlib_conf(input, Compression::Default)
223 }
224
/// Compress a byte slice with DEFLATE and wrap it in a gzip header and trailer, using the
/// supplied compression options and gzip header builder.
///
/// The output consists of the header produced by `gzip_header`, the compressed data, the
/// CRC sum of `input` and the input length as reported by the `Crc` helper (the latter two
/// written as little-endian 32-bit values).
/// The compressed data is returned as a `Vec<u8>`.
///
/// # Panics
///
/// Panics if writing the header, the compressed data or the trailer fails.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Body. No rolling checksum is used here; the CRC is computed separately below.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    // Trailer: CRC of the uncompressed input followed by the amount of input.
    let mut crc = Crc::new();
    crc.update(input);
    let crc_sum = crc.sum();
    let input_amount = crc.amt_as_u32();

    output
        .write_u32::<LittleEndian>(crc_sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(input_amount)
        .expect("Write error when writing amt!");

    output
}
274
/// Compress a byte slice with DEFLATE at the default compression level and wrap it in a
/// gzip header and trailer, using a gzip header with default values.
///
/// This is a shorthand for calling `deflate_bytes_gzip_conf` with `Compression::Default`
/// and `GzBuilder::new()`.
/// The compressed data is returned as a `Vec<u8>`.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let default_header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, default_header)
}
293
294 #[cfg(test)]
295 mod test {
296 use super::*;
297 use std::io::Write;
298
299 #[cfg(feature = "gzip")]
300 use test_utils::decompress_gzip;
301 use test_utils::{decompress_to_end, decompress_zlib, get_test_data};
302
/// Short alias for `CompressionOptions`, used to keep the test calls compact.
type CO = CompressionOptions;
304
/// Feed `data` to `writer` piece by piece, each piece holding at most `chunk_size` bytes.
///
/// Panics if any of the writes fails, or if `chunk_size` is zero.
fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
    data.chunks(chunk_size)
        .for_each(|piece| writer.write_all(piece).unwrap());
}
311
/// Round-trip a short in-memory string and check that the output is smaller than the input.
#[test]
fn dynamic_string_mem() {
    // The leading whitespace is part of the test data.
    let original = " GNU GENERAL PUBLIC LICENSE".as_bytes().to_vec();

    let compressed = deflate_bytes(&original);
    assert!(compressed.len() < original.len());

    let decompressed = decompress_to_end(&compressed);
    assert_eq!(original, decompressed);
}
322
/// Round-trip the shared test data through `deflate_bytes` and check that the output is
/// both correct and smaller than the input.
///
/// On a mismatch, the position of the first differing byte and a small window around it
/// are printed before the assertion fails.
#[test]
fn dynamic_string_file() {
    let input = get_test_data();
    let compressed = deflate_bytes(&input);

    let result = decompress_to_end(&compressed);
    if let Some(n) = input
        .iter()
        .zip(result.iter())
        .position(|(a, b)| a != b)
    {
        println!(
            "First difference at {}, input: {}, output: {}",
            n, input[n], result[n]
        );
        // Clamp the window to the bounds of both buffers so that a difference near the
        // start or the end of the data does not cause an indexing panic that would hide
        // the actual test failure.
        let start = n.saturating_sub(3);
        let end = (n + 3).min(input.len()).min(result.len());
        println!(
            "input: {:?}, output: {:?}",
            &input[start..end],
            &result[start..end]
        );
    }
    // Not using assert_eq here deliberately to avoid massive amounts of output spam
    assert!(input == result);
    // Check that we actually managed to compress the input
    assert!(compressed.len() < input.len());
}
345
/// Round-trip the shared test data using the RLE compression options.
#[test]
fn file_rle() {
    let original = get_test_data();
    let compressed = deflate_bytes_conf(&original, CO::rle());
    let decompressed = decompress_to_end(&compressed);

    // Plain `assert!` keeps the failure output short for large buffers.
    assert!(original == decompressed);
}
354
/// Round-trip the shared test data through the zlib wrapper at the default level and
/// check that the output is smaller than the input.
#[test]
fn file_zlib() {
    let original = get_test_data();
    let compressed = deflate_bytes_zlib(&original);

    println!("file_zlib compressed(default) length: {}", compressed.len());

    let decompressed = decompress_zlib(&compressed);

    assert!(original == decompressed);
    assert!(compressed.len() < original.len());
}
374
/// Round-trip a very short input through the zlib wrapper.
#[test]
fn zlib_short() {
    roundtrip_zlib(&[10, 10, 10, 10, 10, 55], CO::default());
}
380
/// Round-trip 32768 identical bytes followed by a few distinct trailing bytes.
#[test]
fn zlib_last_block() {
    let mut input = vec![22u8; 32768];
    input.extend_from_slice(&[5, 2, 55, 11, 12]);

    roundtrip_zlib(&input, CO::default());
}
387
/// Round-trip a very short input as raw DEFLATE and check the exact compressed size.
#[test]
fn deflate_short() {
    let test_data: [u8; 6] = [10, 10, 10, 10, 10, 55];

    let compressed = deflate_bytes(&test_data);
    let decompressed = decompress_to_end(&compressed);

    assert_eq!(&test_data[..], decompressed.as_slice());
    // With the right block type and compression choices the output is exactly 5 bytes.
    assert_eq!(compressed.len(), 5);
}
398
/// Round-trip the shared test data through the gzip wrapper and check that the header
/// comment survives.
#[cfg(feature = "gzip")]
#[test]
fn gzip() {
    let original = get_test_data();
    let comment = b"Test";

    let header = GzBuilder::new().comment(&comment[..]);
    let compressed = deflate_bytes_gzip_conf(&original, Compression::Default, header);

    let (decoded_header, decompressed) = decompress_gzip(&compressed);
    assert_eq!(decoded_header.comment().unwrap(), comment);
    assert!(original == decompressed);
}
413
chunk_test(chunk_size: usize, level: CompressionOptions)414 fn chunk_test(chunk_size: usize, level: CompressionOptions) {
415 let mut compressed = Vec::with_capacity(32000);
416 let data = get_test_data();
417 {
418 let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
419 chunked_write(&mut compressor, &data, chunk_size);
420 compressor.finish().unwrap();
421 }
422 let compressed2 = deflate_bytes_zlib_conf(&data, level);
423 let res = decompress_zlib(&compressed);
424 assert!(res == data);
425 assert_eq!(compressed.len(), compressed2.len());
426 assert!(compressed == compressed2);
427 }
428
writer_chunks_level(level: CompressionOptions)429 fn writer_chunks_level(level: CompressionOptions) {
430 use input_buffer::BUFFER_SIZE;
431 let ct = |n| chunk_test(n, level);
432 ct(1);
433 ct(50);
434 ct(400);
435 ct(32768);
436 ct(BUFFER_SIZE);
437 ct(50000);
438 ct((32768 * 2) + 258);
439 }
440
/// Test the writer by feeding it the input one chunk at a time, for several option presets.
///
/// NOTE(review): this test is marked `#[ignore]`, so it only runs when explicitly requested.
#[test]
#[ignore]
fn zlib_writer_chunks() {
    let presets = [
        CompressionOptions::default(),
        CompressionOptions::fast(),
        CompressionOptions::rle(),
    ];
    for &level in presets.iter() {
        writer_chunks_level(level);
    }
}
449
/// Compress a long run of identical bytes to make sure the frequency values don't overflow.
#[test]
fn frequency_overflow() {
    let repeated = vec![5u8; 100000];
    let options = compression_options::CompressionOptions::default();

    // Only the absence of a panic matters; the output itself is discarded.
    let _ = deflate_bytes_conf(&repeated, options);
}
458
roundtrip_zlib(data: &[u8], level: CompressionOptions)459 fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
460 let compressed = deflate_bytes_zlib_conf(data, level);
461 let res = decompress_zlib(&compressed);
462 if data.len() <= 32 {
463 assert_eq!(res, data, "Failed with level: {:?}", level);
464 } else {
465 assert!(res == data, "Failed with level: {:?}", level);
466 }
467 }
468
check_zero(level: CompressionOptions)469 fn check_zero(level: CompressionOptions) {
470 roundtrip_zlib(&[], level);
471 }
472
/// Round-trip an empty slice with each of the option presets.
#[test]
fn empty_input() {
    let presets = [
        CompressionOptions::default(),
        CompressionOptions::fast(),
        CompressionOptions::rle(),
    ];
    for &level in presets.iter() {
        check_zero(level);
    }
}
480
/// Round-trip two tiny inputs with each of the option presets.
///
/// NOTE(review): despite the test name, the second input holds four bytes, not two.
#[test]
fn one_and_two_values() {
    let inputs: [&[u8]; 2] = [&[1], &[5, 6, 7, 8]];

    for &data in inputs.iter() {
        roundtrip_zlib(data, CO::rle());
        roundtrip_zlib(data, CO::fast());
        roundtrip_zlib(data, CO::default());
    }
}
492 }
493