//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
3 //!
4 //! This library provides functions to compress data using the DEFLATE algorithm,
5 //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6 //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7 //! The current implementation is still a bit lacking speed-wise compared to C-libraries
8 //! like zlib and miniz.
9 //!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! e.g. for HTTP content encoding, in the `.png` image format, by the Unix `gzip` program and
//! commonly in `.zip` files. The `zlib` and `gzip` formats are wrappers around
//! DEFLATE-compressed data, containing some extra metadata and a checksum to validate the
//! integrity of the raw data.
14 //!
//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
//! the `DEFLATE` format (with or without wrappers) is not required.
18 //!
19 //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default,
20 //! but can be enabled with the `gzip` feature.
21 //!
22 //! As this library is still in development, the compression output may change slightly
23 //! between versions.
24 //!
25 //!
26 //! # Examples:
27 //! ## Simple compression function:
28 //! ``` rust
29 //! use deflate::deflate_bytes;
30 //!
31 //! let data = b"Some data";
32 //! let compressed = deflate_bytes(data);
33 //! # let _ = compressed;
34 //! ```
35 //!
36 //! ## Using a writer:
37 //! ``` rust
38 //! use std::io::Write;
39 //!
40 //! use deflate::Compression;
41 //! use deflate::write::ZlibEncoder;
42 //!
43 //! let data = b"This is some test data";
44 //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45 //! encoder.write_all(data).expect("Write error!");
46 //! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47 //! # let _ = compressed_data;
48 //! ```
49
50 #![cfg_attr(all(feature = "benchmarks", test), feature(test))]
51
52 #[cfg(all(test, feature = "benchmarks"))]
53 extern crate test as test_std;
54
55 #[cfg(test)]
56 extern crate flate2;
57 // #[cfg(test)]
58 // extern crate inflate;
59
60 extern crate adler32;
61 extern crate byteorder;
62 #[cfg(feature = "gzip")]
63 extern crate gzip_header;
64
65 mod compression_options;
66 mod huffman_table;
67 mod lz77;
68 mod lzvalue;
69 mod chained_hash_table;
70 mod length_encode;
71 mod output_writer;
72 mod stored_block;
73 mod huffman_lengths;
74 mod zlib;
75 mod checksum;
76 mod bit_reverse;
77 mod bitstream;
78 mod encoder_state;
79 mod matching;
80 mod input_buffer;
81 mod deflate_state;
82 mod compress;
83 mod rle;
84 mod writer;
85 #[cfg(test)]
86 mod test_utils;
87
88 use std::io::Write;
89 use std::io;
90
91 use byteorder::BigEndian;
92 #[cfg(feature = "gzip")]
93 use gzip_header::GzBuilder;
94 #[cfg(feature = "gzip")]
95 use gzip_header::Crc;
96 #[cfg(feature = "gzip")]
97 use byteorder::LittleEndian;
98
99 use checksum::RollingChecksum;
100 use deflate_state::DeflateState;
101
102 pub use compression_options::{CompressionOptions, SpecialOptions, Compression};
103 use compress::Flush;
104 pub use lz77::MatchingType;
105
106 use writer::compress_until_done;
107
108 /// Encoders implementing a `Write` interface.
109 pub mod write {
110 pub use writer::{DeflateEncoder, ZlibEncoder};
111 #[cfg(feature = "gzip")]
112 pub use writer::gzip::GzEncoder;
113 }
114
115
compress_data_dynamic<RC: RollingChecksum, W: Write>( input: &[u8], writer: &mut W, mut checksum: RC, compression_options: CompressionOptions, ) -> io::Result<()>116 fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
117 input: &[u8],
118 writer: &mut W,
119 mut checksum: RC,
120 compression_options: CompressionOptions,
121 ) -> io::Result<()> {
122 checksum.update_from_slice(input);
123 // We use a box here to avoid putting the buffers on the stack
124 // It's done here rather than in the structs themselves for now to
125 // keep the data close in memory.
126 let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
127 compress_until_done(input, &mut deflate_state, Flush::Finish)
128 }
129
130 /// Compress the given slice of bytes with DEFLATE compression.
131 ///
132 /// Returns a `Vec<u8>` of the compressed data.
133 ///
134 /// # Examples
135 ///
136 /// ```
137 /// use deflate::{deflate_bytes_conf, Compression};
138 ///
139 /// let data = b"This is some test data";
140 /// let compressed_data = deflate_bytes_conf(data, Compression::Best);
141 /// # let _ = compressed_data;
142 /// ```
deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>143 pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
144 let mut writer = Vec::with_capacity(input.len() / 3);
145 compress_data_dynamic(
146 input,
147 &mut writer,
148 checksum::NoChecksum::new(),
149 options.into(),
150 ).expect("Write error!");
151 writer
152 }
153
154 /// Compress the given slice of bytes with DEFLATE compression using the default compression
155 /// level.
156 ///
157 /// Returns a `Vec<u8>` of the compressed data.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use deflate::deflate_bytes;
163 ///
164 /// let data = b"This is some test data";
165 /// let compressed_data = deflate_bytes(data);
166 /// # let _ = compressed_data;
167 /// ```
deflate_bytes(input: &[u8]) -> Vec<u8>168 pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
169 deflate_bytes_conf(input, Compression::Default)
170 }
171
172 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
173 ///
174 /// Returns a `Vec<u8>` of the compressed data.
175 ///
176 /// Zlib dictionaries are not yet suppored.
177 ///
178 /// # Examples
179 ///
180 /// ```
181 /// use deflate::{deflate_bytes_zlib_conf, Compression};
182 ///
183 /// let data = b"This is some test data";
184 /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
185 /// # let _ = compressed_data;
186 /// ```
deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8>187 pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
188 use byteorder::WriteBytesExt;
189 let mut writer = Vec::with_capacity(input.len() / 3);
190 // Write header
191 zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
192 .expect("Write error when writing zlib header!");
193
194 let mut checksum = checksum::Adler32Checksum::new();
195 compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
196 .expect("Write error when writing compressed data!");
197
198 let hash = checksum.current_hash();
199
200 writer
201 .write_u32::<BigEndian>(hash)
202 .expect("Write error when writing checksum!");
203 writer
204 }
205
206 /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
207 /// using the default compression level.
208 ///
209 /// Returns a Vec<u8> of the compressed data.
210 ///
211 /// Zlib dictionaries are not yet suppored.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// use deflate::deflate_bytes_zlib;
217 ///
218 /// let data = b"This is some test data";
219 /// let compressed_data = deflate_bytes_zlib(data);
220 /// # let _ = compressed_data;
221 /// ```
deflate_bytes_zlib(input: &[u8]) -> Vec<u8>222 pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
223 deflate_bytes_zlib_conf(input, Compression::Default)
224 }
225
/// Compress a byte slice with DEFLATE and wrap the result in a gzip header and trailer,
/// using the given compression options and gzip header builder.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Panics
///
/// Panics if writing the header, the compressed data or either trailer field reports an
/// I/O error.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header bytes, as produced by the supplied builder.
    let header = gzip_header.into_header();
    output
        .write_all(&header)
        .expect("Write error when writing header!");

    // Compressed body. The compressor gets a `NoChecksum`; the CRC for the trailer is
    // computed separately below.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    // Trailer: `crc.sum()` followed by `crc.amt_as_u32()`, both little-endian.
    let mut crc = Crc::new();
    crc.update(input);
    let (sum, amount) = (crc.sum(), crc.amt_as_u32());

    output
        .write_u32::<LittleEndian>(sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(amount)
        .expect("Write error when writing amt!");
    output
}
275
/// Compress a byte slice with DEFLATE and wrap the result in a gzip header and trailer,
/// using `Compression::Default` and the header produced by `GzBuilder::new()`.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let default_header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, default_header)
}
294
295 #[cfg(test)]
296 mod test {
297 use super::*;
298 use std::io::Write;
299
300 use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
301 #[cfg(feature = "gzip")]
302 use test_utils::decompress_gzip;
303
// Short alias for `CompressionOptions`, used by the tests in this module.
type CO = CompressionOptions;
305
/// Feed `data` to `writer` piece by piece, each piece being at most `chunk_size` bytes long.
///
/// Panics if any individual write fails.
fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
    data.chunks(chunk_size)
        .for_each(|piece| writer.write_all(piece).unwrap());
}
312
/// Round-trip a short string through `deflate_bytes` and check that it got smaller.
#[test]
fn dynamic_string_mem() {
    // The input must contain enough redundancy for the size assertion below to be
    // satisfiable: a 27-byte phrase with a single leading space cannot be deflated to
    // fewer than 27 bytes. The leading run of spaces is therefore built explicitly, so it
    // cannot be lost to whitespace normalisation of a string literal.
    let mut test_data = vec![b' '; 20];
    test_data.extend_from_slice(b"GNU GENERAL PUBLIC LICENSE");
    let compressed = deflate_bytes(&test_data);

    assert!(compressed.len() < test_data.len());

    let result = decompress_to_end(&compressed);
    assert_eq!(test_data, result);
}
323
/// Round-trip the shared test data through `deflate_bytes` and check that it got smaller.
///
/// On a mismatch, the position of the first differing byte and a small window around it
/// are printed before the assertion fails.
#[test]
fn dynamic_string_file() {
    let input = get_test_data();
    let compressed = deflate_bytes(&input);

    let result = decompress_to_end(&compressed);
    let first_difference = input.iter().zip(result.iter()).position(|(a, b)| a != b);
    if let Some(n) = first_difference {
        println!(
            "First difference at {}, input: {}, output: {}",
            n,
            input[n],
            result[n]
        );
        // Clamp the window to the bounds of both buffers so that a mismatch near the start
        // or the end does not panic here and hide the real failure below.
        let start = n.saturating_sub(3);
        let input_end = ::std::cmp::min(n + 3, input.len());
        let result_end = ::std::cmp::min(n + 3, result.len());
        println!(
            "input: {:?}, output: {:?}",
            &input[start..input_end],
            &result[start..result_end]
        );
    } else if input.len() != result.len() {
        // `zip` stops at the shorter buffer, so a pure length mismatch shows up only here.
        println!(
            "Length mismatch, input: {}, output: {}",
            input.len(),
            result.len()
        );
    }
    // Not using assert_eq here deliberately to avoid massive amounts of output spam
    assert!(input == result);
    // Check that we actually managed to compress the input
    assert!(compressed.len() < input.len());
}
346
/// Round-trip the shared test data using the `rle` compression options.
#[test]
fn file_rle() {
    let input = get_test_data();
    let result = decompress_to_end(&deflate_bytes_conf(&input, CO::rle()));

    // Plain `assert!` rather than `assert_eq!` so a failure does not dump both buffers.
    assert!(input == result);
}
355
/// Round-trip the shared test data through the zlib wrapper at the default level and check
/// that the output is smaller than the input.
#[test]
fn file_zlib() {
    let test_data = get_test_data();
    let compressed = deflate_bytes_zlib(&test_data);

    // For manual inspection, `compressed` can be written to a file (e.g. "out.zlib") here.
    println!("file_zlib compressed(default) length: {}", compressed.len());

    let result = decompress_zlib(&compressed);
    assert!(test_data == result);
    assert!(compressed.len() < test_data.len());
}
375
/// Round-trip a six-byte input through the zlib wrapper with the default options.
#[test]
fn zlib_short() {
    let test_data: [u8; 6] = [10, 10, 10, 10, 10, 55];
    roundtrip_zlib(&test_data, CO::default());
}
381
/// Round-trip 32768 identical bytes followed by a short, distinct tail.
#[test]
fn zlib_last_block() {
    let tail = [5, 2, 55, 11, 12];
    let mut test_data = vec![22u8; 32768];
    test_data.extend_from_slice(&tail);
    roundtrip_zlib(&test_data, CO::default());
}
388
/// Round-trip a six-byte input through raw DEFLATE and pin the size of the output.
#[test]
fn deflate_short() {
    let test_data: [u8; 6] = [10, 10, 10, 10, 10, 55];
    let compressed = deflate_bytes(&test_data);

    let result = decompress_to_end(&compressed);
    assert_eq!(&test_data[..], &result[..]);

    // With the right block type and compression selected, five bytes are enough.
    assert_eq!(compressed.len(), 5);
}
399
/// Round-trip the shared test data through the gzip wrapper and check that the header
/// comment survives as well.
#[cfg(feature = "gzip")]
#[test]
fn gzip() {
    let data = get_test_data();
    let comment = b"Test";
    let header = GzBuilder::new().comment(&comment[..]);
    let compressed = deflate_bytes_gzip_conf(&data, Compression::Default, header);

    let (dec, decompressed) = decompress_gzip(&compressed);
    assert_eq!(dec.header().comment().unwrap(), comment);
    assert!(data == decompressed);
}
414
chunk_test(chunk_size: usize, level: CompressionOptions)415 fn chunk_test(chunk_size: usize, level: CompressionOptions) {
416 let mut compressed = Vec::with_capacity(32000);
417 let data = get_test_data();
418 {
419 let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
420 chunked_write(&mut compressor, &data, chunk_size);
421 compressor.finish().unwrap();
422 }
423 let compressed2 = deflate_bytes_zlib_conf(&data, level);
424 let res = decompress_zlib(&compressed);
425 assert!(res == data);
426 assert_eq!(compressed.len(), compressed2.len());
427 assert!(compressed == compressed2);
428 }
429
writer_chunks_level(level: CompressionOptions)430 fn writer_chunks_level(level: CompressionOptions) {
431 use input_buffer::BUFFER_SIZE;
432 let ct = |n| chunk_test(n, level);
433 ct(1);
434 ct(50);
435 ct(400);
436 ct(32768);
437 ct(BUFFER_SIZE);
438 ct(50000);
439 ct((32768 * 2) + 258);
440 }
441
/// Test the writer by feeding it the input one chunk at a time, for the `default`, `fast`
/// and `rle` compression options in turn.
#[ignore]
#[test]
fn zlib_writer_chunks() {
    let levels = [
        CompressionOptions::default(),
        CompressionOptions::fast(),
        CompressionOptions::rle(),
    ];
    for &level in levels.iter() {
        writer_chunks_level(level);
    }
}
450
/// Compress 100000 identical bytes to check that the frequency values don't overflow.
#[test]
fn frequency_overflow() {
    let input = vec![5u8; 100_000];
    let _ = deflate_bytes_conf(&input, CompressionOptions::default());
}
459
roundtrip_zlib(data: &[u8], level: CompressionOptions)460 fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
461 let compressed = deflate_bytes_zlib_conf(data, level);
462 let res = decompress_zlib(&compressed);
463 if data.len() <= 32 {
464 assert_eq!(res, data, "Failed with level: {:?}", level);
465 } else {
466 assert!(res == data, "Failed with level: {:?}", level);
467 }
468 }
469
check_zero(level: CompressionOptions)470 fn check_zero(level: CompressionOptions) {
471 roundtrip_zlib(&[], level);
472 }
473
/// Compress an empty slice with the `default`, `fast` and `rle` compression options.
#[test]
fn empty_input() {
    let levels = [
        CompressionOptions::default(),
        CompressionOptions::fast(),
        CompressionOptions::rle(),
    ];
    for &level in levels.iter() {
        check_zero(level);
    }
}
481
/// Round-trip two very short inputs (a single byte, then four distinct bytes) with the
/// `rle`, `fast` and `default` compression options.
#[test]
fn one_and_two_values() {
    let inputs: [&[u8]; 2] = [&[1], &[5, 6, 7, 8]];
    let levels = [CO::rle(), CO::fast(), CO::default()];

    for &data in inputs.iter() {
        for &level in levels.iter() {
            roundtrip_zlib(data, level);
        }
    }
}
493
494
495 }
496