1 use std::io::Write;
2 use std::io;
3
4 use deflate_state::DeflateState;
5 use encoder_state::EncoderState;
6 use lzvalue::LZValue;
7 use lz77::{lz77_compress_block, LZ77Status};
8 use huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
9 use bitstream::LsbWriter;
10 use stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
11
12 const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
13
14 /// Flush mode to use when compressing input received in multiple steps.
15 ///
16 /// (The more obscure ZLIB flush modes are not implemented.)
17 #[derive(Eq, PartialEq, Debug, Copy, Clone)]
18 pub enum Flush {
19 // Simply wait for more input when we are out of input data to process.
20 None,
21 // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
22 // outputting all pending data, and then outputs an empty stored block.
23 // (That is, the block header indicating a stored block followed by `0000FFFF`).
24 Sync,
25 _Partial,
26 _Block,
27 _Full,
28 // Finish compressing and output all remaining input.
29 Finish,
30 }
31
32 /// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
33 /// with the end of block code.
flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState)34 pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
35 for &b in buffer {
36 state.write_lzvalue(b.value());
37 }
38 state.write_end_of_block()
39 }
40
41 /// Compress the input data using only fixed huffman codes.
42 ///
43 /// Currently only used in tests.
44 #[cfg(test)]
compress_data_fixed(input: &[u8]) -> Vec<u8>45 pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
46 use lz77::lz77_compress;
47
48 let mut state = EncoderState::fixed(Vec::new());
49 let compressed = lz77_compress(input).unwrap();
50
51 // We currently don't split blocks here(this function is just used for tests anyhow)
52 state.write_start_of_block(true, true);
53 flush_to_bitstream(&compressed, &mut state);
54
55 state.flush();
56 state.reset(Vec::new())
57 }
58
write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool)59 fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
60
61 // If the input is not zero, we write stored blocks for the input data.
62 if !input.is_empty() {
63 let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
64
65 while let Some(chunk) = i.next() {
66 let last_chunk = i.peek().is_none();
67 // Write the block header
68 write_stored_header(writer, final_block && last_chunk);
69
70 // Write the actual data.
71 compress_block_stored(chunk, &mut writer).expect("Write error");
72
73 }
74 } else {
75 // If the input length is zero, we output an empty block. This is used for syncing.
76 write_stored_header(writer, final_block);
77 compress_block_stored(&[], &mut writer).expect("Write error");
78 }
79 }
80
81 /// Inner compression function used by both the writers and the simple compression functions.
compress_data_dynamic_n<W: Write>( input: &[u8], deflate_state: &mut DeflateState<W>, flush: Flush, ) -> io::Result<usize>82 pub fn compress_data_dynamic_n<W: Write>(
83 input: &[u8],
84 deflate_state: &mut DeflateState<W>,
85 flush: Flush,
86 ) -> io::Result<usize> {
87 let mut bytes_written = 0;
88
89 let mut slice = input;
90
91 loop {
92 let output_buf_len = deflate_state.output_buf().len();
93 let output_buf_pos = deflate_state.output_buf_pos;
94 // If the output buffer has too much data in it already, flush it before doing anything
95 // else.
96 if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
97 let written = deflate_state
98 .inner
99 .as_mut()
100 .expect("Missing writer!")
101 .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
102
103 if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
104 // Only some of the data was flushed, so keep track of where we were.
105 deflate_state.output_buf_pos += written;
106 } else {
107 // If we flushed all of the output, reset the output buffer.
108 deflate_state.output_buf_pos = 0;
109 deflate_state.output_buf().clear();
110 }
111
112 if bytes_written == 0 {
113 // If the buffer was already full when the function was called, this has to be
114 // returned rather than Ok(0) to indicate that we didn't write anything, but are
115 // not done yet.
116 return Err(io::Error::new(
117 io::ErrorKind::Interrupted,
118 "Internal buffer full.",
119 ));
120 } else {
121 return Ok(bytes_written);
122 }
123 }
124
125 if deflate_state.lz77_state.is_last_block() {
126 // The last block has already been written, so we don't ave anything to compress.
127 break;
128 }
129
130 let (written, status, position) = lz77_compress_block(
131 slice,
132 &mut deflate_state.lz77_state,
133 &mut deflate_state.input_buffer,
134 &mut deflate_state.lz77_writer,
135 flush,
136 );
137
138 // Bytes written in this call
139 bytes_written += written;
140 // Total bytes written since the compression process started
141 // TODO: Should we realistically have to worry about overflowing here?
142 deflate_state.bytes_written += written as u64;
143
144 if status == LZ77Status::NeedInput {
145 // If we've consumed all the data input so far, and we're not
146 // finishing or syncing or ending the block here, simply return
147 // the number of bytes consumed so far.
148 return Ok(bytes_written);
149 }
150
151 // Increment start of input data
152 slice = &slice[written..];
153
154 // We need to check if this is the last block as the header will then be
155 // slightly different to indicate this.
156 let last_block = deflate_state.lz77_state.is_last_block();
157
158 let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
159
160 if cfg!(debug_assertions) {
161 deflate_state
162 .bytes_written_control
163 .add(current_block_input_bytes);
164 }
165
166 let partial_bits = deflate_state.encoder_state.writer.pending_bits();
167
168 let res = {
169 let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
170 let (l_lengths, d_lengths) =
171 deflate_state.encoder_state.huffman_table.get_lengths_mut();
172
173 gen_huffman_lengths(
174 l_freqs,
175 d_freqs,
176 current_block_input_bytes,
177 partial_bits,
178 l_lengths,
179 d_lengths,
180 &mut deflate_state.length_buffers,
181 )
182 };
183
184 // Check if we've actually managed to compress the input, and output stored blocks
185 // if not.
186 match res {
187 BlockType::Dynamic(header) => {
188 // Write the block header.
189 deflate_state
190 .encoder_state
191 .write_start_of_block(false, last_block);
192
193 // Output the lengths of the huffman codes used in this block.
194 write_huffman_lengths(
195 &header,
196 &deflate_state.encoder_state.huffman_table,
197 &mut deflate_state.length_buffers.length_buf,
198 &mut deflate_state.encoder_state.writer,
199 );
200
201 // Uupdate the huffman codes that will be used to encode the
202 // lz77-compressed data.
203 deflate_state
204 .encoder_state
205 .huffman_table
206 .update_from_lengths();
207
208
209 // Write the huffman compressed data and the end of block marker.
210 flush_to_bitstream(
211 deflate_state.lz77_writer.get_buffer(),
212 &mut deflate_state.encoder_state,
213 );
214 }
215 BlockType::Fixed => {
216 // Write the block header for fixed code blocks.
217 deflate_state
218 .encoder_state
219 .write_start_of_block(true, last_block);
220
221 // Use the pre-defined static huffman codes.
222 deflate_state.encoder_state.set_huffman_to_fixed();
223
224 // Write the compressed data and the end of block marker.
225 flush_to_bitstream(
226 deflate_state.lz77_writer.get_buffer(),
227 &mut deflate_state.encoder_state,
228 );
229 }
230 BlockType::Stored => {
231 // If compression fails, output a stored block instead.
232
233 let start_pos = position.saturating_sub(current_block_input_bytes as usize);
234
235 assert!(
236 position >= current_block_input_bytes as usize,
237 "Error! Trying to output a stored block with forgotten data!\
238 if you encounter this error, please file an issue!"
239 );
240
241 write_stored_block(
242 &deflate_state.input_buffer.get_buffer()[start_pos..position],
243 &mut deflate_state.encoder_state.writer,
244 flush == Flush::Finish && last_block,
245 );
246 }
247 };
248
249 // Clear the current lz77 data in the writer for the next call.
250 deflate_state.lz77_writer.clear();
251 // We are done with the block, so we reset the number of bytes taken
252 // for the next one.
253 deflate_state.lz77_state.reset_input_bytes();
254
255 // We are done for now.
256 if status == LZ77Status::Finished {
257 // This flush mode means that there should be an empty stored block at the end.
258 if flush == Flush::Sync {
259 write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
260 } else if !deflate_state.lz77_state.is_last_block() {
261 // Make sure a block with the last block header has been output.
262 // Not sure this can actually happen, but we make sure to finish properly
263 // if it somehow does.
264 // An empty fixed block is the shortest.
265 let es = &mut deflate_state.encoder_state;
266 es.set_huffman_to_fixed();
267 es.write_start_of_block(true, true);
268 es.write_end_of_block();
269 }
270 break;
271 }
272 }
273
274 // If we reach this point, the remaining data in the buffers is to be flushed.
275 deflate_state.encoder_state.flush();
276 // Make sure we've output everything, and return the number of bytes written if everything
277 // went well.
278 let output_buf_pos = deflate_state.output_buf_pos;
279 let written_to_writer = deflate_state
280 .inner
281 .as_mut()
282 .expect("Missing writer!")
283 .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
284 if written_to_writer <
285 deflate_state
286 .output_buf()
287 .len()
288 .checked_sub(output_buf_pos)
289 .unwrap()
290 {
291 deflate_state.output_buf_pos += written_to_writer;
292 } else {
293 // If we sucessfully wrote all the data, we can clear the output buffer.
294 deflate_state.output_buf_pos = 0;
295 deflate_state.output_buf().clear();
296 }
297 Ok(bytes_written)
298 }
299
300 #[cfg(test)]
301 mod test {
302 use super::*;
303 use test_utils::{get_test_data, decompress_to_end};
304
305 #[test]
306 /// Test compressing a short string using fixed encoding.
fixed_string_mem()307 fn fixed_string_mem() {
308 let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
309 let compressed = compress_data_fixed(&test_data);
310
311 let result = decompress_to_end(&compressed);
312
313 assert_eq!(test_data, result);
314 }
315
316 #[test]
fixed_data()317 fn fixed_data() {
318 let data = vec![190u8; 400];
319 let compressed = compress_data_fixed(&data);
320 let result = decompress_to_end(&compressed);
321
322 assert_eq!(data, result);
323 }
324
325 /// Test deflate example.
326 ///
327 /// Check if the encoder produces the same code as the example given by Mark Adler here:
328 /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
329 #[test]
fixed_example()330 fn fixed_example() {
331 let test_data = b"Deflate late";
332 // let check =
333 // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
334 let check = [
335 0x73,
336 0x49,
337 0x4d,
338 0xcb,
339 0x49,
340 0x2c,
341 0x49,
342 0x55,
343 0x00,
344 0x11,
345 0x00,
346 ];
347 let compressed = compress_data_fixed(test_data);
348 assert_eq!(&compressed, &check);
349 let decompressed = decompress_to_end(&compressed);
350 assert_eq!(&decompressed, test_data)
351 }
352
353 #[test]
354 /// Test compression from a file.
fixed_string_file()355 fn fixed_string_file() {
356 let input = get_test_data();
357
358 let compressed = compress_data_fixed(&input);
359 println!("Fixed codes compressed len: {}", compressed.len());
360 let result = decompress_to_end(&compressed);
361
362 assert_eq!(input.len(), result.len());
363 // Not using assert_eq here deliberately to avoid massive amounts of output spam.
364 assert!(input == result);
365 }
366 }
367