1 use std::io;
2 use std::io::Write;
3 
4 use crate::bitstream::LsbWriter;
5 use crate::deflate_state::DeflateState;
6 use crate::encoder_state::EncoderState;
7 use crate::huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
8 use crate::lz77::{lz77_compress_block, LZ77Status};
9 use crate::lzvalue::LZValue;
10 use crate::stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
11 
12 const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
13 
14 /// Flush mode to use when compressing input received in multiple steps.
15 ///
16 /// (The more obscure ZLIB flush modes are not implemented.)
17 #[derive(Eq, PartialEq, Debug, Copy, Clone)]
18 pub enum Flush {
19     // Simply wait for more input when we are out of input data to process.
20     None,
21     // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
22     // outputting all pending data, and then outputs an empty stored block.
23     // (That is, the block header indicating a stored block followed by `0000FFFF`).
24     Sync,
25     _Partial,
26     _Block,
27     _Full,
28     // Finish compressing and output all remaining input.
29     Finish,
30 }
31 
32 /// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
33 /// with the end of block code.
flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState)34 pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
35     for &b in buffer {
36         state.write_lzvalue(b.value());
37     }
38     state.write_end_of_block()
39 }
40 
41 /// Compress the input data using only fixed huffman codes.
42 ///
43 /// Currently only used in tests.
44 #[cfg(test)]
compress_data_fixed(input: &[u8]) -> Vec<u8>45 pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
46     use crate::lz77::lz77_compress;
47 
48     let mut state = EncoderState::fixed(Vec::new());
49     let compressed = lz77_compress(input).unwrap();
50 
51     // We currently don't split blocks here(this function is just used for tests anyhow)
52     state.write_start_of_block(true, true);
53     flush_to_bitstream(&compressed, &mut state);
54 
55     state.flush();
56     state.reset(Vec::new())
57 }
58 
write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool)59 fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
60     // If the input is not zero, we write stored blocks for the input data.
61     if !input.is_empty() {
62         let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
63 
64         while let Some(chunk) = i.next() {
65             let last_chunk = i.peek().is_none();
66             // Write the block header
67             write_stored_header(writer, final_block && last_chunk);
68 
69             // Write the actual data.
70             compress_block_stored(chunk, &mut writer).expect("Write error");
71         }
72     } else {
73         // If the input length is zero, we output an empty block. This is used for syncing.
74         write_stored_header(writer, final_block);
75         compress_block_stored(&[], &mut writer).expect("Write error");
76     }
77 }
78 
79 /// Inner compression function used by both the writers and the simple compression functions.
compress_data_dynamic_n<W: Write>( input: &[u8], deflate_state: &mut DeflateState<W>, flush: Flush, ) -> io::Result<usize>80 pub fn compress_data_dynamic_n<W: Write>(
81     input: &[u8],
82     deflate_state: &mut DeflateState<W>,
83     flush: Flush,
84 ) -> io::Result<usize> {
85     let mut bytes_written = 0;
86 
87     let mut slice = input;
88 
89     // enter the decompression loop unless we did a sync flush, in case we want to make sure
90     // everything is output before continuing.
91     while !deflate_state.needs_flush {
92         let output_buf_len = deflate_state.output_buf().len();
93         let output_buf_pos = deflate_state.output_buf_pos;
94         // If the output buffer has too much data in it already, flush it before doing anything
95         // else.
96         if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
97             let written = deflate_state
98                 .inner
99                 .as_mut()
100                 .expect("Missing writer!")
101                 .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
102 
103             if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
104                 // Only some of the data was flushed, so keep track of where we were.
105                 deflate_state.output_buf_pos += written;
106             } else {
107                 // If we flushed all of the output, reset the output buffer.
108                 deflate_state.needs_flush = false;
109                 deflate_state.output_buf_pos = 0;
110                 deflate_state.output_buf().clear();
111             }
112 
113             if bytes_written == 0 {
114                 // If the buffer was already full when the function was called, this has to be
115                 // returned rather than Ok(0) to indicate that we didn't write anything, but are
116                 // not done yet.
117                 return Err(io::Error::new(
118                     io::ErrorKind::Interrupted,
119                     "Internal buffer full.",
120                 ));
121             } else {
122                 return Ok(bytes_written);
123             }
124         }
125 
126         if deflate_state.lz77_state.is_last_block() {
127             // The last block has already been written, so we don't have anything to compress.
128             break;
129         }
130 
131         let (written, status, position) = lz77_compress_block(
132             slice,
133             &mut deflate_state.lz77_state,
134             &mut deflate_state.input_buffer,
135             &mut deflate_state.lz77_writer,
136             flush,
137         );
138 
139         // Bytes written in this call
140         bytes_written += written;
141         // Total bytes written since the compression process started
142         // TODO: Should we realistically have to worry about overflowing here?
143         deflate_state.bytes_written += written as u64;
144 
145         if status == LZ77Status::NeedInput {
146             // If we've consumed all the data input so far, and we're not
147             // finishing or syncing or ending the block here, simply return
148             // the number of bytes consumed so far.
149             return Ok(bytes_written);
150         }
151 
152         // Increment start of input data
153         slice = &slice[written..];
154 
155         // We need to check if this is the last block as the header will then be
156         // slightly different to indicate this.
157         let last_block = deflate_state.lz77_state.is_last_block();
158 
159         let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
160 
161         if cfg!(debug_assertions) {
162             deflate_state
163                 .bytes_written_control
164                 .add(current_block_input_bytes);
165         }
166 
167         let partial_bits = deflate_state.encoder_state.writer.pending_bits();
168 
169         let res = {
170             let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
171             let (l_lengths, d_lengths) =
172                 deflate_state.encoder_state.huffman_table.get_lengths_mut();
173 
174             gen_huffman_lengths(
175                 l_freqs,
176                 d_freqs,
177                 current_block_input_bytes,
178                 partial_bits,
179                 l_lengths,
180                 d_lengths,
181                 &mut deflate_state.length_buffers,
182             )
183         };
184 
185         // Check if we've actually managed to compress the input, and output stored blocks
186         // if not.
187         match res {
188             BlockType::Dynamic(header) => {
189                 // Write the block header.
190                 deflate_state
191                     .encoder_state
192                     .write_start_of_block(false, last_block);
193 
194                 // Output the lengths of the huffman codes used in this block.
195                 write_huffman_lengths(
196                     &header,
197                     &deflate_state.encoder_state.huffman_table,
198                     &deflate_state.length_buffers.length_buf,
199                     &mut deflate_state.encoder_state.writer,
200                 );
201 
202                 // Uupdate the huffman codes that will be used to encode the
203                 // lz77-compressed data.
204                 deflate_state
205                     .encoder_state
206                     .huffman_table
207                     .update_from_lengths();
208 
209                 // Write the huffman compressed data and the end of block marker.
210                 flush_to_bitstream(
211                     deflate_state.lz77_writer.get_buffer(),
212                     &mut deflate_state.encoder_state,
213                 );
214             }
215             BlockType::Fixed => {
216                 // Write the block header for fixed code blocks.
217                 deflate_state
218                     .encoder_state
219                     .write_start_of_block(true, last_block);
220 
221                 // Use the pre-defined static huffman codes.
222                 deflate_state.encoder_state.set_huffman_to_fixed();
223 
224                 // Write the compressed data and the end of block marker.
225                 flush_to_bitstream(
226                     deflate_state.lz77_writer.get_buffer(),
227                     &mut deflate_state.encoder_state,
228                 );
229             }
230             BlockType::Stored => {
231                 // If compression fails, output a stored block instead.
232 
233                 let start_pos = position.saturating_sub(current_block_input_bytes as usize);
234 
235                 assert!(
236                     position >= current_block_input_bytes as usize,
237                     "Error! Trying to output a stored block with forgotten data!\
238                      if you encounter this error, please file an issue!"
239                 );
240 
241                 write_stored_block(
242                     &deflate_state.input_buffer.get_buffer()[start_pos..position],
243                     &mut deflate_state.encoder_state.writer,
244                     flush == Flush::Finish && last_block,
245                 );
246             }
247         };
248 
249         // Clear the current lz77 data in the writer for the next call.
250         deflate_state.lz77_writer.clear();
251         // We are done with the block, so we reset the number of bytes taken
252         // for the next one.
253         deflate_state.lz77_state.reset_input_bytes();
254 
255         // We are done for now.
256         if status == LZ77Status::Finished {
257             // This flush mode means that there should be an empty stored block at the end.
258             if flush == Flush::Sync {
259                 write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
260                 // Indicate that we need to flush the buffers before doing anything else.
261                 deflate_state.needs_flush = true;
262             } else if !deflate_state.lz77_state.is_last_block() {
263                 // Make sure a block with the last block header has been output.
264                 // Not sure this can actually happen, but we make sure to finish properly
265                 // if it somehow does.
266                 // An empty fixed block is the shortest.
267                 let es = &mut deflate_state.encoder_state;
268                 es.set_huffman_to_fixed();
269                 es.write_start_of_block(true, true);
270                 es.write_end_of_block();
271             }
272             break;
273         }
274     }
275 
276     // If we reach this point, the remaining data in the buffers is to be flushed.
277     deflate_state.encoder_state.flush();
278     // Make sure we've output everything, and return the number of bytes written if everything
279     // went well.
280     let output_buf_pos = deflate_state.output_buf_pos;
281     let written_to_writer = deflate_state
282         .inner
283         .as_mut()
284         .expect("Missing writer!")
285         .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
286     if written_to_writer
287         < deflate_state
288             .output_buf()
289             .len()
290             .checked_sub(output_buf_pos)
291             .unwrap()
292     {
293         deflate_state.output_buf_pos += written_to_writer;
294     } else {
295         // If we sucessfully wrote all the data, we can clear the output buffer.
296         deflate_state.output_buf_pos = 0;
297         deflate_state.output_buf().clear();
298         deflate_state.needs_flush = false;
299     }
300 
301     Ok(bytes_written)
302 }
303 
304 #[cfg(test)]
305 mod test {
306     use super::*;
307     use crate::test_utils::{decompress_to_end, get_test_data};
308 
309     #[test]
310     /// Test compressing a short string using fixed encoding.
fixed_string_mem()311     fn fixed_string_mem() {
312         let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
313         let compressed = compress_data_fixed(&test_data);
314 
315         let result = decompress_to_end(&compressed);
316 
317         assert_eq!(test_data, result);
318     }
319 
320     #[test]
fixed_data()321     fn fixed_data() {
322         let data = vec![190u8; 400];
323         let compressed = compress_data_fixed(&data);
324         let result = decompress_to_end(&compressed);
325 
326         assert_eq!(data, result);
327     }
328 
329     /// Test deflate example.
330     ///
331     /// Check if the encoder produces the same code as the example given by Mark Adler here:
332     /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
333     #[test]
fixed_example()334     fn fixed_example() {
335         let test_data = b"Deflate late";
336         // let check =
337         // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
338         let check = [
339             0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00,
340         ];
341         let compressed = compress_data_fixed(test_data);
342         assert_eq!(&compressed, &check);
343         let decompressed = decompress_to_end(&compressed);
344         assert_eq!(&decompressed, test_data)
345     }
346 
347     #[test]
348     /// Test compression from a file.
fixed_string_file()349     fn fixed_string_file() {
350         let input = get_test_data();
351 
352         let compressed = compress_data_fixed(&input);
353         println!("Fixed codes compressed len: {}", compressed.len());
354         let result = decompress_to_end(&compressed);
355 
356         assert_eq!(input.len(), result.len());
357         // Not using assert_eq here deliberately to avoid massive amounts of output spam.
358         assert!(input == result);
359     }
360 }
361