1 use length_encode::{EncodedLength, encode_lengths_m, huffman_lengths_from_frequency_m,
2 COPY_PREVIOUS, REPEAT_ZERO_3_BITS, REPEAT_ZERO_7_BITS};
3 use huffman_table::{HuffmanTable, create_codes_in_place, num_extra_bits_for_length_code,
4 num_extra_bits_for_distance_code, NUM_LITERALS_AND_LENGTHS,
5 NUM_DISTANCE_CODES, MAX_CODE_LENGTH, FIXED_CODE_LENGTHS, LENGTH_BITS_START};
6 use bitstream::LsbWriter;
7 use output_writer::FrequencyType;
8 use stored_block::MAX_STORED_BLOCK_LENGTH;
9 use deflate_state::LengthBuffers;
10
11 use std::cmp;
12
13 // The minimum number of literal/length values
14 pub const MIN_NUM_LITERALS_AND_LENGTHS: usize = 257;
15 // The minimum number of distances
16 pub const MIN_NUM_DISTANCES: usize = 1;
17
18 const NUM_HUFFMAN_LENGTHS: usize = 19;
19
20 // The output ordering of the lengths for the huffman codes used to encode the lengths
21 // used to build the full huffman tree for length/literal codes.
22 // http://www.gzip.org/zlib/rfc-deflate.html#dyn
23 const HUFFMAN_LENGTH_ORDER: [u8; NUM_HUFFMAN_LENGTHS] = [
24 16,
25 17,
26 18,
27 0,
28 8,
29 7,
30 9,
31 6,
32 10,
33 5,
34 11,
35 4,
36 12,
37 3,
38 13,
39 2,
40 14,
41 1,
42 15,
43 ];
44
45 // Number of bits used for the values specifying the number of codes
46 const HLIT_BITS: u8 = 5;
47 const HDIST_BITS: u8 = 5;
48 const HCLEN_BITS: u8 = 4;
49
50 // The longest a huffman code describing another huffman length can be
51 const MAX_HUFFMAN_CODE_LENGTH: usize = 7;
52
53 // How many bytes (not including padding and the 3-bit block type) the stored block header takes up.
54 const STORED_BLOCK_HEADER_LENGTH: u64 = 4;
55 const BLOCK_MARKER_LENGTH: u8 = 3;
56
57 /// Creates a new slice from the input slice that stops at the final non-zero value
remove_trailing_zeroes<T: From<u8> + PartialEq>(input: &[T], min_length: usize) -> &[T]58 pub fn remove_trailing_zeroes<T: From<u8> + PartialEq>(input: &[T], min_length: usize) -> &[T] {
59 let num_zeroes = input.iter().rev().take_while(|&a| *a == T::from(0)).count();
60 &input[0..cmp::max(input.len() - num_zeroes, min_length)]
61 }
62
63 /// How many extra bits the huffman length code uses to represent a value.
extra_bits_for_huffman_length_code(code: u8) -> u864 fn extra_bits_for_huffman_length_code(code: u8) -> u8 {
65 match code {
66 16...17 => 3,
67 18 => 7,
68 _ => 0,
69 }
70 }
71
72 /// Calculate how many bits the huffman-encoded huffman lengths will use.
calculate_huffman_length(frequencies: &[FrequencyType], code_lengths: &[u8]) -> u6473 fn calculate_huffman_length(frequencies: &[FrequencyType], code_lengths: &[u8]) -> u64 {
74 frequencies.iter().zip(code_lengths).enumerate().fold(
75 0,
76 |acc, (n, (&f, &l))| {
77 acc +
78 (u64::from(f) *
79 (u64::from(l) + u64::from(extra_bits_for_huffman_length_code(n as u8))))
80 },
81 )
82 }
83
84 /// Calculate how many bits data with the given frequencies will use when compressed with dynamic
85 /// code lengths (first return value) and static code lengths (second return value).
86 ///
87 /// Parameters:
88 /// Frequencies, length of dynamic codes, and a function to get how many extra bits in addition
89 /// to the length of the huffman code the symbol will use.
calculate_block_length<F>( frequencies: &[FrequencyType], dyn_code_lengths: &[u8], get_num_extra_bits: &F, ) -> (u64, u64) where F: Fn(usize) -> u64,90 fn calculate_block_length<F>(
91 frequencies: &[FrequencyType],
92 dyn_code_lengths: &[u8],
93 get_num_extra_bits: &F,
94 ) -> (u64, u64)
95 where
96 F: Fn(usize) -> u64,
97 {
98 // Length of data represented by dynamic codes.
99 let mut d_ll_length = 0u64;
100 // length of data represented by static codes.
101 let mut s_ll_length = 0u64;
102
103 let iter = frequencies
104 .iter()
105 .zip(dyn_code_lengths.iter().zip(FIXED_CODE_LENGTHS.iter()))
106 .enumerate();
107
108 // This could maybe be optimised a bit by splitting the iteration of codes using extra bits and
109 // codes not using extra bits, but the extra complexity may not be worth it.
110 for (c, (&f, (&l, &fl))) in iter {
111 // Frequency
112 let f = u64::from(f);
113 // How many extra bits the current code number needs.
114 let extra_bits_for_code = get_num_extra_bits(c);
115
116 d_ll_length += f * (u64::from(l) + extra_bits_for_code);
117 s_ll_length += f * (u64::from(fl) + extra_bits_for_code);
118
119 }
120
121 (d_ll_length, s_ll_length)
122 }
123
124 /// Get how extra padding bits after a block start header a stored block would use.
125 ///
126 /// # Panics
127 /// Panics if `pending_bits > 8`
stored_padding(pending_bits: u8) -> u64128 fn stored_padding(pending_bits: u8) -> u64 {
129 assert!(pending_bits <= 8);
130 let free_space = 8 - pending_bits;
131 if free_space >= BLOCK_MARKER_LENGTH {
132 // There is space in the current byte for the header.
133 free_space - BLOCK_MARKER_LENGTH
134 } else {
135 // The header will require an extra byte.
136 8 - (BLOCK_MARKER_LENGTH - free_space)
137 }.into()
138 }
139
140 /// Calculate the number of bits storing the data in stored blocks will take up, excluding the
141 /// first block start code and potential padding bits. As stored blocks have a maximum length,
142 /// (as opposed to fixed and dynamic ones), multiple blocks may have to be utilised.
143 ///
144 /// # Panics
145 /// Panics if `input_bytes` is 0.
stored_length(input_bytes: u64) -> u64146 fn stored_length(input_bytes: u64) -> u64 {
147 // Check how many stored blocks these bytes would take up.
148 // (Integer divison rounding up.)
149 let num_blocks = (input_bytes
150 .checked_sub(1)
151 .expect("Underflow calculating stored block length!") /
152 MAX_STORED_BLOCK_LENGTH as u64) + 1;
153 // The length will be the input length and the headers for each block. (Excluding the start
154 // of block code for the first one)
155 (input_bytes + (STORED_BLOCK_HEADER_LENGTH as u64 * num_blocks) + (num_blocks - 1)) * 8
156 }
157
158 pub enum BlockType {
159 Stored,
160 Fixed,
161 Dynamic(DynamicBlockHeader),
162 }
163
164 /// A struct containing the different data needed to write the header for a dynamic block.
165 ///
166 /// The code lengths are stored directly in the `HuffmanTable` struct.
167 /// TODO: Do the same for other things here.
168 pub struct DynamicBlockHeader {
169 /// Length of the run-length encoding symbols.
170 pub huffman_table_lengths: Vec<u8>,
171 /// Number of lengths for values describing the huffman table that encodes the length values
172 /// of the main huffman tables.
173 pub used_hclens: usize,
174 }
175
176 /// Generate the lengths of the huffman codes we will be using, using the
177 /// frequency of the different symbols/lengths/distances, and determine what block type will give
178 /// the shortest representation.
179 /// TODO: This needs a test
gen_huffman_lengths( l_freqs: &[FrequencyType], d_freqs: &[FrequencyType], num_input_bytes: u64, pending_bits: u8, l_lengths: &mut [u8; 288], d_lengths: &mut [u8; 32], length_buffers: &mut LengthBuffers, ) -> BlockType180 pub fn gen_huffman_lengths(
181 l_freqs: &[FrequencyType],
182 d_freqs: &[FrequencyType],
183 num_input_bytes: u64,
184 pending_bits: u8,
185 l_lengths: &mut [u8; 288],
186 d_lengths: &mut [u8; 32],
187 length_buffers: &mut LengthBuffers,
188 ) -> BlockType {
189 // Avoid corner cases and issues if this is called for an empty block.
190 // For blocks this short, a fixed block will be the shortest.
191 // TODO: Find the minimum value it's worth doing calculations for.
192 if num_input_bytes <= 4 {
193 return BlockType::Fixed;
194 };
195
196 let l_freqs = remove_trailing_zeroes(l_freqs, MIN_NUM_LITERALS_AND_LENGTHS);
197 let d_freqs = remove_trailing_zeroes(d_freqs, MIN_NUM_DISTANCES);
198
199 // The huffman spec allows us to exclude zeroes at the end of the
200 // table of huffman lengths.
201 // Since a frequency of 0 will give an huffman
202 // length of 0. We strip off the trailing zeroes before even
203 // generating the lengths to save some work.
204 // There is however a minimum number of values we have to keep
205 // according to the deflate spec.
206 // TODO: We could probably compute some of this in parallel.
207 huffman_lengths_from_frequency_m(
208 l_freqs,
209 MAX_CODE_LENGTH,
210 &mut length_buffers.leaf_buf,
211 l_lengths,
212 );
213 huffman_lengths_from_frequency_m(
214 d_freqs,
215 MAX_CODE_LENGTH,
216 &mut length_buffers.leaf_buf,
217 d_lengths,
218 );
219
220
221 let used_lengths = l_freqs.len();
222 let used_distances = d_freqs.len();
223
224 // Encode length values
225 let mut freqs = [0u16; 19];
226 encode_lengths_m(
227 l_lengths[..used_lengths]
228 .iter()
229 .chain(&d_lengths[..used_distances]),
230 &mut length_buffers.length_buf,
231 &mut freqs,
232 );
233
234 // Create huffman lengths for the length/distance code lengths
235 let mut huffman_table_lengths = vec![0; freqs.len()];
236 huffman_lengths_from_frequency_m(
237 &freqs,
238 MAX_HUFFMAN_CODE_LENGTH,
239 &mut length_buffers.leaf_buf,
240 huffman_table_lengths.as_mut_slice(),
241 );
242
243 // Count how many of these lengths we use.
244 let used_hclens = HUFFMAN_LENGTH_ORDER.len() -
245 HUFFMAN_LENGTH_ORDER
246 .iter()
247 .rev()
248 .take_while(|&&n| huffman_table_lengths[n as usize] == 0)
249 .count();
250
251 // There has to be at least 4 hclens, so if there isn't, something went wrong.
252 debug_assert!(used_hclens >= 4);
253
254 // Calculate how many bytes of space this block will take up with the different block types
255 // (excluding the 3-bit block header since it's used in all block types).
256
257 // Total length of the compressed literals/lengths.
258 let (d_ll_length, s_ll_length) = calculate_block_length(l_freqs, l_lengths, &|c| {
259 num_extra_bits_for_length_code(c.saturating_sub(LENGTH_BITS_START as usize) as u8).into()
260 });
261
262 // Total length of the compressed distances.
263 let (d_dist_length, s_dist_length) = calculate_block_length(d_freqs, d_lengths, &|c| {
264 num_extra_bits_for_distance_code(c as u8).into()
265 });
266
267 // Total length of the compressed huffman code lengths.
268 let huff_table_length = calculate_huffman_length(&freqs, &huffman_table_lengths);
269
270 // For dynamic blocks the huffman tables takes up some extra space.
271 let dynamic_length = d_ll_length + d_dist_length + huff_table_length +
272 (used_hclens as u64 * 3) + u64::from(HLIT_BITS) +
273 u64::from(HDIST_BITS) + u64::from(HCLEN_BITS);
274
275 // Static blocks don't have any extra header data.
276 let static_length = s_ll_length + s_dist_length;
277
278 // Calculate how many bits it will take to store the data in uncompressed (stored) block(s).
279 let stored_length = stored_length(num_input_bytes) + stored_padding(pending_bits % 8);
280
281 let used_length = cmp::min(cmp::min(dynamic_length, static_length), stored_length);
282
283 // Check if the block is actually compressed. If using a dynamic block
284 // increases the length of the block (for instance if the input data is mostly random or
285 // already compressed), we want to output a stored(uncompressed) block instead to avoid wasting
286 // space.
287 if used_length == static_length {
288 BlockType::Fixed
289 } else if used_length == stored_length {
290 BlockType::Stored
291 } else {
292 BlockType::Dynamic(DynamicBlockHeader {
293 huffman_table_lengths: huffman_table_lengths,
294 used_hclens: used_hclens,
295 })
296 }
297 }
298
299 /// Write the specified huffman lengths to the bit writer
write_huffman_lengths( header: &DynamicBlockHeader, huffman_table: &HuffmanTable, encoded_lengths: &[EncodedLength], writer: &mut LsbWriter, )300 pub fn write_huffman_lengths(
301 header: &DynamicBlockHeader,
302 huffman_table: &HuffmanTable,
303 encoded_lengths: &[EncodedLength],
304 writer: &mut LsbWriter,
305 ) {
306 // Ignore trailing zero lengths as allowed by the deflate spec.
307 let (literal_len_lengths, distance_lengths) = huffman_table.get_lengths();
308 let literal_len_lengths =
309 remove_trailing_zeroes(literal_len_lengths, MIN_NUM_LITERALS_AND_LENGTHS);
310 let distance_lengths = remove_trailing_zeroes(distance_lengths, MIN_NUM_DISTANCES);
311 let huffman_table_lengths = &header.huffman_table_lengths;
312 let used_hclens = header.used_hclens;
313
314 assert!(literal_len_lengths.len() <= NUM_LITERALS_AND_LENGTHS);
315 assert!(literal_len_lengths.len() >= MIN_NUM_LITERALS_AND_LENGTHS);
316 assert!(distance_lengths.len() <= NUM_DISTANCE_CODES);
317 assert!(distance_lengths.len() >= MIN_NUM_DISTANCES);
318
319 // Number of length codes - 257.
320 let hlit = (literal_len_lengths.len() - MIN_NUM_LITERALS_AND_LENGTHS) as u16;
321 writer.write_bits(hlit, HLIT_BITS);
322 // Number of distance codes - 1.
323 let hdist = (distance_lengths.len() - MIN_NUM_DISTANCES) as u16;
324 writer.write_bits(hdist, HDIST_BITS);
325
326 // Number of huffman table lengths - 4.
327 let hclen = used_hclens.saturating_sub(4);
328
329 // Write HCLEN.
330 // Casting to u16 is safe since the length can never be more than the length of
331 // `HUFFMAN_LENGTH_ORDER` anyhow.
332 writer.write_bits(hclen as u16, HCLEN_BITS);
333
334 // Write the lengths for the huffman table describing the huffman table
335 // Each length is 3 bits
336 for n in &HUFFMAN_LENGTH_ORDER[..used_hclens] {
337 writer.write_bits(huffman_table_lengths[usize::from(*n)] as u16, 3);
338 }
339
340 // Generate codes for the main huffman table using the lengths we just wrote
341 let mut codes = [0u16; NUM_HUFFMAN_LENGTHS];
342 create_codes_in_place(&mut codes[..], huffman_table_lengths);
343
344 // Write the actual huffman lengths
345 for v in encoded_lengths {
346 match *v {
347 EncodedLength::Length(n) => {
348 let (c, l) = (codes[usize::from(n)], huffman_table_lengths[usize::from(n)]);
349 writer.write_bits(c, l);
350 }
351 EncodedLength::CopyPrevious(n) => {
352 let (c, l) = (codes[COPY_PREVIOUS], huffman_table_lengths[COPY_PREVIOUS]);
353 writer.write_bits(c, l);
354 debug_assert!(n >= 3);
355 debug_assert!(n <= 6);
356 writer.write_bits((n - 3).into(), 2);
357 }
358 EncodedLength::RepeatZero3Bits(n) => {
359 let (c, l) = (
360 codes[REPEAT_ZERO_3_BITS],
361 huffman_table_lengths[REPEAT_ZERO_3_BITS],
362 );
363 writer.write_bits(c, l);
364 debug_assert!(n >= 3);
365 writer.write_bits((n - 3).into(), 3);
366 }
367 EncodedLength::RepeatZero7Bits(n) => {
368 let (c, l) = (
369 codes[REPEAT_ZERO_7_BITS],
370 huffman_table_lengths[REPEAT_ZERO_7_BITS],
371 );
372 writer.write_bits(c, l);
373 debug_assert!(n >= 11);
374 debug_assert!(n <= 138);
375 writer.write_bits((n - 11).into(), 7);
376 }
377 }
378 }
379 }
380
381
382 #[cfg(test)]
383 mod test {
384 use super::stored_padding;
385 #[test]
padding()386 fn padding() {
387 assert_eq!(stored_padding(0), 5);
388 assert_eq!(stored_padding(1), 4);
389 assert_eq!(stored_padding(2), 3);
390 assert_eq!(stored_padding(3), 2);
391 assert_eq!(stored_padding(4), 1);
392 assert_eq!(stored_padding(5), 0);
393 assert_eq!(stored_padding(6), 7);
394 assert_eq!(stored_padding(7), 6);
395 }
396 }
397