1 use std::u16;
2 
3 use lzvalue::LZValue;
4 use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
5                     get_distance_code, get_length_code};
6 
7 /// The type used for representing how many times a literal, length or distance code has been ouput
8 /// to the current buffer.
9 /// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
10 /// 16-bit values.
11 pub type FrequencyType = u16;
12 
13 /// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
14 /// number of literals/lengths output before a new block is started.
15 /// This should not be larger than the maximum value `FrequencyType` can represent to prevent
16 /// overflowing (which would degrade, or in the worst case break compression).
17 pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
18 
19 #[derive(Debug, PartialEq)]
20 pub enum BufferStatus {
21     NotFull,
22     Full,
23 }
24 
25 /// Struct that buffers lz77 data and keeps track of the usage of different codes
26 pub struct DynamicWriter {
27     buffer: Vec<LZValue>,
28     // The two last length codes are not actually used, but only participates in code construction
29     // Therefore, we ignore them to get the correct number of lengths
30     frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
31     distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
32 }
33 
34 impl DynamicWriter {
35     #[inline]
check_buffer_length(&self) -> BufferStatus36     pub fn check_buffer_length(&self) -> BufferStatus {
37         if self.buffer.len() >= MAX_BUFFER_LENGTH {
38             BufferStatus::Full
39         } else {
40             BufferStatus::NotFull
41         }
42     }
43 
44     #[inline]
write_literal(&mut self, literal: u8) -> BufferStatus45     pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
46         debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
47         self.buffer.push(LZValue::literal(literal));
48         self.frequencies[usize::from(literal)] += 1;
49         self.check_buffer_length()
50     }
51 
52     #[inline]
write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus53     pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
54         self.buffer.push(LZValue::length_distance(length, distance));
55         let l_code_num = get_length_code(length);
56         // As we limit the buffer to 2^16 values, this should be safe from overflowing.
57         if cfg!(debug_assertions) {
58             self.frequencies[l_code_num] += 1;
59         } else {
60             // #Safety
61             // None of the values in the table of length code numbers will give a value
62             // that is out of bounds.
63             // There is a test to ensure that these functions can not produce too large values.
64             unsafe {
65                 *self.frequencies.get_unchecked_mut(l_code_num) += 1;
66             }
67         }
68         let d_code_num = get_distance_code(distance);
69         // The compiler seems to be able to evade the bounds check here somehow.
70         self.distance_frequencies[usize::from(d_code_num)] += 1;
71         self.check_buffer_length()
72     }
73 
buffer_length(&self) -> usize74     pub fn buffer_length(&self) -> usize {
75         self.buffer.len()
76     }
77 
get_buffer(&self) -> &[LZValue]78     pub fn get_buffer(&self) -> &[LZValue] {
79         &self.buffer
80     }
81 
new() -> DynamicWriter82     pub fn new() -> DynamicWriter {
83         let mut w = DynamicWriter {
84             buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
85             frequencies: [0; NUM_LITERALS_AND_LENGTHS],
86             distance_frequencies: [0; NUM_DISTANCE_CODES],
87         };
88         // This will always be 1,
89         // since there will always only be one end of block marker in each block
90         w.frequencies[END_OF_BLOCK_POSITION] = 1;
91         w
92     }
93 
94     /// Special output function used with RLE compression
95     /// that avoids bothering to lookup a distance code.
96     #[inline]
write_length_rle(&mut self, length: u16) -> BufferStatus97     pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
98         self.buffer.push(LZValue::length_distance(length, 1));
99         let l_code_num = get_length_code(length);
100         // As we limit the buffer to 2^16 values, this should be safe from overflowing.
101         if cfg!(debug_assertions) {
102             self.frequencies[l_code_num] += 1;
103         } else {
104             // #Safety
105             // None of the values in the table of length code numbers will give a value
106             // that is out of bounds.
107             // There is a test to ensure that these functions won't produce too large values.
108             unsafe {
109                 *self.frequencies.get_unchecked_mut(l_code_num) += 1;
110             }
111         }
112         self.distance_frequencies[0] += 1;
113         self.check_buffer_length()
114     }
115 
get_frequencies(&self) -> (&[u16], &[u16])116     pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
117         (&self.frequencies, &self.distance_frequencies)
118     }
119 
clear_frequencies(&mut self)120     pub fn clear_frequencies(&mut self) {
121         self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
122         self.distance_frequencies = [0; NUM_DISTANCE_CODES];
123         self.frequencies[END_OF_BLOCK_POSITION] = 1;
124     }
125 
clear_data(&mut self)126     pub fn clear_data(&mut self) {
127         self.buffer.clear()
128     }
129 
clear(&mut self)130     pub fn clear(&mut self) {
131         self.clear_frequencies();
132         self.clear_data();
133     }
134 }
135 
136 #[cfg(test)]
137 mod test {
138     use super::*;
139     use huffman_table::{get_length_code, get_distance_code};
140     #[test]
141     /// Ensure that these function won't produce values that would overflow the output_writer
142     /// tables since we use some unsafe indexing.
array_bounds()143     fn array_bounds() {
144         let w = DynamicWriter::new();
145 
146         for i in 0..u16::max_value() {
147             get_length_code(i) < w.frequencies.len();
148         }
149 
150         for i in 0..u16::max_value() {
151             get_distance_code(i) < w.distance_frequencies.len() as u8;
152         }
153     }
154 }
155