1 use std::u16;
2 
3 use lzvalue::LZValue;
4 use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
5                     get_distance_code, get_length_code};
6 
7 /// The type used for representing how many times a literal, length or distance code has been ouput
8 /// to the current buffer.
9 /// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
10 /// 16-bit values.
11 pub type FrequencyType = u16;
12 
13 /// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
14 /// number of literals/lengths output before a new block is started.
15 /// This should not be larger than the maximum value `FrequencyType` can represent to prevent
16 /// overflowing (which would degrade, or in the worst case break compression).
17 pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
18 
19 #[derive(Debug, PartialEq)]
20 pub enum BufferStatus {
21     NotFull,
22     Full,
23 }
24 
25 /// Struct that buffers lz77 data and keeps track of the usage of different codes
26 pub struct DynamicWriter {
27     buffer: Vec<LZValue>,
28     // The two last length codes are not actually used, but only participates in code construction
29     // Therefore, we ignore them to get the correct number of lengths
30     frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
31     distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
32 }
33 
34 impl DynamicWriter {
35     #[inline]
check_buffer_length(&self) -> BufferStatus36     pub fn check_buffer_length(&self) -> BufferStatus {
37         if self.buffer.len() >= MAX_BUFFER_LENGTH {
38             BufferStatus::Full
39         } else {
40             BufferStatus::NotFull
41         }
42     }
43 
44     #[inline]
write_literal(&mut self, literal: u8) -> BufferStatus45     pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
46         debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
47         self.buffer.push(LZValue::literal(literal));
48         self.frequencies[usize::from(literal)] += 1;
49         self.check_buffer_length()
50     }
51 
52     #[inline]
write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus53     pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
54         self.buffer.push(LZValue::length_distance(length, distance));
55         let l_code_num = get_length_code(length);
56         // As we limit the buffer to 2^16 values, this should be safe from overflowing.
57         self.frequencies[l_code_num] += 1;
58 
59         let d_code_num = get_distance_code(distance);
60         // The compiler seems to be able to evade the bounds check here somehow.
61         self.distance_frequencies[usize::from(d_code_num)] += 1;
62         self.check_buffer_length()
63     }
64 
buffer_length(&self) -> usize65     pub fn buffer_length(&self) -> usize {
66         self.buffer.len()
67     }
68 
get_buffer(&self) -> &[LZValue]69     pub fn get_buffer(&self) -> &[LZValue] {
70         &self.buffer
71     }
72 
new() -> DynamicWriter73     pub fn new() -> DynamicWriter {
74         let mut w = DynamicWriter {
75             buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
76             frequencies: [0; NUM_LITERALS_AND_LENGTHS],
77             distance_frequencies: [0; NUM_DISTANCE_CODES],
78         };
79         // This will always be 1,
80         // since there will always only be one end of block marker in each block
81         w.frequencies[END_OF_BLOCK_POSITION] = 1;
82         w
83     }
84 
85     /// Special output function used with RLE compression
86     /// that avoids bothering to lookup a distance code.
87     #[inline]
write_length_rle(&mut self, length: u16) -> BufferStatus88     pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
89         self.buffer.push(LZValue::length_distance(length, 1));
90         let l_code_num = get_length_code(length);
91         // As we limit the buffer to 2^16 values, this should be safe from overflowing.
92         if cfg!(debug_assertions) {
93             self.frequencies[l_code_num] += 1;
94         } else {
95             // #Safety
96             // None of the values in the table of length code numbers will give a value
97             // that is out of bounds.
98             // There is a test to ensure that these functions won't produce too large values.
99             unsafe {
100                 *self.frequencies.get_unchecked_mut(l_code_num) += 1;
101             }
102         }
103         self.distance_frequencies[0] += 1;
104         self.check_buffer_length()
105     }
106 
get_frequencies(&self) -> (&[u16], &[u16])107     pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
108         (&self.frequencies, &self.distance_frequencies)
109     }
110 
clear_frequencies(&mut self)111     pub fn clear_frequencies(&mut self) {
112         self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
113         self.distance_frequencies = [0; NUM_DISTANCE_CODES];
114         self.frequencies[END_OF_BLOCK_POSITION] = 1;
115     }
116 
clear_data(&mut self)117     pub fn clear_data(&mut self) {
118         self.buffer.clear()
119     }
120 
clear(&mut self)121     pub fn clear(&mut self) {
122         self.clear_frequencies();
123         self.clear_data();
124     }
125 }
126 
127 #[cfg(test)]
128 mod test {
129     use super::*;
130     use huffman_table::{get_length_code, get_distance_code};
131     #[test]
132     /// Ensure that these function won't produce values that would overflow the output_writer
133     /// tables since we use some unsafe indexing.
array_bounds()134     fn array_bounds() {
135         let w = DynamicWriter::new();
136 
137         for i in 0..u16::max_value() {
138             assert!(get_length_code(i) < w.frequencies.len());
139         }
140 
141         for i in 0..u16::max_value() {
142             assert!(get_distance_code(i) < w.distance_frequencies.len() as u8);
143         }
144     }
145 }
146