1 use std::u16; 2 3 use lzvalue::LZValue; 4 use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION, 5 get_distance_code, get_length_code}; 6 7 /// The type used for representing how many times a literal, length or distance code has been ouput 8 /// to the current buffer. 9 /// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using 10 /// 16-bit values. 11 pub type FrequencyType = u16; 12 13 /// The maximum number of literals/lengths in the buffer, which in practice also means the maximum 14 /// number of literals/lengths output before a new block is started. 15 /// This should not be larger than the maximum value `FrequencyType` can represent to prevent 16 /// overflowing (which would degrade, or in the worst case break compression). 17 pub const MAX_BUFFER_LENGTH: usize = 1024 * 31; 18 19 #[derive(Debug, PartialEq)] 20 pub enum BufferStatus { 21 NotFull, 22 Full, 23 } 24 25 /// Struct that buffers lz77 data and keeps track of the usage of different codes 26 pub struct DynamicWriter { 27 buffer: Vec<LZValue>, 28 // The two last length codes are not actually used, but only participates in code construction 29 // Therefore, we ignore them to get the correct number of lengths 30 frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS], 31 distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES], 32 } 33 34 impl DynamicWriter { 35 #[inline] check_buffer_length(&self) -> BufferStatus36 pub fn check_buffer_length(&self) -> BufferStatus { 37 if self.buffer.len() >= MAX_BUFFER_LENGTH { 38 BufferStatus::Full 39 } else { 40 BufferStatus::NotFull 41 } 42 } 43 44 #[inline] write_literal(&mut self, literal: u8) -> BufferStatus45 pub fn write_literal(&mut self, literal: u8) -> BufferStatus { 46 debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH); 47 self.buffer.push(LZValue::literal(literal)); 48 self.frequencies[usize::from(literal)] += 1; 49 self.check_buffer_length() 50 } 51 52 #[inline] write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus53 pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus { 54 self.buffer.push(LZValue::length_distance(length, distance)); 55 let l_code_num = get_length_code(length); 56 // As we limit the buffer to 2^16 values, this should be safe from overflowing. 57 self.frequencies[l_code_num] += 1; 58 59 let d_code_num = get_distance_code(distance); 60 // The compiler seems to be able to evade the bounds check here somehow. 61 self.distance_frequencies[usize::from(d_code_num)] += 1; 62 self.check_buffer_length() 63 } 64 buffer_length(&self) -> usize65 pub fn buffer_length(&self) -> usize { 66 self.buffer.len() 67 } 68 get_buffer(&self) -> &[LZValue]69 pub fn get_buffer(&self) -> &[LZValue] { 70 &self.buffer 71 } 72 new() -> DynamicWriter73 pub fn new() -> DynamicWriter { 74 let mut w = DynamicWriter { 75 buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), 76 frequencies: [0; NUM_LITERALS_AND_LENGTHS], 77 distance_frequencies: [0; NUM_DISTANCE_CODES], 78 }; 79 // This will always be 1, 80 // since there will always only be one end of block marker in each block 81 w.frequencies[END_OF_BLOCK_POSITION] = 1; 82 w 83 } 84 85 /// Special output function used with RLE compression 86 /// that avoids bothering to lookup a distance code. 87 #[inline] write_length_rle(&mut self, length: u16) -> BufferStatus88 pub fn write_length_rle(&mut self, length: u16) -> BufferStatus { 89 self.buffer.push(LZValue::length_distance(length, 1)); 90 let l_code_num = get_length_code(length); 91 // As we limit the buffer to 2^16 values, this should be safe from overflowing. 92 if cfg!(debug_assertions) { 93 self.frequencies[l_code_num] += 1; 94 } else { 95 // #Safety 96 // None of the values in the table of length code numbers will give a value 97 // that is out of bounds. 98 // There is a test to ensure that these functions won't produce too large values. 99 unsafe { 100 *self.frequencies.get_unchecked_mut(l_code_num) += 1; 101 } 102 } 103 self.distance_frequencies[0] += 1; 104 self.check_buffer_length() 105 } 106 get_frequencies(&self) -> (&[u16], &[u16])107 pub fn get_frequencies(&self) -> (&[u16], &[u16]) { 108 (&self.frequencies, &self.distance_frequencies) 109 } 110 clear_frequencies(&mut self)111 pub fn clear_frequencies(&mut self) { 112 self.frequencies = [0; NUM_LITERALS_AND_LENGTHS]; 113 self.distance_frequencies = [0; NUM_DISTANCE_CODES]; 114 self.frequencies[END_OF_BLOCK_POSITION] = 1; 115 } 116 clear_data(&mut self)117 pub fn clear_data(&mut self) { 118 self.buffer.clear() 119 } 120 clear(&mut self)121 pub fn clear(&mut self) { 122 self.clear_frequencies(); 123 self.clear_data(); 124 } 125 } 126 127 #[cfg(test)] 128 mod test { 129 use super::*; 130 use huffman_table::{get_length_code, get_distance_code}; 131 #[test] 132 /// Ensure that these function won't produce values that would overflow the output_writer 133 /// tables since we use some unsafe indexing. array_bounds()134 fn array_bounds() { 135 let w = DynamicWriter::new(); 136 137 for i in 0..u16::max_value() { 138 assert!(get_length_code(i) < w.frequencies.len()); 139 } 140 141 for i in 0..u16::max_value() { 142 assert!(get_distance_code(i) < w.distance_frequencies.len() as u8); 143 } 144 } 145 } 146