1 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
4 // option. This file may not be copied, modified, or distributed
5 // except according to those terms.
6 
7 use crate::huffman_decode_helper::{HuffmanDecoderNode, HUFFMAN_DECODE_ROOT};
8 use crate::huffman_table::HUFFMAN_TABLE;
9 use crate::{Error, Res};
10 use std::convert::TryFrom;
11 
12 struct BitReader<'a> {
13     input: &'a [u8],
14     offset: usize,
15     current_bit: u8,
16 }
17 
18 impl<'a> BitReader<'a> {
new(input: &'a [u8]) -> Self19     pub fn new(input: &'a [u8]) -> Self {
20         BitReader {
21             input,
22             offset: 0,
23             current_bit: 8,
24         }
25     }
26 
read_bit(&mut self) -> Res<u8>27     pub fn read_bit(&mut self) -> Res<u8> {
28         if self.input.len() == self.offset {
29             return Err(Error::NeedMoreData);
30         }
31 
32         if self.current_bit == 0 {
33             self.offset += 1;
34             if self.offset == self.input.len() {
35                 return Err(Error::NeedMoreData);
36             }
37             self.current_bit = 8;
38         }
39         self.current_bit -= 1;
40         Ok((self.input[self.offset] >> self.current_bit) & 0x01)
41     }
42 
verify_ending(&mut self, i: u8) -> Res<()>43     pub fn verify_ending(&mut self, i: u8) -> Res<()> {
44         if (i + self.current_bit) > 7 {
45             return Err(Error::HuffmanDecompressionFailed);
46         }
47 
48         if self.input.is_empty() {
49             Ok(())
50         } else if self.offset != self.input.len() {
51             Err(Error::HuffmanDecompressionFailed)
52         } else if self.input[self.input.len() - 1] & ((0x1 << (i + self.current_bit)) - 1)
53             == ((0x1 << (i + self.current_bit)) - 1)
54         {
55             self.current_bit = 0;
56             Ok(())
57         } else {
58             Err(Error::HuffmanDecompressionFailed)
59         }
60     }
61 
has_more_data(&self) -> bool62     pub fn has_more_data(&self) -> bool {
63         !self.input.is_empty() && (self.offset != self.input.len() || (self.current_bit != 0))
64     }
65 }
66 
67 /// Decodes huffman encoded input.
68 /// # Errors
69 /// This function may return `HuffmanDecompressionFailed` if `input` is not a correct huffman-encoded array of bits.
70 /// # Panics
71 /// Never, but rust can't know that.
decode_huffman(input: &[u8]) -> Res<Vec<u8>>72 pub fn decode_huffman(input: &[u8]) -> Res<Vec<u8>> {
73     let mut reader = BitReader::new(input);
74     let mut output = Vec::new();
75     while reader.has_more_data() {
76         if let Some(c) = decode_character(&mut reader)? {
77             if c == 256 {
78                 return Err(Error::HuffmanDecompressionFailed);
79             }
80             output.push(u8::try_from(c).unwrap());
81         }
82     }
83 
84     Ok(output)
85 }
86 
decode_character(reader: &mut BitReader) -> Res<Option<u16>>87 fn decode_character(reader: &mut BitReader) -> Res<Option<u16>> {
88     let mut node: &HuffmanDecoderNode = &HUFFMAN_DECODE_ROOT;
89     let mut i = 0;
90     while node.value.is_none() {
91         match reader.read_bit() {
92             Err(_) => {
93                 reader.verify_ending(i)?;
94                 return Ok(None);
95             }
96             Ok(b) => {
97                 i += 1;
98                 if let Some(next) = &node.next[usize::from(b)] {
99                     node = &next;
100                 } else {
101                     reader.verify_ending(i)?;
102                     return Ok(None);
103                 }
104             }
105         }
106     }
107     debug_assert!(node.value.is_some());
108     Ok(node.value)
109 }
110 
111 /// # Panics
112 /// Never, but rust doesn't know that.
113 #[must_use]
encode_huffman(input: &[u8]) -> Vec<u8>114 pub fn encode_huffman(input: &[u8]) -> Vec<u8> {
115     let mut output: Vec<u8> = Vec::new();
116     let mut left: u8 = 8;
117     let mut saved: u8 = 0;
118     for c in input {
119         let mut e = HUFFMAN_TABLE[*c as usize];
120 
121         // Fill the previous byte
122         if e.len < left {
123             let b = u8::try_from(e.val & 0xFF).unwrap();
124             saved |= b << (left - e.len);
125             left -= e.len;
126             e.len = 0;
127         } else {
128             let v: u8 = u8::try_from(e.val >> (e.len - left)).unwrap();
129             saved |= v;
130             output.push(saved);
131             e.len -= left;
132             left = 8;
133             saved = 0;
134         }
135 
136         // Write full bytes
137         while e.len >= 8 {
138             let v: u8 = u8::try_from((e.val >> (e.len - 8)) & 0xFF).unwrap();
139             output.push(v);
140             e.len -= 8;
141         }
142 
143         // Write the rest into saved.
144         if e.len > 0 {
145             saved = u8::try_from(e.val & ((1 << e.len) - 1)).unwrap() << (8 - e.len);
146             left = 8 - e.len;
147         }
148     }
149 
150     if left < 8 {
151         let v: u8 = (1 << left) - 1;
152         saved |= v;
153         output.push(saved);
154     }
155 
156     output
157 }
158 
159 #[cfg(test)]
160 mod tests {
161     use super::{decode_huffman, encode_huffman, Error};
162 
163     struct TestElement {
164         pub val: &'static [u8],
165         pub res: &'static [u8],
166     }
167     const TEST_CASES: &[TestElement] = &[
168         TestElement {
169             val: b"www.example.com",
170             res: &[
171                 0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0, 0xab, 0x90, 0xf4, 0xff,
172             ],
173         },
174         TestElement {
175             val: b"no-cache",
176             res: &[0xa8, 0xeb, 0x10, 0x64, 0x9c, 0xbf],
177         },
178         TestElement {
179             val: b"custom-key",
180             res: &[0x25, 0xa8, 0x49, 0xe9, 0x5b, 0xa9, 0x7d, 0x7f],
181         },
182         TestElement {
183             val: b"custom-value",
184             res: &[0x25, 0xa8, 0x49, 0xe9, 0x5b, 0xb8, 0xe8, 0xb4, 0xbf],
185         },
186         TestElement {
187             val: b"private",
188             res: &[0xae, 0xc3, 0x77, 0x1a, 0x4b],
189         },
190         TestElement {
191             val: b"Mon, 21 Oct 2013 20:13:21 GMT",
192             res: &[
193                 0xd0, 0x7a, 0xbe, 0x94, 0x10, 0x54, 0xd4, 0x44, 0xa8, 0x20, 0x05, 0x95, 0x04, 0x0b,
194                 0x81, 0x66, 0xe0, 0x82, 0xa6, 0x2d, 0x1b, 0xff,
195             ],
196         },
197         TestElement {
198             val: b"https://www.example.com",
199             res: &[
200                 0x9d, 0x29, 0xad, 0x17, 0x18, 0x63, 0xc7, 0x8f, 0x0b, 0x97, 0xc8, 0xe9, 0xae, 0x82,
201                 0xae, 0x43, 0xd3,
202             ],
203         },
204         TestElement {
205             val: b"Mon, 21 Oct 2013 20:13:22 GMT",
206             res: &[
207                 0xd0, 0x7a, 0xbe, 0x94, 0x10, 0x54, 0xd4, 0x44, 0xa8, 0x20, 0x05, 0x95, 0x04, 0x0b,
208                 0x81, 0x66, 0xe0, 0x84, 0xa6, 0x2d, 0x1b, 0xff,
209             ],
210         },
211         TestElement {
212             val: b"gzip",
213             res: &[0x9b, 0xd9, 0xab],
214         },
215         TestElement {
216             val: b"foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1",
217             res: &[
218                 0x94, 0xe7, 0x82, 0x1d, 0xd7, 0xf2, 0xe6, 0xc7, 0xb3, 0x35, 0xdf, 0xdf, 0xcd, 0x5b,
219                 0x39, 0x60, 0xd5, 0xaf, 0x27, 0x08, 0x7f, 0x36, 0x72, 0xc1, 0xab, 0x27, 0x0f, 0xb5,
220                 0x29, 0x1f, 0x95, 0x87, 0x31, 0x60, 0x65, 0xc0, 0x03, 0xed, 0x4e, 0xe5, 0xb1, 0x06,
221                 0x3d, 0x50, 0x07,
222             ],
223         },
224         TestElement {
225             val: b"<?\\ >",
226             res: &[0xff, 0xf9, 0xfe, 0x7f, 0xff, 0x05, 0x3f, 0xef],
227         },
228     ];
229 
230     const WRONG_END: &[u8] = &[0xa8, 0xeb, 0x10, 0x64, 0x9c, 0xaf];
231 
232     #[test]
test_encoder()233     fn test_encoder() {
234         for e in TEST_CASES {
235             let out = encode_huffman(e.val);
236             assert_eq!(out[..], *e.res);
237         }
238     }
239 
240     #[test]
test_decoder()241     fn test_decoder() {
242         for e in TEST_CASES {
243             let res = decode_huffman(e.res);
244             assert!(res.is_ok());
245             assert_eq!(res.unwrap()[..], *e.val);
246         }
247     }
248 
249     #[test]
decoder_error_wrong_ending()250     fn decoder_error_wrong_ending() {
251         assert_eq!(
252             decode_huffman(WRONG_END),
253             Err(Error::HuffmanDecompressionFailed)
254         );
255     }
256 }
257