1 // Copyright 2017 Brian Langenberger
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 //! Traits and implementations for reading or writing Huffman codes
10 //! from or to a stream.
11
12 #![warn(missing_docs)]
13
14 use super::BitQueue;
15 use super::Endianness;
16 use std::collections::BTreeMap;
17 use std::fmt;
18 use std::marker::PhantomData;
19
20 /// A compiled Huffman tree element for use with the `read_huffman` method.
21 /// Returned by `compile_read_tree`.
22 ///
23 /// Compiled read trees are optimized for faster lookup
24 /// and are therefore endian-specific.
25 ///
26 /// In addition, each symbol in the source tree may occur many times
27 /// in the compiled tree. If symbols require a nontrivial amount of space,
28 /// consider using reference counting so that they may be cloned
29 /// more efficiently.
30 pub enum ReadHuffmanTree<E: Endianness, T: Clone> {
31 /// The final value and new reader state
32 Done(T, u8, u32, PhantomData<E>),
33 /// Another byte is necessary to determine final value
34 Continue(Box<[ReadHuffmanTree<E, T>]>),
35 /// An invalid reader state has been used
36 InvalidState,
37 }
38
39 /// Given a vector of symbol/code pairs, compiles a Huffman tree
40 /// for reading.
41 ///
42 /// Code must be 0 or 1 bits and are always read from the stream
43 /// from least-significant in the list to most signficant
44 /// (which makes them easier to read for humans).
45 ///
46 /// All possible codes must be assigned some symbol,
47 /// and it is acceptable for the same symbol to occur multiple times.
48 ///
49 /// ## Examples
50 /// ```
51 /// use bitstream_io::huffman::compile_read_tree;
52 /// use bitstream_io::BigEndian;
53 /// assert!(compile_read_tree::<BigEndian,i32>(
54 /// vec![(1, vec![0]),
55 /// (2, vec![1, 0]),
56 /// (3, vec![1, 1])]).is_ok());
57 /// ```
58 ///
59 /// ```
60 /// use std::io::{Read, Cursor};
61 /// use bitstream_io::{BigEndian, BitReader, HuffmanRead};
62 /// use bitstream_io::huffman::compile_read_tree;
63 /// let tree = compile_read_tree(
64 /// vec![('a', vec![0]),
65 /// ('b', vec![1, 0]),
66 /// ('c', vec![1, 1, 0]),
67 /// ('d', vec![1, 1, 1])]).unwrap();
68 /// let data = [0b10110111];
69 /// let mut cursor = Cursor::new(&data);
70 /// let mut reader = BitReader::endian(&mut cursor, BigEndian);
71 /// assert_eq!(reader.read_huffman(&tree).unwrap(), 'b');
72 /// assert_eq!(reader.read_huffman(&tree).unwrap(), 'c');
73 /// assert_eq!(reader.read_huffman(&tree).unwrap(), 'd');
74 /// ```
compile_read_tree<E, T>( values: Vec<(T, Vec<u8>)>, ) -> Result<Box<[ReadHuffmanTree<E, T>]>, HuffmanTreeError> where E: Endianness, T: Clone,75 pub fn compile_read_tree<E, T>(
76 values: Vec<(T, Vec<u8>)>,
77 ) -> Result<Box<[ReadHuffmanTree<E, T>]>, HuffmanTreeError>
78 where
79 E: Endianness,
80 T: Clone,
81 {
82 let tree = FinalHuffmanTree::new(values)?;
83
84 let mut result = Vec::with_capacity(256);
85 result.extend((0..256).map(|_| ReadHuffmanTree::InvalidState));
86 let queue = BitQueue::from_value(0, 0);
87 let i = queue.to_state();
88 result[i] = compile_queue(queue, &tree);
89 for bits in 1..8 {
90 for value in 0..(1 << bits) {
91 let queue = BitQueue::from_value(value, bits);
92 let i = queue.to_state();
93 result[i] = compile_queue(queue, &tree);
94 }
95 }
96 assert_eq!(result.len(), 256);
97 Ok(result.into_boxed_slice())
98 }
99
compile_queue<E, T>( mut queue: BitQueue<E, u8>, tree: &FinalHuffmanTree<T>, ) -> ReadHuffmanTree<E, T> where E: Endianness, T: Clone,100 fn compile_queue<E, T>(
101 mut queue: BitQueue<E, u8>,
102 tree: &FinalHuffmanTree<T>,
103 ) -> ReadHuffmanTree<E, T>
104 where
105 E: Endianness,
106 T: Clone,
107 {
108 match tree {
109 FinalHuffmanTree::Leaf(ref value) => {
110 let len = queue.len();
111 ReadHuffmanTree::Done(value.clone(), queue.value(), len, PhantomData)
112 }
113 FinalHuffmanTree::Tree(ref bit0, ref bit1) => {
114 if queue.is_empty() {
115 ReadHuffmanTree::Continue(
116 (0..256)
117 .map(|byte| compile_queue(BitQueue::from_value(byte as u8, 8), tree))
118 .collect::<Vec<ReadHuffmanTree<E, T>>>()
119 .into_boxed_slice(),
120 )
121 } else if queue.pop(1) == 0 {
122 compile_queue(queue, bit0)
123 } else {
124 compile_queue(queue, bit1)
125 }
126 }
127 }
128 }
129
130 // A complete Huffman tree with no empty nodes
131 enum FinalHuffmanTree<T: Clone> {
132 Leaf(T),
133 Tree(Box<FinalHuffmanTree<T>>, Box<FinalHuffmanTree<T>>),
134 }
135
136 impl<T: Clone> FinalHuffmanTree<T> {
new(values: Vec<(T, Vec<u8>)>) -> Result<FinalHuffmanTree<T>, HuffmanTreeError>137 fn new(values: Vec<(T, Vec<u8>)>) -> Result<FinalHuffmanTree<T>, HuffmanTreeError> {
138 let mut tree = WipHuffmanTree::new_empty();
139
140 for (symbol, code) in values {
141 tree.add(code.as_slice(), symbol)?;
142 }
143
144 tree.into_read_tree()
145 }
146 }
147
148 // Work-in-progress trees may have empty nodes during construction
149 // but those are not allowed in a finalized tree.
150 // If the user wants some codes to be None or an error symbol of some sort,
151 // those will need to be specified explicitly.
152 enum WipHuffmanTree<T: Clone> {
153 Empty,
154 Leaf(T),
155 Tree(Box<WipHuffmanTree<T>>, Box<WipHuffmanTree<T>>),
156 }
157
158 impl<T: Clone> WipHuffmanTree<T> {
new_empty() -> WipHuffmanTree<T>159 fn new_empty() -> WipHuffmanTree<T> {
160 WipHuffmanTree::Empty
161 }
162
new_leaf(value: T) -> WipHuffmanTree<T>163 fn new_leaf(value: T) -> WipHuffmanTree<T> {
164 WipHuffmanTree::Leaf(value)
165 }
166
new_tree() -> WipHuffmanTree<T>167 fn new_tree() -> WipHuffmanTree<T> {
168 WipHuffmanTree::Tree(Box::new(Self::new_empty()), Box::new(Self::new_empty()))
169 }
170
into_read_tree(self) -> Result<FinalHuffmanTree<T>, HuffmanTreeError>171 fn into_read_tree(self) -> Result<FinalHuffmanTree<T>, HuffmanTreeError> {
172 match self {
173 WipHuffmanTree::Empty => Err(HuffmanTreeError::MissingLeaf),
174 WipHuffmanTree::Leaf(v) => Ok(FinalHuffmanTree::Leaf(v)),
175 WipHuffmanTree::Tree(zero, one) => {
176 let zero = zero.into_read_tree()?;
177 let one = one.into_read_tree()?;
178 Ok(FinalHuffmanTree::Tree(Box::new(zero), Box::new(one)))
179 }
180 }
181 }
182
add(&mut self, code: &[u8], symbol: T) -> Result<(), HuffmanTreeError>183 fn add(&mut self, code: &[u8], symbol: T) -> Result<(), HuffmanTreeError> {
184 match self {
185 WipHuffmanTree::Empty => {
186 if code.is_empty() {
187 *self = WipHuffmanTree::new_leaf(symbol);
188 Ok(())
189 } else {
190 *self = WipHuffmanTree::new_tree();
191 self.add(code, symbol)
192 }
193 }
194 WipHuffmanTree::Leaf(_) => Err(if code.is_empty() {
195 HuffmanTreeError::DuplicateLeaf
196 } else {
197 HuffmanTreeError::OrphanedLeaf
198 }),
199 WipHuffmanTree::Tree(ref mut zero, ref mut one) => {
200 if code.is_empty() {
201 Err(HuffmanTreeError::DuplicateLeaf)
202 } else {
203 match code[0] {
204 0 => zero.add(&code[1..], symbol),
205 1 => one.add(&code[1..], symbol),
206 _ => Err(HuffmanTreeError::InvalidBit),
207 }
208 }
209 }
210 }
211 }
212 }
213
214 /// An error type during Huffman tree compilation.
215 #[derive(PartialEq, Copy, Clone, Debug)]
216 pub enum HuffmanTreeError {
217 /// One of the bits in a Huffman code is not 0 or 1
218 InvalidBit,
219 /// A Huffman code in the specification has no defined symbol
220 MissingLeaf,
221 /// The same Huffman code specifies multiple symbols
222 DuplicateLeaf,
223 /// A Huffman code is the prefix of some longer code
224 OrphanedLeaf,
225 }
226
227 impl fmt::Display for HuffmanTreeError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result228 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
229 match *self {
230 HuffmanTreeError::InvalidBit => write!(f, "invalid bit in code"),
231 HuffmanTreeError::MissingLeaf => write!(f, "missing leaf node in specification"),
232 HuffmanTreeError::DuplicateLeaf => write!(f, "duplicate leaf node in specification"),
233 HuffmanTreeError::OrphanedLeaf => write!(f, "orphaned leaf node in specification"),
234 }
235 }
236 }
237
238 impl std::error::Error for HuffmanTreeError {}
239
240 /// Given a vector of symbol/code pairs, compiles a Huffman tree
241 /// for writing.
242 ///
243 /// Code must be 0 or 1 bits and are always written to the stream
244 /// from least-significant in the list to most signficant
245 /// (which makes them easier to read for humans).
246 ///
247 /// If the same symbol occurs multiple times, the first code is used.
248 /// Unlike in read trees, not all possible codes need to be
249 /// assigned a symbol.
250 ///
251 /// ## Examples
252 /// ```
253 /// use bitstream_io::huffman::compile_write_tree;
254 /// use bitstream_io::BigEndian;
255 /// assert!(compile_write_tree::<BigEndian,i32>(
256 /// vec![(1, vec![0]),
257 /// (2, vec![1, 0]),
258 /// (3, vec![1, 1])]).is_ok());
259 /// ```
260 ///
261 /// ```
262 /// use std::io::Write;
263 /// use bitstream_io::{BigEndian, BitWriter, HuffmanWrite};
264 /// use bitstream_io::huffman::compile_write_tree;
265 /// let tree = compile_write_tree(
266 /// vec![('a', vec![0]),
267 /// ('b', vec![1, 0]),
268 /// ('c', vec![1, 1, 0]),
269 /// ('d', vec![1, 1, 1])]).unwrap();
270 /// let mut data = Vec::new();
271 /// {
272 /// let mut writer = BitWriter::endian(&mut data, BigEndian);
273 /// writer.write_huffman(&tree, 'b').unwrap();
274 /// writer.write_huffman(&tree, 'c').unwrap();
275 /// writer.write_huffman(&tree, 'd').unwrap();
276 /// }
277 /// assert_eq!(data, [0b10110111]);
278 /// ```
compile_write_tree<E, T>( values: Vec<(T, Vec<u8>)>, ) -> Result<WriteHuffmanTree<E, T>, HuffmanTreeError> where E: Endianness, T: Ord + Clone,279 pub fn compile_write_tree<E, T>(
280 values: Vec<(T, Vec<u8>)>,
281 ) -> Result<WriteHuffmanTree<E, T>, HuffmanTreeError>
282 where
283 E: Endianness,
284 T: Ord + Clone,
285 {
286 let mut map = BTreeMap::new();
287
288 for (symbol, code) in values {
289 let mut encoded = Vec::new();
290 for bits in code.chunks(32) {
291 let mut acc = BitQueue::<E, u32>::new();
292 for bit in bits {
293 match *bit {
294 0 => acc.push(1, 0),
295 1 => acc.push(1, 1),
296 _ => return Err(HuffmanTreeError::InvalidBit),
297 }
298 }
299 let len = acc.len();
300 encoded.push((len, acc.value()))
301 }
302 map.entry(symbol)
303 .or_insert_with(|| encoded.into_boxed_slice());
304 }
305
306 Ok(WriteHuffmanTree {
307 map,
308 phantom: PhantomData,
309 })
310 }
311
312 /// A compiled Huffman tree for use with the `write_huffman` method.
313 /// Returned by `compiled_write_tree`.
314 pub struct WriteHuffmanTree<E: Endianness, T: Ord> {
315 map: BTreeMap<T, Box<[(u32, u32)]>>,
316 phantom: PhantomData<E>,
317 }
318
319 impl<E: Endianness, T: Ord + Clone> WriteHuffmanTree<E, T> {
320 /// Returns true if symbol is in tree.
321 #[inline]
has_symbol(&self, symbol: &T) -> bool322 pub fn has_symbol(&self, symbol: &T) -> bool {
323 self.map.contains_key(symbol)
324 }
325
326 /// Given symbol, returns iterator of
327 /// (bits, value) pairs for writing code.
328 /// Panics if symbol is not found.
329 #[inline]
get(&self, symbol: &T) -> impl Iterator<Item = &(u32, u32)>330 pub fn get(&self, symbol: &T) -> impl Iterator<Item = &(u32, u32)> {
331 self.map[symbol].iter()
332 }
333 }
334