1 /********************************************************
2  Bitstream Library, a module for reading bits of data
3 
4  Copyright (C) 2007-2014  Brian Langenberger
5 
6  The Bitstream Library is free software; you can redistribute it and/or modify
7  it under the terms of either:
8 
9    * the GNU Lesser General Public License as published by the Free
10      Software Foundation; either version 3 of the License, or (at your
11      option) any later version.
12 
13  or
14 
15    * the GNU General Public License as published by the Free Software
16      Foundation; either version 2 of the License, or (at your option) any
17      later version.
18 
19  or both in parallel, as here.
20 
21  The Bitstream Library is distributed in the hope that it will be useful, but
22  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
23  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
24  for more details.
25 
26  You should have received copies of the GNU General Public License and the
27  GNU Lesser General Public License along with the GNU MP Library.  If not,
28  see https://www.gnu.org/licenses/.
29  *******************************************************/
30 
31 #include "bitstream.h"
32 
33 #ifndef __BITSTREAMLIB_HUFFMAN_H__
34 #define __BITSTREAMLIB_HUFFMAN_H__
35 
36 struct huffman_frequency {
37     unsigned int bits;   /*the bits to be consumed
38                            from most-significant to least-significant
39                            in the tree*/
40 
41     unsigned int length; /*the total length of bits leading to the leaf node*/
42 
43     int value;           /*the final value in the leaf node*/
44 };
45 
46 enum {
47     /*triggered by an incomplete Huffman table, such as:
48       [[1], 0,
49        [0, 1], 1]]
50       where the value for [0, 0] isn't specified*/
51     HUFFMAN_MISSING_LEAF = -1,
52 
53     /*triggered by an overfilled Huffman table, such as:
54       [[1], 0,
55        [0, 1], 1,
56        [0, 0], 2,
57        [0, 0], 3]
58       where the value for [0, 0] is specified multiple times*/
59     HUFFMAN_DUPLICATE_LEAF = -2,
60 
61     /*triggered by a Huffman table where leaves are unreachable, such as:
62       [[1], 0,
63        [0], 1,
64        [0, 0], 2,
65        [0, 1], 3]
66       where the values [0, 0] and [0, 1] are unreachable since [0] is a leaf*/
67     HUFFMAN_ORPHANED_LEAF = -3,
68 
69     HUFFMAN_EMPTY_TREE = -4
70 };
71 
72 /*given a set of huffman_frequency values,
73   the total number of frequency values
74   and BS_BIG_ENDIAN or BS_LITTLE_ENDIAN,
75   compiles the Huffman tree into a jump table
76   suitable for use by bitstream->read_huffman_code
77 
78   the jump table must be freed when no longer needed
79 
80   returns the number of rows in the table,
81   or a negative value if there's an error
82   (whose value is taken from the preceding enum)
83 */
84 int compile_br_huffman_table(br_huffman_table_t** table,
85                              struct huffman_frequency* frequencies,
86                              unsigned int total_frequencies,
87                              bs_endianness endianness);
88 
89 
90 /*The bitstream reader operates on jump tables that have been
91   compiled from the original Huffman tree.
92 
93   Using this module as a standalone binary,
94   one can compile JSON source into jump tables suitable
95   for importing into C code.  For example, given the JSON file:
96 
97   [[1], 0,
98    [0, 1], 1,
99    [0, 0, 1], 2,
100    [0, 0, 0], 3]
101 
102   We compile it to a jump table with:
103 
104   % huffman -i table.json > table.h
105 
106   where "table.h" is imported in source code with:
107 
108   struct br_huffman_table table[][0x200] =
109   #include "table.h"
110   ;
111 
112   and called from the bitstream reader with:
113 
114   value = bitstream->read_huffman_code(bitstream, table);
115 
116   This is the preferred method of handling static Huffman trees
117   which are the same for every file in a particular format.
118 
119 
120   However, for Huffman trees which are defined at runtime,
121   one will need to compile the jump table directly.
122   For example, given a set of frequency values:
123 
124   struct huffman_frequency frequencies[] = {
125       {1, 1, 0},  // bits = 1      value = 0
126       {1, 2, 1},  // bits = 0 1    value = 1
127       {1, 3, 2},  // bits = 0 0 1  value = 2
128       {0, 3, 3}}  // bits = 0 0 0  value = 3
129      };
130 
131   we compile them to a table with:
132 
133   struct br_huffman_table (*table)[][0x200];
134   compile_br_huffman_table(&table, frequencies, 4, BS_BIG_ENDIAN);
135 
136   and call that table from the bitstream reader with:
137 
138   value = bitstream->read_huffman_code(bitstream, *table);
139 
140   before finally deallocating the table with:
141 
142   free(table);
143 
144   For real code, the "frequencies" array will be allocated at runtime
145   but can be deallocated immediately once "table" has been compiled.
146   In addition, real code will want to check the return value of
147   "compile_br_huffman_table" in case of an incorrectly specified Huffman tree.
148 */
149 
150 
151 /*given a set of huffman_frequency values,
152   the total number of frequency values
153   and BS_BIG_ENDIAN or BS_LITTLE_ENDIAN,
154   compiles the Huffman tree into a binary tree
155   suitable for use by bitstream->write_huffman_code
156 
157   the table must be deallocated with free(table) when no longer needed
158 
159   returns 0 on success,
160   or a negative value if there's an error
161 */
162 int compile_bw_huffman_table(bw_huffman_table_t** table,
163                              struct huffman_frequency* frequencies,
164                              unsigned int total_frequencies,
165                              bs_endianness endianness);
166 
167 /*returns a string and value into huffman_frequency entry, such as:
168   ("1", 0)   -> bits 1      value = 0
169   ("01", 1)  -> bits 0 1    value = 1
170   ("001", 2) -> bits 0 0 1  value = 2
171   ("000", 3) -> bits 0 0 0  value = 3*/
172 struct huffman_frequency
173 bw_str_to_frequency(const char *s, int value);
174 #endif
175