1 /**
2  * @file huffman.cpp
3  * Huffman codes.
4  *
5  * Uses predetermined, fixed frequencies optimized for short (size < 128)
6  * messages.
7  *
8  * @authors Copyright © 2003-2017 Jaakko Keränen <jaakko.keranen@iki.fi>
9  * @authors Copyright © 2006-2013 Daniel Swanson <danij@dengine.net>
10  *
11  * @par License
12  * LGPL: http://www.gnu.org/licenses/lgpl.html
13  *
14  * <small>This program is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published by
16  * the Free Software Foundation; either version 3 of the License, or (at your
17  * option) any later version. This program is distributed in the hope that it
18  * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
19  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
20  * General Public License for more details. You should have received a copy of
21  * the GNU Lesser General Public License along with this program; if not, see:
22  * http://www.gnu.org/licenses</small>
23  */
24 
25 #include "de/data/huffman.h"
26 #include "de/App"
27 #include "de/Log"
28 #include "de/ByteRefArray"
29 
30 // Heap relations.
31 #define HEAP_PARENT(i)  (((i) + 1)/2 - 1)
32 #define HEAP_LEFT(i)    (2*(i) + 1)
33 #define HEAP_RIGHT(i)   (2*(i) + 2)
34 
35 namespace de {
36 
37 namespace internal {
38 
39 /*
40  * Total number of bytes: 234457 (10217 packets)
41  * Frequencies calculated in Doom II, co-op (1p)
42  */
43 static double freqs[256] = {
44     0.3108032603, 0.0030495997, 0.0035443599, 0.0023202549,
45     0.0018638812, 0.0026188171, 0.0021752390, 0.0027083858,
46     0.0175810490, 0.0011302712, 0.0010748240, 0.0015013414,
47     0.0012241051, 0.0015951752, 0.0012923479, 0.0012795523,
48     0.0011004150, 0.0013477951, 0.0434066801, 0.0016506225,
49     0.0019790409, 0.0017146001, 0.0010108463, 0.0012113095,
50     0.0014629548, 0.0013605906, 0.0015482583, 0.0017103349,
51     0.0024055584, 0.0010151115, 0.0009980508, 0.0011558623,
52     0.0015354628, 0.0012496961, 0.0015141369, 0.0021283220,
53     0.0012241051, 0.0015311976, 0.0010534981, 0.0018510857,
54     0.0013989772, 0.0013563255, 0.0015226673, 0.0012283702,
55     0.0011302712, 0.0010790891, 0.0011601274, 0.0010236419,
56     0.0013008782, 0.0012283702, 0.0013648558, 0.0011132105,
57     0.0012624916, 0.0016165011, 0.0018596160, 0.0030240087,
58     0.0018084340, 0.0013989772, 0.0013179389, 0.0012369006,
59     0.0025932260, 0.0016719484, 0.0016463573, 0.0019406544,
60     0.0122026640, 0.0017401912, 0.0144632065, 0.0403186938,
61     0.0779332671, 0.0014970762, 0.0025207181, 0.0021027310,
62     0.0018681464, 0.0014629548, 0.0014586897, 0.0011985140,
63     0.0013563255, 0.0013094085, 0.0014928110, 0.0014586897,
64     0.0015098717, 0.0014586897, 0.0012070444, 0.0017401912,
65     0.0012454309, 0.0018126991, 0.0022264210, 0.0018297598,
66     0.0027297116, 0.0012496961, 0.0013222041, 0.0016165011,
67     0.0021453827, 0.0024695360, 0.0015994404, 0.0016676832,
68     0.0011814533, 0.0021539131, 0.0013904469, 0.0015269324,
69     0.0023586415, 0.0016420922, 0.0011558623, 0.0013819165,
70     0.0012241051, 0.0013904469, 0.0013136737, 0.0020771399,
71     0.0024865967, 0.0015482583, 0.0011899837, 0.0013136737,
72     0.0012624916, 0.0016250315, 0.0017828429, 0.0014970762,
73     0.0014629548, 0.0017529867, 0.0012411658, 0.0021411176,
74     0.0023671718, 0.0019961016, 0.0015951752, 0.0025974912,
75     0.0013051434, 0.0020728748, 0.0016079708, 0.0021283220,
76     0.0550079546, 0.0033694878, 0.0025889609, 0.0021624434,
77     0.0029728266, 0.0022946638, 0.0021283220, 0.0018510857,
78     0.0020216927, 0.0017700474, 0.0018809419, 0.0015525235,
79     0.0022562773, 0.0028832579, 0.0020899355, 0.0018425554,
80     0.0024610056, 0.0020899355, 0.0017188653, 0.0021112613,
81     0.0018638812, 0.0017231305, 0.0018254947, 0.0015951752,
82     0.0020814051, 0.0020174275, 0.0019193285, 0.0014032424,
83     0.0017572519, 0.0017913733, 0.0020003668, 0.0018510857,
84     0.0022264210, 0.0012923479, 0.0017529867, 0.0018468205,
85     0.0017359260, 0.0018596160, 0.0018084340, 0.0025463091,
86     0.0011430667, 0.0022221559, 0.0010407026, 0.0012411658,
87     0.0015354628, 0.0019235937, 0.0022178907, 0.0013819165,
88     0.0021837693, 0.0015823797, 0.0013008782, 0.0011814533,
89     0.0010492329, 0.0015695842, 0.0014160379, 0.0015823797,
90     0.0014928110, 0.0019107981, 0.0012369006, 0.0019619802,
91     0.0017913733, 0.0023799673, 0.0016037056, 0.0020174275,
92     0.0148854587, 0.0032841843, 0.0018126991, 0.0023159897,
93     0.0015056066, 0.0026955902, 0.0019747758, 0.0012624916,
94     0.0011558623, 0.0014672200, 0.0017572519, 0.0022520121,
95     0.0013136737, 0.0012752872, 0.0012411658, 0.0017743126,
96     0.0014458941, 0.0012241051, 0.0012752872, 0.0017615170,
97     0.0012113095, 0.0011515971, 0.0013776513, 0.0010748240,
98     0.0016250315, 0.0012283702, 0.0014117727, 0.0009596642,
99     0.0011430667, 0.0010705588, 0.0013264692, 0.0012923479,
100     0.0025889609, 0.0013733862, 0.0013136737, 0.0012752872,
101     0.0014970762, 0.0011899837, 0.0013691210, 0.0010023160,
102     0.0014416290, 0.0010876195, 0.0010662936, 0.0009340732,
103     0.0011814533, 0.0010577633, 0.0012710220, 0.0017316608,
104     0.0014586897, 0.0010449677, 0.0017359260, 0.0010279070,
105     0.0016292966, 0.0018297598, 0.0020259579, 0.0015311976,
106     0.0040775067, 0.0010790891, 0.0013861817, 0.0010108463,
107     0.0017103349, 0.0012496961, 0.0022903987, 0.0028619320
108 };
109 
110 struct HuffNode {
111     HuffNode *left, *right;
112     double freq;
113     dbyte value;              // Only valid for leaves.
114 };
115 
116 struct HuffQueue {
117     HuffNode *nodes[256];
118     int count;
119 };
120 
121 struct HuffCode {
122     duint code;
123     duint length;
124 };
125 
126 struct HuffBuffer {
127     dbyte *data;
128     dsize size;
129 };
130 
131 struct Huffman
132 {
133     // The root of the Huffman tree.
134     HuffNode *huffRoot;
135 
136     // The lookup table for encoding.
137     HuffCode huffCodes[256];
138 
139     /**
140      * Builds the Huffman tree and initializes the code lookup.
141      */
Huffmande::internal::Huffman142     Huffman() : huffRoot(0)
143     {
144         zap(huffCodes);
145 
146         HuffQueue queue;
147         HuffNode *node;
148         int i;
149 
150         // Initialize the priority queue that holds the remaining nodes.
151         queue.count = 0;
152         for (i = 0; i < 256; ++i)
153         {
154             // These are the leaves of the tree.
155             node = (HuffNode *) calloc(1, sizeof(HuffNode));
156             node->freq = freqs[i];
157             node->value = i;
158             Huff_QueueInsert(&queue, node);
159         }
160 
161         // Build the tree.
162         for (i = 0; i < 255; ++i)
163         {
164             node = (HuffNode *) calloc(1, sizeof(HuffNode));
165             node->left = Huff_QueueExtract(&queue);
166             node->right = Huff_QueueExtract(&queue);
167             node->freq = node->left->freq + node->right->freq;
168             Huff_QueueInsert(&queue, node);
169         }
170 
171         // The root is the last node left in the queue.
172         huffRoot = Huff_QueueExtract(&queue);
173 
174         // Fill in the code lookup table.
175         Huff_BuildLookup(huffRoot, 0, 0);
176 
177 #if 0
178         if (qApp->arguments().contains("-huffcodes"))
179         {
180             double realBits = 0;
181             double huffBits = 0;
182 
183             LOG_MSG("Huffman codes:");
184             for (i = 0; i < 256; ++i)
185             {
186                 uint k;
187                 realBits += freqs[i] * 8;
188                 huffBits += freqs[i] * huffCodes[i].length;
189                 LOG_MSG("%03i: (%07i) ", i, (int) (6e6 * freqs[i]));
190                 for (k = 0; k < huffCodes[i].length; ++k)
191                 {
192                     LogBuffer_Printf(0, "%c", huffCodes[i].code & (1 << k) ? '1' : '0');
193                 }
194                 LogBuffer_Printf(0, "\n");
195             }
196             LOG_MSG("realbits=%f, huffbits=%f (%f%%)")
197                     << realBits <<huffBits << huffBits / realBits * 100;
198         }
199 #endif
200     }
201 
202     /**
203      * Free all resources allocated for Huffman codes.
204      */
~Huffmande::internal::Huffman205     ~Huffman()
206     {
207         Huff_DestroyNode(huffRoot);
208         huffRoot = NULL;
209     }
210 
211     /**
212      * Exchange two nodes in the queue.
213      */
Huff_QueueExchangede::internal::Huffman214     void Huff_QueueExchange(HuffQueue *queue, int index1, int index2)
215     {
216         HuffNode *temp = queue->nodes[index1];
217         queue->nodes[index1] = queue->nodes[index2];
218         queue->nodes[index2] = temp;
219     }
220 
221     /**
222      * Insert a node into a priority queue.
223      */
Huff_QueueInsertde::internal::Huffman224     void Huff_QueueInsert(HuffQueue *queue, HuffNode *node)
225     {
226         int i, parent;
227 
228         // Add the new node to the end of the queue.
229         i = queue->count;
230         queue->nodes[i] = node;
231         ++queue->count;
232 
233         // Rise in the heap until the correct place is found.
234         while (i > 0)
235         {
236             parent = HEAP_PARENT(i);
237 
238             // Is it good now?
239             if (queue->nodes[parent]->freq <= node->freq)
240                 break;
241 
242             // Exchange with the parent.
243             Huff_QueueExchange(queue, parent, i);
244 
245             i = parent;
246         }
247     }
248 
249     /**
250      * Extract the smallest node from the queue.
251      */
Huff_QueueExtractde::internal::Huffman252     HuffNode *Huff_QueueExtract(HuffQueue *queue)
253     {
254         HuffNode *min;
255         int i, left, right, small;
256 
257         DENG2_ASSERT(queue->count > 0);
258 
259         // This is what we'll return.
260         min = queue->nodes[0];
261 
262         // Remove the first element from the queue.
263         queue->nodes[0] = queue->nodes[--queue->count];
264 
265         // Heapify the heap. This is O(log n).
266         i = 0;
267         for (;;)
268         {
269             left = HEAP_LEFT(i);
270             right = HEAP_RIGHT(i);
271             small = i;
272 
273             // Which child has smaller freq?
274             if (left < queue->count &&
275                queue->nodes[left]->freq < queue->nodes[i]->freq)
276             {
277                 small = left;
278             }
279             if (right < queue->count &&
280                queue->nodes[right]->freq < queue->nodes[small]->freq)
281             {
282                 small = right;
283             }
284 
285             // Can we stop now?
286             if (i == small)
287             {
288                 // Heapifying is complete.
289                 break;
290             }
291 
292             // Exchange and continue.
293             Huff_QueueExchange(queue, i, small);
294             i = small;
295         }
296 
297         return min;
298     }
299 
300     /**
301      * Recursively builds the Huffman code lookup for the node's subtree.
302      */
Huff_BuildLookupde::internal::Huffman303     void Huff_BuildLookup(HuffNode *node, uint code, uint length)
304     {
305         if (!node->left && !node->right)
306         {
307             // This is a leaf.
308             huffCodes[node->value].code = code;
309             huffCodes[node->value].length = length;
310             return;
311         }
312 
313         // Shouldn't run out of bits...
314         DENG2_ASSERT(length < 32);
315 
316         // Descend into the left and right subtrees.
317         if (node->left)
318         {
319             // This child's bit is zero.
320             Huff_BuildLookup(node->left, code, length + 1);
321         }
322         if (node->right)
323         {
324             // This child's bit is one.
325             Huff_BuildLookup(node->right, code | (1 << length), length + 1);
326         }
327     }
328 
329     /**
330      * Checks if the encoding/decoding buffer can hold the given number of
331      * bytes. If not, reallocates the buffer.
332      */
Huff_ResizeBufferde::internal::Huffman333     static void Huff_ResizeBuffer(HuffBuffer *buffer, dsize neededSize)
334     {
335         while (neededSize > buffer->size)
336         {
337             if (!buffer->size)
338                 buffer->size = qMax(dsize(1024), neededSize);
339             else
340                 buffer->size *= 2;
341         }
342         buffer->data = reinterpret_cast<dbyte *>(realloc(buffer->data, buffer->size));
343     }
344 
345     /**
346      * Recursively frees the node and its subtree.
347      */
Huff_DestroyNodede::internal::Huffman348     void Huff_DestroyNode(HuffNode *node)
349     {
350         if (node)
351         {
352             Huff_DestroyNode(node->left);
353             Huff_DestroyNode(node->right);
354             free(node);
355         }
356     }
357 
358     /**
359      * Free the buffer.
360      */
Huff_DestroyBufferde::internal::Huffman361     void Huff_DestroyBuffer(HuffBuffer *buffer)
362     {
363         free(buffer->data);
364         zapPtr(buffer);
365     }
366 
encodede::internal::Huffman367     dbyte *encode(dbyte const *data, dsize size, dsize *encodedSize) const
368     {
369         HuffBuffer huffEnc;
370         dsize i;
371         duint code;
372         int remaining, fits;
373         dbyte *out, bit;
374 
375         zap(huffEnc);
376 
377         // The encoded message is never twice the original size
378         // (longest codes are currently 11 bits).
379         Huff_ResizeBuffer(&huffEnc, 2 * size);
380 
381         // First three bits of the encoded data contain the number of bits (-1)
382         // in the last dbyte of the encoded data. It's written when we have
383         // finished the encoding.
384         bit = 3;
385 
386         // Clear the first dbyte of the result.
387         out = huffEnc.data;
388         *out = 0;
389 
390         for (i = 0; i < size; ++i)
391         {
392             remaining = huffCodes[data[i]].length;
393             code = huffCodes[data[i]].code;
394 
395             while (remaining > 0)
396             {
397                 fits = 8 - bit;
398                 if (fits > remaining)
399                     fits = remaining;
400 
401                 // Write the bits that fit the current dbyte.
402                 *out |= code << bit;
403                 code >>= fits;
404                 remaining -= fits;
405 
406                 // Advance the bit position.
407                 bit += fits;
408                 if (bit == 8)
409                 {
410                     bit = 0;
411                     *++out = 0;
412                 }
413             }
414         }
415 
416         // If the last dbyte is empty, back up.
417         if (bit == 0)
418         {
419             out--;
420             bit = 8;
421         }
422 
423         if (encodedSize)
424             *encodedSize = out - huffEnc.data + 1;
425 
426         // The number of valid bits - 1 in the last dbyte.
427         huffEnc.data[0] |= bit - 1;
428 
429         return huffEnc.data;
430     }
431 
decodede::internal::Huffman432     dbyte *decode(dbyte const *data, dsize size, dsize *decodedSize) const
433     {
434         HuffBuffer huffDec;
435         HuffNode *node;
436         dsize outBytes = 0;
437         dbyte const *in = data;
438         dbyte const *lastIn = in + size - 1;
439         dbyte bit = 3, lastByteBits;
440 
441         if (!data || size == 0) return nullptr;
442 
443         zap(huffDec);
444         Huff_ResizeBuffer(&huffDec, 256);
445 
446         // The first three bits contain the number of valid bits in
447         // the last dbyte.
448         lastByteBits = (*in & 7) + 1;
449 
450         // Start from the root node.
451         node = huffRoot;
452 
453         while (in < lastIn || bit < lastByteBits)
454         {
455             // Go left or right?
456             if (*in & (1 << bit))
457             {
458                 node = node->right;
459             }
460             else
461             {
462                 node = node->left;
463             }
464 
465             // Did we arrive at a leaf?
466             DENG2_ASSERT(node);
467             if (!node->left && !node->right)
468             {
469                 // This node represents a value.
470                 huffDec.data[outBytes++] = node->value;
471 
472                 // Should we allocate more memory?
473                 if (outBytes == huffDec.size)
474                 {
475                     Huff_ResizeBuffer(&huffDec, 2 * huffDec.size);
476                 }
477 
478                 // Back to the root.
479                 node = huffRoot;
480             }
481 
482             // Advance bit position.
483             if (++bit == 8)
484             {
485                 bit = 0;
486                 ++in;
487 
488                 // Out of buffer?
489                 if (in > lastIn)
490                     break;
491             }
492         }
493         if (decodedSize)
494         {
495             *decodedSize = outBytes;
496         }
497         return huffDec.data;
498     }
499 };
500 
501 } // namespace internal
502 
503 static internal::Huffman huff;
504 
huffmanEncode(Block const & data)505 Block codec::huffmanEncode(Block const &data)
506 {
507     Block result;
508     dsize size = 0;
509     dbyte *coded = huff.encode(data.data(), data.size(), &size);
510     if (coded)
511     {
512         result.copyFrom(ByteRefArray(coded, size), 0, size);
513         free(coded);
514     }
515     return result;
516 }
517 
huffmanDecode(Block const & codedData)518 Block codec::huffmanDecode(Block const &codedData)
519 {
520     Block result;
521     dsize size = 0;
522     dbyte *decoded = huff.decode(codedData.data(), codedData.size(), &size);
523     if (decoded)
524     {
525         result.copyFrom(ByteRefArray(decoded, size), 0, size);
526         free(decoded);
527     }
528     return result;
529 }
530 
531 } // namespace de
532