1 /**
2 * @file huffman.cpp
3 * Huffman codes.
4 *
5 * Uses predetermined, fixed frequencies optimized for short (size < 128)
6 * messages.
7 *
8 * @authors Copyright © 2003-2017 Jaakko Keränen <jaakko.keranen@iki.fi>
9 * @authors Copyright © 2006-2013 Daniel Swanson <danij@dengine.net>
10 *
11 * @par License
12 * LGPL: http://www.gnu.org/licenses/lgpl.html
13 *
14 * <small>This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License as published by
16 * the Free Software Foundation; either version 3 of the License, or (at your
17 * option) any later version. This program is distributed in the hope that it
18 * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
19 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
20 * General Public License for more details. You should have received a copy of
21 * the GNU Lesser General Public License along with this program; if not, see:
22 * http://www.gnu.org/licenses</small>
23 */
24
25 #include "de/data/huffman.h"
26 #include "de/App"
27 #include "de/Log"
28 #include "de/ByteRefArray"
29
30 // Heap relations.
31 #define HEAP_PARENT(i) (((i) + 1)/2 - 1)
32 #define HEAP_LEFT(i) (2*(i) + 1)
33 #define HEAP_RIGHT(i) (2*(i) + 2)
34
35 namespace de {
36
37 namespace internal {
38
39 /*
40 * Total number of bytes: 234457 (10217 packets)
41 * Frequencies calculated in Doom II, co-op (1p)
42 */
43 static double freqs[256] = {
44 0.3108032603, 0.0030495997, 0.0035443599, 0.0023202549,
45 0.0018638812, 0.0026188171, 0.0021752390, 0.0027083858,
46 0.0175810490, 0.0011302712, 0.0010748240, 0.0015013414,
47 0.0012241051, 0.0015951752, 0.0012923479, 0.0012795523,
48 0.0011004150, 0.0013477951, 0.0434066801, 0.0016506225,
49 0.0019790409, 0.0017146001, 0.0010108463, 0.0012113095,
50 0.0014629548, 0.0013605906, 0.0015482583, 0.0017103349,
51 0.0024055584, 0.0010151115, 0.0009980508, 0.0011558623,
52 0.0015354628, 0.0012496961, 0.0015141369, 0.0021283220,
53 0.0012241051, 0.0015311976, 0.0010534981, 0.0018510857,
54 0.0013989772, 0.0013563255, 0.0015226673, 0.0012283702,
55 0.0011302712, 0.0010790891, 0.0011601274, 0.0010236419,
56 0.0013008782, 0.0012283702, 0.0013648558, 0.0011132105,
57 0.0012624916, 0.0016165011, 0.0018596160, 0.0030240087,
58 0.0018084340, 0.0013989772, 0.0013179389, 0.0012369006,
59 0.0025932260, 0.0016719484, 0.0016463573, 0.0019406544,
60 0.0122026640, 0.0017401912, 0.0144632065, 0.0403186938,
61 0.0779332671, 0.0014970762, 0.0025207181, 0.0021027310,
62 0.0018681464, 0.0014629548, 0.0014586897, 0.0011985140,
63 0.0013563255, 0.0013094085, 0.0014928110, 0.0014586897,
64 0.0015098717, 0.0014586897, 0.0012070444, 0.0017401912,
65 0.0012454309, 0.0018126991, 0.0022264210, 0.0018297598,
66 0.0027297116, 0.0012496961, 0.0013222041, 0.0016165011,
67 0.0021453827, 0.0024695360, 0.0015994404, 0.0016676832,
68 0.0011814533, 0.0021539131, 0.0013904469, 0.0015269324,
69 0.0023586415, 0.0016420922, 0.0011558623, 0.0013819165,
70 0.0012241051, 0.0013904469, 0.0013136737, 0.0020771399,
71 0.0024865967, 0.0015482583, 0.0011899837, 0.0013136737,
72 0.0012624916, 0.0016250315, 0.0017828429, 0.0014970762,
73 0.0014629548, 0.0017529867, 0.0012411658, 0.0021411176,
74 0.0023671718, 0.0019961016, 0.0015951752, 0.0025974912,
75 0.0013051434, 0.0020728748, 0.0016079708, 0.0021283220,
76 0.0550079546, 0.0033694878, 0.0025889609, 0.0021624434,
77 0.0029728266, 0.0022946638, 0.0021283220, 0.0018510857,
78 0.0020216927, 0.0017700474, 0.0018809419, 0.0015525235,
79 0.0022562773, 0.0028832579, 0.0020899355, 0.0018425554,
80 0.0024610056, 0.0020899355, 0.0017188653, 0.0021112613,
81 0.0018638812, 0.0017231305, 0.0018254947, 0.0015951752,
82 0.0020814051, 0.0020174275, 0.0019193285, 0.0014032424,
83 0.0017572519, 0.0017913733, 0.0020003668, 0.0018510857,
84 0.0022264210, 0.0012923479, 0.0017529867, 0.0018468205,
85 0.0017359260, 0.0018596160, 0.0018084340, 0.0025463091,
86 0.0011430667, 0.0022221559, 0.0010407026, 0.0012411658,
87 0.0015354628, 0.0019235937, 0.0022178907, 0.0013819165,
88 0.0021837693, 0.0015823797, 0.0013008782, 0.0011814533,
89 0.0010492329, 0.0015695842, 0.0014160379, 0.0015823797,
90 0.0014928110, 0.0019107981, 0.0012369006, 0.0019619802,
91 0.0017913733, 0.0023799673, 0.0016037056, 0.0020174275,
92 0.0148854587, 0.0032841843, 0.0018126991, 0.0023159897,
93 0.0015056066, 0.0026955902, 0.0019747758, 0.0012624916,
94 0.0011558623, 0.0014672200, 0.0017572519, 0.0022520121,
95 0.0013136737, 0.0012752872, 0.0012411658, 0.0017743126,
96 0.0014458941, 0.0012241051, 0.0012752872, 0.0017615170,
97 0.0012113095, 0.0011515971, 0.0013776513, 0.0010748240,
98 0.0016250315, 0.0012283702, 0.0014117727, 0.0009596642,
99 0.0011430667, 0.0010705588, 0.0013264692, 0.0012923479,
100 0.0025889609, 0.0013733862, 0.0013136737, 0.0012752872,
101 0.0014970762, 0.0011899837, 0.0013691210, 0.0010023160,
102 0.0014416290, 0.0010876195, 0.0010662936, 0.0009340732,
103 0.0011814533, 0.0010577633, 0.0012710220, 0.0017316608,
104 0.0014586897, 0.0010449677, 0.0017359260, 0.0010279070,
105 0.0016292966, 0.0018297598, 0.0020259579, 0.0015311976,
106 0.0040775067, 0.0010790891, 0.0013861817, 0.0010108463,
107 0.0017103349, 0.0012496961, 0.0022903987, 0.0028619320
108 };
109
110 struct HuffNode {
111 HuffNode *left, *right;
112 double freq;
113 dbyte value; // Only valid for leaves.
114 };
115
116 struct HuffQueue {
117 HuffNode *nodes[256];
118 int count;
119 };
120
121 struct HuffCode {
122 duint code;
123 duint length;
124 };
125
126 struct HuffBuffer {
127 dbyte *data;
128 dsize size;
129 };
130
131 struct Huffman
132 {
133 // The root of the Huffman tree.
134 HuffNode *huffRoot;
135
136 // The lookup table for encoding.
137 HuffCode huffCodes[256];
138
139 /**
140 * Builds the Huffman tree and initializes the code lookup.
141 */
Huffmande::internal::Huffman142 Huffman() : huffRoot(0)
143 {
144 zap(huffCodes);
145
146 HuffQueue queue;
147 HuffNode *node;
148 int i;
149
150 // Initialize the priority queue that holds the remaining nodes.
151 queue.count = 0;
152 for (i = 0; i < 256; ++i)
153 {
154 // These are the leaves of the tree.
155 node = (HuffNode *) calloc(1, sizeof(HuffNode));
156 node->freq = freqs[i];
157 node->value = i;
158 Huff_QueueInsert(&queue, node);
159 }
160
161 // Build the tree.
162 for (i = 0; i < 255; ++i)
163 {
164 node = (HuffNode *) calloc(1, sizeof(HuffNode));
165 node->left = Huff_QueueExtract(&queue);
166 node->right = Huff_QueueExtract(&queue);
167 node->freq = node->left->freq + node->right->freq;
168 Huff_QueueInsert(&queue, node);
169 }
170
171 // The root is the last node left in the queue.
172 huffRoot = Huff_QueueExtract(&queue);
173
174 // Fill in the code lookup table.
175 Huff_BuildLookup(huffRoot, 0, 0);
176
177 #if 0
178 if (qApp->arguments().contains("-huffcodes"))
179 {
180 double realBits = 0;
181 double huffBits = 0;
182
183 LOG_MSG("Huffman codes:");
184 for (i = 0; i < 256; ++i)
185 {
186 uint k;
187 realBits += freqs[i] * 8;
188 huffBits += freqs[i] * huffCodes[i].length;
189 LOG_MSG("%03i: (%07i) ", i, (int) (6e6 * freqs[i]));
190 for (k = 0; k < huffCodes[i].length; ++k)
191 {
192 LogBuffer_Printf(0, "%c", huffCodes[i].code & (1 << k) ? '1' : '0');
193 }
194 LogBuffer_Printf(0, "\n");
195 }
196 LOG_MSG("realbits=%f, huffbits=%f (%f%%)")
197 << realBits <<huffBits << huffBits / realBits * 100;
198 }
199 #endif
200 }
201
202 /**
203 * Free all resources allocated for Huffman codes.
204 */
~Huffmande::internal::Huffman205 ~Huffman()
206 {
207 Huff_DestroyNode(huffRoot);
208 huffRoot = NULL;
209 }
210
211 /**
212 * Exchange two nodes in the queue.
213 */
Huff_QueueExchangede::internal::Huffman214 void Huff_QueueExchange(HuffQueue *queue, int index1, int index2)
215 {
216 HuffNode *temp = queue->nodes[index1];
217 queue->nodes[index1] = queue->nodes[index2];
218 queue->nodes[index2] = temp;
219 }
220
221 /**
222 * Insert a node into a priority queue.
223 */
Huff_QueueInsertde::internal::Huffman224 void Huff_QueueInsert(HuffQueue *queue, HuffNode *node)
225 {
226 int i, parent;
227
228 // Add the new node to the end of the queue.
229 i = queue->count;
230 queue->nodes[i] = node;
231 ++queue->count;
232
233 // Rise in the heap until the correct place is found.
234 while (i > 0)
235 {
236 parent = HEAP_PARENT(i);
237
238 // Is it good now?
239 if (queue->nodes[parent]->freq <= node->freq)
240 break;
241
242 // Exchange with the parent.
243 Huff_QueueExchange(queue, parent, i);
244
245 i = parent;
246 }
247 }
248
249 /**
250 * Extract the smallest node from the queue.
251 */
Huff_QueueExtractde::internal::Huffman252 HuffNode *Huff_QueueExtract(HuffQueue *queue)
253 {
254 HuffNode *min;
255 int i, left, right, small;
256
257 DENG2_ASSERT(queue->count > 0);
258
259 // This is what we'll return.
260 min = queue->nodes[0];
261
262 // Remove the first element from the queue.
263 queue->nodes[0] = queue->nodes[--queue->count];
264
265 // Heapify the heap. This is O(log n).
266 i = 0;
267 for (;;)
268 {
269 left = HEAP_LEFT(i);
270 right = HEAP_RIGHT(i);
271 small = i;
272
273 // Which child has smaller freq?
274 if (left < queue->count &&
275 queue->nodes[left]->freq < queue->nodes[i]->freq)
276 {
277 small = left;
278 }
279 if (right < queue->count &&
280 queue->nodes[right]->freq < queue->nodes[small]->freq)
281 {
282 small = right;
283 }
284
285 // Can we stop now?
286 if (i == small)
287 {
288 // Heapifying is complete.
289 break;
290 }
291
292 // Exchange and continue.
293 Huff_QueueExchange(queue, i, small);
294 i = small;
295 }
296
297 return min;
298 }
299
300 /**
301 * Recursively builds the Huffman code lookup for the node's subtree.
302 */
Huff_BuildLookupde::internal::Huffman303 void Huff_BuildLookup(HuffNode *node, uint code, uint length)
304 {
305 if (!node->left && !node->right)
306 {
307 // This is a leaf.
308 huffCodes[node->value].code = code;
309 huffCodes[node->value].length = length;
310 return;
311 }
312
313 // Shouldn't run out of bits...
314 DENG2_ASSERT(length < 32);
315
316 // Descend into the left and right subtrees.
317 if (node->left)
318 {
319 // This child's bit is zero.
320 Huff_BuildLookup(node->left, code, length + 1);
321 }
322 if (node->right)
323 {
324 // This child's bit is one.
325 Huff_BuildLookup(node->right, code | (1 << length), length + 1);
326 }
327 }
328
329 /**
330 * Checks if the encoding/decoding buffer can hold the given number of
331 * bytes. If not, reallocates the buffer.
332 */
Huff_ResizeBufferde::internal::Huffman333 static void Huff_ResizeBuffer(HuffBuffer *buffer, dsize neededSize)
334 {
335 while (neededSize > buffer->size)
336 {
337 if (!buffer->size)
338 buffer->size = qMax(dsize(1024), neededSize);
339 else
340 buffer->size *= 2;
341 }
342 buffer->data = reinterpret_cast<dbyte *>(realloc(buffer->data, buffer->size));
343 }
344
345 /**
346 * Recursively frees the node and its subtree.
347 */
Huff_DestroyNodede::internal::Huffman348 void Huff_DestroyNode(HuffNode *node)
349 {
350 if (node)
351 {
352 Huff_DestroyNode(node->left);
353 Huff_DestroyNode(node->right);
354 free(node);
355 }
356 }
357
358 /**
359 * Free the buffer.
360 */
Huff_DestroyBufferde::internal::Huffman361 void Huff_DestroyBuffer(HuffBuffer *buffer)
362 {
363 free(buffer->data);
364 zapPtr(buffer);
365 }
366
encodede::internal::Huffman367 dbyte *encode(dbyte const *data, dsize size, dsize *encodedSize) const
368 {
369 HuffBuffer huffEnc;
370 dsize i;
371 duint code;
372 int remaining, fits;
373 dbyte *out, bit;
374
375 zap(huffEnc);
376
377 // The encoded message is never twice the original size
378 // (longest codes are currently 11 bits).
379 Huff_ResizeBuffer(&huffEnc, 2 * size);
380
381 // First three bits of the encoded data contain the number of bits (-1)
382 // in the last dbyte of the encoded data. It's written when we have
383 // finished the encoding.
384 bit = 3;
385
386 // Clear the first dbyte of the result.
387 out = huffEnc.data;
388 *out = 0;
389
390 for (i = 0; i < size; ++i)
391 {
392 remaining = huffCodes[data[i]].length;
393 code = huffCodes[data[i]].code;
394
395 while (remaining > 0)
396 {
397 fits = 8 - bit;
398 if (fits > remaining)
399 fits = remaining;
400
401 // Write the bits that fit the current dbyte.
402 *out |= code << bit;
403 code >>= fits;
404 remaining -= fits;
405
406 // Advance the bit position.
407 bit += fits;
408 if (bit == 8)
409 {
410 bit = 0;
411 *++out = 0;
412 }
413 }
414 }
415
416 // If the last dbyte is empty, back up.
417 if (bit == 0)
418 {
419 out--;
420 bit = 8;
421 }
422
423 if (encodedSize)
424 *encodedSize = out - huffEnc.data + 1;
425
426 // The number of valid bits - 1 in the last dbyte.
427 huffEnc.data[0] |= bit - 1;
428
429 return huffEnc.data;
430 }
431
decodede::internal::Huffman432 dbyte *decode(dbyte const *data, dsize size, dsize *decodedSize) const
433 {
434 HuffBuffer huffDec;
435 HuffNode *node;
436 dsize outBytes = 0;
437 dbyte const *in = data;
438 dbyte const *lastIn = in + size - 1;
439 dbyte bit = 3, lastByteBits;
440
441 if (!data || size == 0) return nullptr;
442
443 zap(huffDec);
444 Huff_ResizeBuffer(&huffDec, 256);
445
446 // The first three bits contain the number of valid bits in
447 // the last dbyte.
448 lastByteBits = (*in & 7) + 1;
449
450 // Start from the root node.
451 node = huffRoot;
452
453 while (in < lastIn || bit < lastByteBits)
454 {
455 // Go left or right?
456 if (*in & (1 << bit))
457 {
458 node = node->right;
459 }
460 else
461 {
462 node = node->left;
463 }
464
465 // Did we arrive at a leaf?
466 DENG2_ASSERT(node);
467 if (!node->left && !node->right)
468 {
469 // This node represents a value.
470 huffDec.data[outBytes++] = node->value;
471
472 // Should we allocate more memory?
473 if (outBytes == huffDec.size)
474 {
475 Huff_ResizeBuffer(&huffDec, 2 * huffDec.size);
476 }
477
478 // Back to the root.
479 node = huffRoot;
480 }
481
482 // Advance bit position.
483 if (++bit == 8)
484 {
485 bit = 0;
486 ++in;
487
488 // Out of buffer?
489 if (in > lastIn)
490 break;
491 }
492 }
493 if (decodedSize)
494 {
495 *decodedSize = outBytes;
496 }
497 return huffDec.data;
498 }
499 };
500
501 } // namespace internal
502
503 static internal::Huffman huff;
504
huffmanEncode(Block const & data)505 Block codec::huffmanEncode(Block const &data)
506 {
507 Block result;
508 dsize size = 0;
509 dbyte *coded = huff.encode(data.data(), data.size(), &size);
510 if (coded)
511 {
512 result.copyFrom(ByteRefArray(coded, size), 0, size);
513 free(coded);
514 }
515 return result;
516 }
517
huffmanDecode(Block const & codedData)518 Block codec::huffmanDecode(Block const &codedData)
519 {
520 Block result;
521 dsize size = 0;
522 dbyte *decoded = huff.decode(codedData.data(), codedData.size(), &size);
523 if (decoded)
524 {
525 result.copyFrom(ByteRefArray(decoded, size), 0, size);
526 free(decoded);
527 }
528 return result;
529 }
530
531 } // namespace de
532