1 //
2 // SPDX-License-Identifier: BSD-3-Clause
3 // Copyright (c) DreamWorks Animation LLC and Contributors of the OpenEXR Project
4 //
5
6 #include "ImfFastHuf.h"
7 #include <Iex.h>
8
9 #include <string.h>
10 #include <assert.h>
11 #include <math.h>
12 #include <vector>
13
14 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
15
16 //
17 // Adapted from hufUnpackEncTable -
18 // We don't need to reconstruct the code book, just the encoded
19 // lengths for each symbol. From the lengths, we can build the
20 // base + offset tables. This should be a bit more efficient
21 // for sparse code books.
22 //
23 // table - ptr to the start of the code length data. Will be
24 // updated as we decode data
25 //
26 // numBytes - size of the encoded table (I think)?
27 //
28 // minSymbol - smallest symbol in the code book
29 //
30 // maxSymbol - largest symbol in the code book.
31 //
32 // rleSymbol - the symbol to trigger RLE in the encoded bitstream
33 //
34
FastHufDecoder(const char * & table,int numBytes,int minSymbol,int maxSymbol,int rleSymbol)35 FastHufDecoder::FastHufDecoder
36 (const char *&table,
37 int numBytes,
38 int minSymbol,
39 int maxSymbol,
40 int rleSymbol)
41 :
42 _rleSymbol (rleSymbol),
43 _numSymbols (0),
44 _minCodeLength (255),
45 _maxCodeLength (0),
46 _idToSymbol (0)
47 {
48 //
49 // List of symbols that we find with non-zero code lengths
50 // (listed in the order we find them). Store these in the
51 // same format as the code book stores codes + lengths -
52 // low 6 bits are the length, everything above that is
53 // the symbol.
54 //
55
56 std::vector<uint64_t> symbols;
57
58 //
59 // The 'base' table is the minimum code at each code length. base[i]
60 // is the smallest code (numerically) of length i.
61 //
62
63 uint64_t base[MAX_CODE_LEN + 1];
64
65 //
66 // The 'offset' table is the position (in sorted order) of the first id
67 // of a given code lenght. Array is indexed by code length, like base.
68 //
69
70 uint64_t offset[MAX_CODE_LEN + 1];
71
72 //
73 // Count of how many codes at each length there are. Array is
74 // indexed by code length, like base and offset.
75 //
76
77 size_t codeCount[MAX_CODE_LEN + 1];
78
79 for (int i = 0; i <= MAX_CODE_LEN; ++i)
80 {
81 codeCount[i] = 0;
82 base[i] = 0xffffffffffffffffULL;
83 offset[i] = 0;
84 }
85
86 //
87 // Count the number of codes, the min/max code lengths, the number of
88 // codes with each length, and record symbols with non-zero code
89 // length as we find them.
90 //
91
92 const char *currByte = table;
93 uint64_t currBits = 0;
94 int currBitCount = 0;
95
96 const int SHORT_ZEROCODE_RUN = 59;
97 const int LONG_ZEROCODE_RUN = 63;
98 const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
99
100 for (uint64_t symbol = static_cast<uint64_t>(minSymbol); symbol <= static_cast<uint64_t>(maxSymbol); symbol++)
101 {
102 if (currByte - table >= numBytes)
103 {
104 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
105 "(Truncated table data).");
106 }
107
108 //
109 // Next code length - either:
110 // 0-58 (literal code length)
111 // 59-62 (various lengths runs of 0)
112 // 63 (run of n 0's, with n is the next 8 bits)
113 //
114
115 uint64_t codeLen = readBits (6, currBits, currBitCount, currByte);
116
117 if (codeLen == (uint64_t) LONG_ZEROCODE_RUN)
118 {
119 if (currByte - table >= numBytes)
120 {
121 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
122 "(Truncated table data).");
123 }
124
125 int runLen = readBits (8, currBits, currBitCount, currByte) +
126 SHORTEST_LONG_RUN;
127
128 if (symbol + runLen > static_cast<uint64_t>(maxSymbol + 1))
129 {
130 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
131 "(Run beyond end of table).");
132 }
133
134 symbol += runLen - 1;
135
136 }
137 else if (codeLen >= static_cast<uint64_t>(SHORT_ZEROCODE_RUN))
138 {
139 int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;
140
141 if (symbol + runLen > static_cast<uint64_t>(maxSymbol + 1))
142 {
143 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
144 "(Run beyond end of table).");
145 }
146
147 symbol += runLen - 1;
148
149 }
150 else if (codeLen != 0)
151 {
152 symbols.push_back ((symbol << 6) | (codeLen & 63));
153
154 if (codeLen < _minCodeLength)
155 _minCodeLength = codeLen;
156
157 if (codeLen > _maxCodeLength)
158 _maxCodeLength = codeLen;
159
160 codeCount[codeLen]++;
161 }
162 }
163
164 for (int i = 0; i < MAX_CODE_LEN; ++i)
165 _numSymbols += codeCount[i];
166
167 table = currByte;
168
169 //
170 // Compute base - once we have the code length counts, there
171 // is a closed form solution for this
172 //
173
174 {
175 double* countTmp = new double[_maxCodeLength+1];
176
177 for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
178 {
179 countTmp[l] = (double)codeCount[l] *
180 (double)(2ll << (_maxCodeLength-l));
181 }
182
183 for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
184 {
185 double tmp = 0;
186
187 for (int k =l + 1; k <= _maxCodeLength; ++k)
188 tmp += countTmp[k];
189
190 tmp /= (double)(2ll << (_maxCodeLength - l));
191
192 base[l] = (uint64_t)ceil (tmp);
193 }
194
195 delete [] countTmp;
196 }
197
198 //
199 // Compute offset - these are the positions of the first
200 // id (not symbol) that has length [i]
201 //
202
203 offset[_maxCodeLength] = 0;
204
205 for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
206 offset[i] = offset[i + 1] + codeCount[i + 1];
207
208 //
209 // Allocate and fill the symbol-to-id mapping. Smaller Ids should be
210 // mapped to less-frequent symbols (which have longer codes). Use
211 // the offset table to tell us where the id's for a given code
212 // length start off.
213 //
214
215 _idToSymbol = new int[_numSymbols];
216
217 uint64_t mapping[MAX_CODE_LEN + 1];
218 for (int i = 0; i < MAX_CODE_LEN + 1; ++i)
219 mapping[i] = -1;
220 for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
221 mapping[i] = offset[i];
222
223 for (std::vector<uint64_t>::const_iterator i = symbols.begin();
224 i != symbols.end();
225 ++i)
226 {
227 int codeLen = *i & 63;
228 int symbol = *i >> 6;
229
230 if (mapping[codeLen] >= static_cast<uint64_t>(_numSymbols))
231 {
232 delete[] _idToSymbol;
233 _idToSymbol = NULL;
234 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
235 "(Invalid symbol in header).");
236 }
237 _idToSymbol[mapping[codeLen]] = symbol;
238 mapping[codeLen]++;
239 }
240
241 //
242 // exceptions can be thrown whilst building tables. Delete
243 // _idToSynmbol before re-throwing to prevent memory leak
244 //
245 try
246 {
247 buildTables(base, offset);
248 }catch(...)
249 {
250 delete[] _idToSymbol;
251 _idToSymbol = NULL;
252 throw;
253 }
254 }
255
256
~FastHufDecoder()257 FastHufDecoder::~FastHufDecoder()
258 {
259 delete[] _idToSymbol;
260 }
261
262
263 //
264 // Static check if the decoder is enabled.
265 //
266 // ATM, I only have access to little endian hardware for testing,
267 // so I'm not entirely sure that we are reading fom the bit stream
268 // properly on BE.
269 //
270 // If you happen to have more obscure hardware, check that the
271 // byte swapping in refill() is happening sensable, add an endian
272 // check if needed, and fix the preprocessor magic here.
273 //
274
275 #define READ64(c) \
276 ((uint64_t)(c)[0] << 56) | ((uint64_t)(c)[1] << 48) | ((uint64_t)(c)[2] << 40) | \
277 ((uint64_t)(c)[3] << 32) | ((uint64_t)(c)[4] << 24) | ((uint64_t)(c)[5] << 16) | \
278 ((uint64_t)(c)[6] << 8) | ((uint64_t)(c)[7] )
279
280 #ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
281 #if defined (__i386__) || defined(__x86_64__)
282 #undef READ64
283 #define READ64(c) _bswap64 (*(const uint64_t*)(c))
284 #endif
285 #endif
286
287
288 bool
enabled()289 FastHufDecoder::enabled()
290 {
291 #if defined(__INTEL_COMPILER) || defined(__GNUC__)
292
293 //
294 // Enabled for ICC, GCC:
295 // __i386__ -> x86
296 // __x86_64__ -> 64-bit x86
297 // __e2k__ -> e2k (MCST Elbrus 2000)
298
299 #if defined (__i386__) || defined(__x86_64__) || defined(__e2k__)
300 return true;
301 #else
302 return false;
303 #endif
304
305 #elif defined (_MSC_VER)
306
307 //
308 // Enabled for Visual Studio:
309 // _M_IX86 -> x86
310 // _M_X64 -> 64bit x86
311
312 #if defined (_M_IX86) || defined(_M_X64)
313 return true;
314 #else
315 return false;
316 #endif
317
318 #else
319
320 //
321 // Unknown compiler - Be safe and disable.
322 //
323 return false;
324 #endif
325 }
326
327 //
328 //
329 // Built the acceleration tables for lookups on the upper bits
330 // as well as the 'LJ' tables.
331 //
332
333 void
buildTables(uint64_t * base,uint64_t * offset)334 FastHufDecoder::buildTables (uint64_t *base, uint64_t *offset)
335 {
336 //
337 // Build the 'left justified' base table, by shifting base left..
338 //
339
340 for (int i = 0; i <= MAX_CODE_LEN; ++i)
341 {
342 if (base[i] != 0xffffffffffffffffULL)
343 {
344 _ljBase[i] = base[i] << (64 - i);
345 }
346 else
347 {
348 //
349 // Unused code length - insert dummy values
350 //
351
352 _ljBase[i] = 0xffffffffffffffffULL;
353 }
354 }
355
356 //
357 // Combine some terms into a big fat constant, which for
358 // lack of a better term we'll call the 'left justified'
359 // offset table (because it serves the same function
360 // as 'offset', when using the left justified base table.
361 //
362
363 _ljOffset[0] = offset[0] - _ljBase[0];
364 for (int i = 1; i <= MAX_CODE_LEN; ++i)
365 _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));
366
367 //
368 // Build the acceleration tables for the lookups of
369 // short codes ( <= TABLE_LOOKUP_BITS long)
370 //
371
372 for (uint64_t i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
373 {
374 uint64_t value = i << (64 - TABLE_LOOKUP_BITS);
375
376 _tableSymbol[i] = 0xffff;
377 _tableCodeLen[i] = 0;
378
379 for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
380 {
381 if (_ljBase[codeLen] <= value)
382 {
383 _tableCodeLen[i] = codeLen;
384
385 uint64_t id = _ljOffset[codeLen] + (value >> (64 - codeLen));
386 if (id < static_cast<uint64_t>(_numSymbols))
387 {
388 _tableSymbol[i] = _idToSymbol[id];
389 }
390 else
391 {
392 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
393 "(Overrun).");
394 }
395 break;
396 }
397 }
398 }
399
400 //
401 // Store the smallest value in the table that points to real data.
402 // This should be the entry for the largest length that has
403 // valid data (in our case, non-dummy _ljBase)
404 //
405
406 int minIdx = TABLE_LOOKUP_BITS;
407
408 while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL)
409 minIdx--;
410
411 if (minIdx < 0)
412 {
413 //
414 // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
415 // Set the min value such that the table is never tested.
416 //
417
418 _tableMin = 0xffffffffffffffffULL;
419 }
420 else
421 {
422 _tableMin = _ljBase[minIdx];
423 }
424 }
425
426
427 //
428 // For decoding, we're holding onto 2 uint64_t's.
429 //
430 // The first (buffer), holds the next bits from the bitstream to be
431 // decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
432 // valid bits to decode the next symbol. For other paths, we need a full
433 // 64-bits to decode a symbol.
434 //
435 // When we need to refill 'buffer', we could pull bits straight from
436 // the bitstream. But this is very slow and requires lots of book keeping
437 // (what's the next bit in the next byte?). Instead, we keep another uint64_t
438 // around that we use to refill from. While this doesn't cut down on the
439 // book keeping (still need to know how many valid bits), it does cut
440 // down on some of the bit shifting crazy and byte access.
441 //
442 // The refill uint64_t (bufferBack) gets left-shifted after we've pulled
443 // off bits. If we run out of bits in the input bit stream, we just
444 // shift in 0's to bufferBack.
445 //
446 // The refill act takes numBits from the top of bufferBack and sticks
447 // them in the bottom of buffer. If there arn't enough bits in bufferBack,
448 // it gets refilled (to 64-bits) from the input bitstream.
449 //
450
451 inline void
refill(uint64_t & buffer,int numBits,uint64_t & bufferBack,int & bufferBackNumBits,const unsigned char * & currByte,int & currBitsLeft)452 FastHufDecoder::refill
453 (uint64_t &buffer,
454 int numBits, // number of bits to refill
455 uint64_t &bufferBack, // the next 64-bits, to refill from
456 int &bufferBackNumBits, // number of bits left in bufferBack
457 const unsigned char *&currByte, // current byte in the bitstream
458 int &currBitsLeft) // number of bits left in the bitsream
459 {
460 //
461 // Refill bits into the bottom of buffer, from the top of bufferBack.
462 // Always top up buffer to be completely full.
463 //
464
465 buffer |= bufferBack >> (64 - numBits);
466
467 if (bufferBackNumBits < numBits)
468 {
469 numBits -= bufferBackNumBits;
470
471 //
472 // Refill all of bufferBack from the bitstream. Either grab
473 // a full 64-bit chunk, or whatever bytes are left. If we
474 // don't have 64-bits left, pad with 0's.
475 //
476
477 if (currBitsLeft >= 64)
478 {
479 bufferBack = READ64 (currByte);
480 bufferBackNumBits = 64;
481 currByte += sizeof (uint64_t);
482 currBitsLeft -= 8 * sizeof (uint64_t);
483
484 }
485 else
486 {
487 bufferBack = 0;
488 bufferBackNumBits = 64;
489
490 uint64_t shift = 56;
491
492 while (currBitsLeft > 0)
493 {
494 bufferBack |= ((uint64_t)(*currByte)) << shift;
495
496 currByte++;
497 shift -= 8;
498 currBitsLeft -= 8;
499 }
500
501 //
502 // At this point, currBitsLeft might be negative, just because
503 // we're subtracting whole bytes. To keep anyone from freaking
504 // out, zero the counter.
505 //
506
507 if (currBitsLeft < 0)
508 currBitsLeft = 0;
509 }
510
511 buffer |= bufferBack >> (64 - numBits);
512 }
513
514
515 //
516 // We can have cases where the previous shift of bufferBack is << 64 -
517 // this is an undefined operation but tends to create just zeroes.
518 // so if we won't have any bits left, zero out bufferBack insetad of computing the shift
519 //
520
521 if (bufferBackNumBits <= numBits)
522 {
523 bufferBack = 0;
524 }else
525 {
526 bufferBack = bufferBack << numBits;
527 }
528 bufferBackNumBits -= numBits;
529
530
531 }
532
533 //
534 // Read the next few bits out of a bitstream. Will be given a backing buffer
535 // (buffer) that may still have data left over from previous reads
536 // (bufferNumBits). Bitstream pointer (currByte) will be advanced when needed.
537 //
538
539 inline uint64_t
readBits(int numBits,uint64_t & buffer,int & bufferNumBits,const char * & currByte)540 FastHufDecoder::readBits
541 (int numBits,
542 uint64_t &buffer, // c
543 int &bufferNumBits, // lc
544 const char *&currByte) // in
545 {
546 while (bufferNumBits < numBits)
547 {
548 buffer = (buffer << 8) | *(unsigned char*)(currByte++);
549 bufferNumBits += 8;
550 }
551
552 bufferNumBits -= numBits;
553 return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
554 }
555
556
557 //
558 // Decode using a the 'One-Shift' strategy for decoding, with a
559 // small-ish table to accelerate decoding of short codes.
560 //
561 // If possible, try looking up codes into the acceleration table.
562 // This has a few benifits - there's no search involved; We don't
563 // need an additional lookup to map id to symbol; we don't need
564 // a full 64-bits (so less refilling).
565 //
566
567 void
decode(const unsigned char * src,int numSrcBits,unsigned short * dst,int numDstElems)568 FastHufDecoder::decode
569 (const unsigned char *src,
570 int numSrcBits,
571 unsigned short *dst,
572 int numDstElems)
573 {
574 if (numSrcBits < 128)
575 throw IEX_NAMESPACE::InputExc ("Error choosing Huffman decoder implementation "
576 "(insufficient number of bits).");
577
578 //
579 // Current position (byte/bit) in the src data stream
580 // (after the first buffer fill)
581 //
582
583 const unsigned char *currByte = src + 2 * sizeof (uint64_t);
584
585 numSrcBits -= 8 * 2 * sizeof (uint64_t);
586
587 //
588 // 64-bit buffer holding the current bits in the stream
589 //
590
591 uint64_t buffer = READ64 (src);
592 int bufferNumBits = 64;
593
594 //
595 // 64-bit buffer holding the next bits in the stream
596 //
597
598 uint64_t bufferBack = READ64 ((src + sizeof (uint64_t)));
599 int bufferBackNumBits = 64;
600
601 int dstIdx = 0;
602
603 while (dstIdx < numDstElems)
604 {
605 int codeLen;
606 int symbol;
607
608 //
609 // Test if we can be table accelerated. If so, directly
610 // lookup the output symbol. Otherwise, we need to fall
611 // back to searching for the code.
612 //
613 // If we're doing table lookups, we don't really need
614 // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
615 // left. But for a search, we do need a refilled table.
616 //
617
618 if (_tableMin <= buffer)
619 {
620 int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);
621
622 //
623 // For invalid codes, _tableCodeLen[] should return 0. This
624 // will cause the decoder to get stuck in the current spot
625 // until we run out of elements, then barf that the codestream
626 // is bad. So we don't need to stick a condition like
627 // if (codeLen > _maxCodeLength) in this inner.
628 //
629
630 codeLen = _tableCodeLen[tableIdx];
631 symbol = _tableSymbol[tableIdx];
632 }
633 else
634 {
635 if (bufferNumBits < 64)
636 {
637 refill (buffer,
638 64 - bufferNumBits,
639 bufferBack,
640 bufferBackNumBits,
641 currByte,
642 numSrcBits);
643
644 bufferNumBits = 64;
645 }
646
647 //
648 // Brute force search:
649 // Find the smallest length where _ljBase[length] <= buffer
650 //
651
652 codeLen = TABLE_LOOKUP_BITS + 1;
653
654 while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
655 codeLen++;
656
657 if (codeLen > _maxCodeLength)
658 {
659 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
660 "(Decoded an invalid symbol).");
661 }
662
663 uint64_t id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
664 if (id < static_cast<uint64_t>(_numSymbols))
665 {
666 symbol = _idToSymbol[id];
667 }
668 else
669 {
670 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
671 "(Decoded an invalid symbol).");
672 }
673 }
674
675 //
676 // Shift over bit stream, and update the bit count in the buffer
677 //
678
679 buffer = buffer << codeLen;
680 bufferNumBits -= codeLen;
681
682 //
683 // If we recieved a RLE symbol (_rleSymbol), then we need
684 // to read ahead 8 bits to know how many times to repeat
685 // the previous symbol. Need to ensure we at least have
686 // 8 bits of data in the buffer
687 //
688
689 if (symbol == _rleSymbol)
690 {
691 if (bufferNumBits < 8)
692 {
693 refill (buffer,
694 64 - bufferNumBits,
695 bufferBack,
696 bufferBackNumBits,
697 currByte,
698 numSrcBits);
699
700 bufferNumBits = 64;
701 }
702
703 int rleCount = buffer >> 56;
704
705 if (dstIdx < 1)
706 {
707 throw IEX_NAMESPACE::InputExc ("Huffman decode error (RLE code "
708 "with no previous symbol).");
709 }
710
711 if (dstIdx + rleCount > numDstElems)
712 {
713 throw IEX_NAMESPACE::InputExc ("Huffman decode error (Symbol run "
714 "beyond expected output buffer length).");
715 }
716
717 if (rleCount <= 0)
718 {
719 throw IEX_NAMESPACE::InputExc("Huffman decode error"
720 " (Invalid RLE length)");
721 }
722
723 for (int i = 0; i < rleCount; ++i)
724 dst[dstIdx + i] = dst[dstIdx - 1];
725
726 dstIdx += rleCount;
727
728 buffer = buffer << 8;
729 bufferNumBits -= 8;
730 }
731 else
732 {
733 dst[dstIdx] = symbol;
734 dstIdx++;
735 }
736
737 //
738 // refill bit stream buffer if we're below the number of
739 // bits needed for a table lookup
740 //
741
742 if (bufferNumBits < TABLE_LOOKUP_BITS)
743 {
744 refill (buffer,
745 64 - bufferNumBits,
746 bufferBack,
747 bufferBackNumBits,
748 currByte,
749 numSrcBits);
750
751 bufferNumBits = 64;
752 }
753 }
754
755 if (numSrcBits != 0)
756 {
757 throw IEX_NAMESPACE::InputExc ("Huffman decode error (Compressed data remains "
758 "after filling expected output buffer).");
759 }
760 }
761
762 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
763