1 //
2 // SPDX-License-Identifier: BSD-3-Clause
3 // Copyright (c) DreamWorks Animation LLC and Contributors of the OpenEXR Project
4 //
5 
6 #include "ImfFastHuf.h"
7 #include <Iex.h>
8 
9 #include <string.h>
10 #include <assert.h>
11 #include <math.h>
12 #include <vector>
13 
14 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
15 
16 //
17 // Adapted from hufUnpackEncTable -
18 // We don't need to reconstruct the code book, just the encoded
19 // lengths for each symbol. From the lengths, we can build the
20 // base + offset tables. This should be a bit more efficient
21 // for sparse code books.
22 //
23 //   table     - ptr to the start of the code length data. Will be
24 //               updated as we decode data
25 //
26 //   numBytes  - size of the encoded table (I think)?
27 //
28 //   minSymbol - smallest symbol in the code book
29 //
30 //   maxSymbol - largest symbol in the code book.
31 //
32 //   rleSymbol - the symbol to trigger RLE in the encoded bitstream
33 //
34 
FastHufDecoder(const char * & table,int numBytes,int minSymbol,int maxSymbol,int rleSymbol)35 FastHufDecoder::FastHufDecoder
36     (const char *&table,
37      int numBytes,
38      int minSymbol,
39      int maxSymbol,
40      int rleSymbol)
41 :
42     _rleSymbol (rleSymbol),
43     _numSymbols (0),
44     _minCodeLength (255),
45     _maxCodeLength (0),
46     _idToSymbol (0)
47 {
48     //
49     // List of symbols that we find with non-zero code lengths
50     // (listed in the order we find them). Store these in the
51     // same format as the code book stores codes + lengths -
52     // low 6 bits are the length, everything above that is
53     // the symbol.
54     //
55 
56     std::vector<uint64_t> symbols;
57 
58     //
59     // The 'base' table is the minimum code at each code length. base[i]
60     // is the smallest code (numerically) of length i.
61     //
62 
63     uint64_t base[MAX_CODE_LEN + 1];
64 
65     //
66     // The 'offset' table is the position (in sorted order) of the first id
67     // of a given code lenght. Array is indexed by code length, like base.
68     //
69 
70     uint64_t offset[MAX_CODE_LEN + 1];
71 
72     //
73     // Count of how many codes at each length there are. Array is
74     // indexed by code length, like base and offset.
75     //
76 
77     size_t codeCount[MAX_CODE_LEN + 1];
78 
79     for (int i = 0; i <= MAX_CODE_LEN; ++i)
80     {
81         codeCount[i] = 0;
82         base[i]      = 0xffffffffffffffffULL;
83         offset[i]    = 0;
84     }
85 
86     //
87     // Count the number of codes, the min/max code lengths, the number of
88     // codes with each length, and record symbols with non-zero code
89     // length as we find them.
90     //
91 
92     const char *currByte     = table;
93     uint64_t    currBits     = 0;
94     int         currBitCount = 0;
95 
96     const int SHORT_ZEROCODE_RUN = 59;
97     const int LONG_ZEROCODE_RUN  = 63;
98     const int SHORTEST_LONG_RUN  = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
99 
100     for (uint64_t symbol = static_cast<uint64_t>(minSymbol); symbol <= static_cast<uint64_t>(maxSymbol); symbol++)
101     {
102         if (currByte - table >= numBytes)
103         {
104             throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
105                                            "(Truncated table data).");
106         }
107 
108         //
109         // Next code length - either:
110         //       0-58  (literal code length)
111         //       59-62 (various lengths runs of 0)
112         //       63    (run of n 0's, with n is the next 8 bits)
113         //
114 
115         uint64_t codeLen = readBits (6, currBits, currBitCount, currByte);
116 
117         if (codeLen == (uint64_t) LONG_ZEROCODE_RUN)
118         {
119             if (currByte - table >= numBytes)
120             {
121                 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
122                                                "(Truncated table data).");
123             }
124 
125             int runLen = readBits (8, currBits, currBitCount, currByte) +
126                          SHORTEST_LONG_RUN;
127 
128             if (symbol + runLen > static_cast<uint64_t>(maxSymbol + 1))
129             {
130                 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
131                                                "(Run beyond end of table).");
132             }
133 
134             symbol += runLen - 1;
135 
136         }
137         else if (codeLen >= static_cast<uint64_t>(SHORT_ZEROCODE_RUN))
138         {
139             int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;
140 
141             if (symbol + runLen > static_cast<uint64_t>(maxSymbol + 1))
142             {
143                 throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
144                                                "(Run beyond end of table).");
145             }
146 
147             symbol += runLen - 1;
148 
149         }
150         else if (codeLen != 0)
151         {
152             symbols.push_back ((symbol << 6) | (codeLen & 63));
153 
154             if (codeLen < _minCodeLength)
155                 _minCodeLength = codeLen;
156 
157             if (codeLen > _maxCodeLength)
158                 _maxCodeLength = codeLen;
159 
160             codeCount[codeLen]++;
161         }
162     }
163 
164     for (int i = 0; i < MAX_CODE_LEN; ++i)
165         _numSymbols += codeCount[i];
166 
167     table = currByte;
168 
169     //
170     // Compute base - once we have the code length counts, there
171     //                is a closed form solution for this
172     //
173 
174     {
175         double* countTmp = new double[_maxCodeLength+1];
176 
177         for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
178         {
179             countTmp[l] = (double)codeCount[l] *
180                           (double)(2ll << (_maxCodeLength-l));
181         }
182 
183         for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
184         {
185             double tmp = 0;
186 
187             for (int k =l + 1; k <= _maxCodeLength; ++k)
188                 tmp += countTmp[k];
189 
190             tmp /= (double)(2ll << (_maxCodeLength - l));
191 
192             base[l] = (uint64_t)ceil (tmp);
193         }
194 
195         delete [] countTmp;
196     }
197 
198     //
199     // Compute offset - these are the positions of the first
200     //                  id (not symbol) that has length [i]
201     //
202 
203     offset[_maxCodeLength] = 0;
204 
205     for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
206         offset[i] = offset[i + 1] + codeCount[i + 1];
207 
208     //
209     // Allocate and fill the symbol-to-id mapping. Smaller Ids should be
210     // mapped to less-frequent symbols (which have longer codes). Use
211     // the offset table to tell us where the id's for a given code
212     // length start off.
213     //
214 
215     _idToSymbol = new int[_numSymbols];
216 
217     uint64_t mapping[MAX_CODE_LEN + 1];
218     for (int i = 0; i < MAX_CODE_LEN + 1; ++i)
219         mapping[i] = -1;
220     for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
221         mapping[i] = offset[i];
222 
223     for (std::vector<uint64_t>::const_iterator i = symbols.begin();
224          i != symbols.end();
225          ++i)
226     {
227         int codeLen = *i & 63;
228         int symbol  = *i >> 6;
229 
230         if (mapping[codeLen] >= static_cast<uint64_t>(_numSymbols))
231         {
232             delete[] _idToSymbol;
233             _idToSymbol = NULL;
234             throw IEX_NAMESPACE::InputExc ("Huffman decode error "
235                                            "(Invalid symbol in header).");
236         }
237         _idToSymbol[mapping[codeLen]] = symbol;
238         mapping[codeLen]++;
239     }
240 
241     //
242     // exceptions can be thrown whilst building tables. Delete
243     // _idToSynmbol before re-throwing to prevent memory leak
244     //
245     try
246     {
247       buildTables(base, offset);
248     }catch(...)
249     {
250             delete[] _idToSymbol;
251             _idToSymbol = NULL;
252             throw;
253     }
254 }
255 
256 
~FastHufDecoder()257 FastHufDecoder::~FastHufDecoder()
258 {
259     delete[] _idToSymbol;
260 }
261 
262 
263 //
264 // Static check if the decoder is enabled.
265 //
266 // ATM, I only have access to little endian hardware for testing,
267 // so I'm not entirely sure that we are reading fom the bit stream
268 // properly on BE.
269 //
270 // If you happen to have more obscure hardware, check that the
271 // byte swapping in refill() is happening sensable, add an endian
272 // check if needed, and fix the preprocessor magic here.
273 //
274 
275 #define READ64(c) \
276     ((uint64_t)(c)[0] << 56) | ((uint64_t)(c)[1] << 48) | ((uint64_t)(c)[2] << 40) | \
277     ((uint64_t)(c)[3] << 32) | ((uint64_t)(c)[4] << 24) | ((uint64_t)(c)[5] << 16) | \
278     ((uint64_t)(c)[6] <<  8) | ((uint64_t)(c)[7] )
279 
280 #ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
281     #if defined (__i386__) || defined(__x86_64__)
282         #undef  READ64
283         #define READ64(c) _bswap64 (*(const uint64_t*)(c))
284     #endif
285 #endif
286 
287 
288 bool
enabled()289 FastHufDecoder::enabled()
290 {
291     #if defined(__INTEL_COMPILER) || defined(__GNUC__)
292 
293         //
294         // Enabled for ICC, GCC:
295         //       __i386__   -> x86
296         //       __x86_64__ -> 64-bit x86
297         //       __e2k__    -> e2k (MCST Elbrus 2000)
298 
299         #if defined (__i386__) || defined(__x86_64__) || defined(__e2k__)
300             return true;
301         #else
302             return false;
303         #endif
304 
305     #elif defined (_MSC_VER)
306 
307         //
308         // Enabled for Visual Studio:
309         //        _M_IX86 -> x86
310         //        _M_X64  -> 64bit x86
311 
312         #if defined (_M_IX86) || defined(_M_X64)
313             return true;
314         #else
315             return false;
316         #endif
317 
318     #else
319 
320         //
321         // Unknown compiler - Be safe and disable.
322         //
323         return false;
324     #endif
325 }
326 
327 //
328 //
329 // Built the acceleration tables for lookups on the upper bits
330 // as well as the 'LJ' tables.
331 //
332 
333 void
buildTables(uint64_t * base,uint64_t * offset)334 FastHufDecoder::buildTables (uint64_t *base, uint64_t *offset)
335 {
336     //
337     // Build the 'left justified' base table, by shifting base left..
338     //
339 
340     for (int i = 0; i <= MAX_CODE_LEN; ++i)
341     {
342         if (base[i] != 0xffffffffffffffffULL)
343         {
344             _ljBase[i] = base[i] << (64 - i);
345         }
346         else
347         {
348             //
349             // Unused code length - insert dummy values
350             //
351 
352             _ljBase[i] = 0xffffffffffffffffULL;
353         }
354     }
355 
356     //
357     // Combine some terms into a big fat constant, which for
358     // lack of a better term we'll call the 'left justified'
359     // offset table (because it serves the same function
360     // as 'offset', when using the left justified base table.
361     //
362 
363     _ljOffset[0] = offset[0] - _ljBase[0];
364     for (int i = 1; i <= MAX_CODE_LEN; ++i)
365         _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));
366 
367     //
368     // Build the acceleration tables for the lookups of
369     // short codes ( <= TABLE_LOOKUP_BITS long)
370     //
371 
372     for (uint64_t i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
373     {
374         uint64_t value = i << (64 - TABLE_LOOKUP_BITS);
375 
376         _tableSymbol[i]  = 0xffff;
377         _tableCodeLen[i] = 0;
378 
379         for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
380         {
381             if (_ljBase[codeLen] <= value)
382             {
383                 _tableCodeLen[i] = codeLen;
384 
385                 uint64_t id = _ljOffset[codeLen] + (value >> (64 - codeLen));
386                 if (id < static_cast<uint64_t>(_numSymbols))
387                 {
388                     _tableSymbol[i] = _idToSymbol[id];
389                 }
390                 else
391                 {
392                     throw IEX_NAMESPACE::InputExc ("Huffman decode error "
393                                                    "(Overrun).");
394                 }
395                 break;
396             }
397         }
398     }
399 
400     //
401     // Store the smallest value in the table that points to real data.
402     // This should be the entry for the largest length that has
403     // valid data (in our case, non-dummy _ljBase)
404     //
405 
406     int minIdx = TABLE_LOOKUP_BITS;
407 
408     while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL)
409         minIdx--;
410 
411     if (minIdx < 0)
412     {
413         //
414         // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
415         // Set the min value such that the table is never tested.
416         //
417 
418         _tableMin = 0xffffffffffffffffULL;
419     }
420     else
421     {
422         _tableMin = _ljBase[minIdx];
423     }
424 }
425 
426 
427 //
428 // For decoding, we're holding onto 2 uint64_t's.
429 //
430 // The first (buffer), holds the next bits from the bitstream to be
431 // decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
432 // valid bits to decode the next symbol. For other paths, we need a full
433 // 64-bits to decode a symbol.
434 //
435 // When we need to refill 'buffer', we could pull bits straight from
436 // the bitstream. But this is very slow and requires lots of book keeping
437 // (what's the next bit in the next byte?). Instead, we keep another uint64_t
438 // around that we use to refill from. While this doesn't cut down on the
439 // book keeping (still need to know how many valid bits), it does cut
440 // down on some of the bit shifting crazy and byte access.
441 //
442 // The refill uint64_t (bufferBack) gets left-shifted after we've pulled
443 // off bits. If we run out of bits in the input bit stream, we just
444 // shift in 0's to bufferBack.
445 //
446 // The refill act takes numBits from the top of bufferBack and sticks
447 // them in the bottom of buffer. If there arn't enough bits in bufferBack,
448 // it gets refilled (to 64-bits) from the input bitstream.
449 //
450 
451 inline void
refill(uint64_t & buffer,int numBits,uint64_t & bufferBack,int & bufferBackNumBits,const unsigned char * & currByte,int & currBitsLeft)452 FastHufDecoder::refill
453     (uint64_t &buffer,
454      int numBits,                       // number of bits to refill
455      uint64_t &bufferBack,                 // the next 64-bits, to refill from
456      int &bufferBackNumBits,            // number of bits left in bufferBack
457      const unsigned char *&currByte,    // current byte in the bitstream
458      int &currBitsLeft)                 // number of bits left in the bitsream
459 {
460     //
461     // Refill bits into the bottom of buffer, from the top of bufferBack.
462     // Always top up buffer to be completely full.
463     //
464 
465     buffer |= bufferBack >> (64 - numBits);
466 
467     if (bufferBackNumBits < numBits)
468     {
469         numBits -= bufferBackNumBits;
470 
471         //
472         // Refill all of bufferBack from the bitstream. Either grab
473         // a full 64-bit chunk, or whatever bytes are left. If we
474         // don't have 64-bits left, pad with 0's.
475         //
476 
477         if (currBitsLeft >= 64)
478         {
479             bufferBack        = READ64 (currByte);
480             bufferBackNumBits = 64;
481             currByte         += sizeof (uint64_t);
482             currBitsLeft     -= 8 * sizeof (uint64_t);
483 
484         }
485         else
486         {
487             bufferBack        = 0;
488             bufferBackNumBits = 64;
489 
490             uint64_t shift = 56;
491 
492             while (currBitsLeft > 0)
493             {
494                 bufferBack |= ((uint64_t)(*currByte)) << shift;
495 
496                 currByte++;
497                 shift        -= 8;
498                 currBitsLeft -= 8;
499             }
500 
501             //
502             // At this point, currBitsLeft might be negative, just because
503             // we're subtracting whole bytes. To keep anyone from freaking
504             // out, zero the counter.
505             //
506 
507             if (currBitsLeft < 0)
508                 currBitsLeft = 0;
509         }
510 
511         buffer |= bufferBack >> (64 - numBits);
512     }
513 
514 
515     //
516     // We can have cases where the previous shift of bufferBack is << 64 -
517     // this is an undefined operation but tends to create just zeroes.
518     // so if we won't have any bits left, zero out bufferBack insetad of computing the shift
519     //
520 
521     if (bufferBackNumBits <= numBits)
522     {
523         bufferBack = 0;
524     }else
525     {
526         bufferBack = bufferBack << numBits;
527     }
528     bufferBackNumBits -= numBits;
529 
530 
531 }
532 
533 //
534 // Read the next few bits out of a bitstream. Will be given a backing buffer
535 // (buffer) that may still have data left over from previous reads
536 // (bufferNumBits).  Bitstream pointer (currByte) will be advanced when needed.
537 //
538 
539 inline uint64_t
readBits(int numBits,uint64_t & buffer,int & bufferNumBits,const char * & currByte)540 FastHufDecoder::readBits
541     (int numBits,
542      uint64_t &buffer,             // c
543      int &bufferNumBits,        // lc
544      const char *&currByte)     // in
545 {
546     while (bufferNumBits < numBits)
547     {
548         buffer = (buffer << 8) | *(unsigned char*)(currByte++);
549         bufferNumBits += 8;
550     }
551 
552     bufferNumBits -= numBits;
553     return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
554 }
555 
556 
557 //
558 // Decode using a the 'One-Shift' strategy for decoding, with a
559 // small-ish table to accelerate decoding of short codes.
560 //
561 // If possible, try looking up codes into the acceleration table.
562 // This has a few benifits - there's no search involved; We don't
563 // need an additional lookup to map id to symbol; we don't need
564 // a full 64-bits (so less refilling).
565 //
566 
567 void
decode(const unsigned char * src,int numSrcBits,unsigned short * dst,int numDstElems)568 FastHufDecoder::decode
569     (const unsigned char *src,
570      int numSrcBits,
571      unsigned short *dst,
572      int numDstElems)
573 {
574     if (numSrcBits < 128)
575         throw IEX_NAMESPACE::InputExc ("Error choosing Huffman decoder implementation "
576                                        "(insufficient number of bits).");
577 
578     //
579     // Current position (byte/bit) in the src data stream
580     // (after the first buffer fill)
581     //
582 
583     const unsigned char *currByte = src + 2 * sizeof (uint64_t);
584 
585     numSrcBits -= 8 * 2 * sizeof (uint64_t);
586 
587     //
588     // 64-bit buffer holding the current bits in the stream
589     //
590 
591     uint64_t buffer            = READ64 (src);
592     int   bufferNumBits     = 64;
593 
594     //
595     // 64-bit buffer holding the next bits in the stream
596     //
597 
598     uint64_t bufferBack        = READ64 ((src + sizeof (uint64_t)));
599     int   bufferBackNumBits = 64;
600 
601     int dstIdx = 0;
602 
603     while (dstIdx < numDstElems)
604     {
605         int  codeLen;
606         int  symbol;
607 
608         //
609         // Test if we can be table accelerated. If so, directly
610         // lookup the output symbol. Otherwise, we need to fall
611         // back to searching for the code.
612         //
613         // If we're doing table lookups, we don't really need
614         // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
615         // left. But for a search, we do need a refilled table.
616         //
617 
618         if (_tableMin <= buffer)
619         {
620             int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);
621 
622             //
623             // For invalid codes, _tableCodeLen[] should return 0. This
624             // will cause the decoder to get stuck in the current spot
625             // until we run out of elements, then barf that the codestream
626             // is bad.  So we don't need to stick a condition like
627             //     if (codeLen > _maxCodeLength) in this inner.
628             //
629 
630             codeLen = _tableCodeLen[tableIdx];
631             symbol  = _tableSymbol[tableIdx];
632         }
633         else
634         {
635             if (bufferNumBits < 64)
636             {
637                 refill (buffer,
638                         64 - bufferNumBits,
639                         bufferBack,
640                         bufferBackNumBits,
641                         currByte,
642                         numSrcBits);
643 
644                 bufferNumBits = 64;
645             }
646 
647             //
648             // Brute force search:
649             // Find the smallest length where _ljBase[length] <= buffer
650             //
651 
652             codeLen = TABLE_LOOKUP_BITS + 1;
653 
654             while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
655                 codeLen++;
656 
657             if (codeLen > _maxCodeLength)
658             {
659                 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
660                                                "(Decoded an invalid symbol).");
661             }
662 
663             uint64_t id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
664             if (id < static_cast<uint64_t>(_numSymbols))
665             {
666                 symbol = _idToSymbol[id];
667             }
668             else
669             {
670                 throw IEX_NAMESPACE::InputExc ("Huffman decode error "
671                                                "(Decoded an invalid symbol).");
672             }
673         }
674 
675         //
676         // Shift over bit stream, and update the bit count in the buffer
677         //
678 
679         buffer = buffer << codeLen;
680         bufferNumBits -= codeLen;
681 
682         //
683         // If we recieved a RLE symbol (_rleSymbol), then we need
684         // to read ahead 8 bits to know how many times to repeat
685         // the previous symbol. Need to ensure we at least have
686         // 8 bits of data in the buffer
687         //
688 
689         if (symbol == _rleSymbol)
690         {
691             if (bufferNumBits < 8)
692             {
693                 refill (buffer,
694                         64 - bufferNumBits,
695                         bufferBack,
696                         bufferBackNumBits,
697                         currByte,
698                         numSrcBits);
699 
700                 bufferNumBits = 64;
701             }
702 
703             int rleCount = buffer >> 56;
704 
705             if (dstIdx < 1)
706             {
707                 throw IEX_NAMESPACE::InputExc ("Huffman decode error (RLE code "
708                                                "with no previous symbol).");
709             }
710 
711             if (dstIdx + rleCount > numDstElems)
712             {
713                 throw IEX_NAMESPACE::InputExc ("Huffman decode error (Symbol run "
714                                                "beyond expected output buffer length).");
715             }
716 
717             if (rleCount <= 0)
718             {
719                 throw IEX_NAMESPACE::InputExc("Huffman decode error"
720                                               " (Invalid RLE length)");
721             }
722 
723             for (int i = 0; i < rleCount; ++i)
724                 dst[dstIdx + i] = dst[dstIdx - 1];
725 
726             dstIdx += rleCount;
727 
728             buffer = buffer << 8;
729             bufferNumBits -= 8;
730         }
731         else
732         {
733             dst[dstIdx] = symbol;
734             dstIdx++;
735         }
736 
737         //
738         // refill bit stream buffer if we're below the number of
739         // bits needed for a table lookup
740         //
741 
742         if (bufferNumBits < TABLE_LOOKUP_BITS)
743         {
744             refill (buffer,
745                     64 - bufferNumBits,
746                     bufferBack,
747                     bufferBackNumBits,
748                     currByte,
749                     numSrcBits);
750 
751             bufferNumBits = 64;
752         }
753     }
754 
755     if (numSrcBits != 0)
756     {
757         throw IEX_NAMESPACE::InputExc ("Huffman decode error (Compressed data remains "
758                                        "after filling expected output buffer).");
759     }
760 }
761 
762 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
763