1 ///////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2009-2014 DreamWorks Animation LLC.
4 //
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of DreamWorks Animation nor the names of
17 // its contributors may be used to endorse or promote products derived
18 // from this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 ///////////////////////////////////////////////////////////////////////////
33
34 #include "ImfFastHuf.h"
35 #include <Iex.h>
36
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40 #include <vector>
41
42 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
43
44 //
45 // Adapted from hufUnpackEncTable -
46 // We don't need to reconstruct the code book, just the encoded
47 // lengths for each symbol. From the lengths, we can build the
48 // base + offset tables. This should be a bit more efficient
49 // for sparse code books.
50 //
51 // table - ptr to the start of the code length data. Will be
52 // updated as we decode data
53 //
54 // numBytes - size of the encoded table (I think)?
55 //
56 // minSymbol - smallest symbol in the code book
57 //
58 // maxSymbol - largest symbol in the code book.
59 //
60 // rleSymbol - the symbol to trigger RLE in the encoded bitstream
61 //
62
FastHufDecoder(const char * & table,int numBytes,int minSymbol,int maxSymbol,int rleSymbol)63 FastHufDecoder::FastHufDecoder
64 (const char *&table,
65 int numBytes,
66 int minSymbol,
67 int maxSymbol,
68 int rleSymbol)
69 :
70 _rleSymbol (rleSymbol),
71 _numSymbols (0),
72 _minCodeLength (255),
73 _maxCodeLength (0),
74 _idToSymbol (0)
75 {
76 //
77 // List of symbols that we find with non-zero code lengths
78 // (listed in the order we find them). Store these in the
79 // same format as the code book stores codes + lengths -
80 // low 6 bits are the length, everything above that is
81 // the symbol.
82 //
83
84 std::vector<Int64> symbols;
85
86 //
87 // The 'base' table is the minimum code at each code length. base[i]
88 // is the smallest code (numerically) of length i.
89 //
90
91 Int64 base[MAX_CODE_LEN + 1];
92
93 //
94 // The 'offset' table is the position (in sorted order) of the first id
95 // of a given code lenght. Array is indexed by code length, like base.
96 //
97
98 Int64 offset[MAX_CODE_LEN + 1];
99
100 //
101 // Count of how many codes at each length there are. Array is
102 // indexed by code length, like base and offset.
103 //
104
105 size_t codeCount[MAX_CODE_LEN + 1];
106
107 for (int i = 0; i <= MAX_CODE_LEN; ++i)
108 {
109 codeCount[i] = 0;
110 base[i] = 0xffffffffffffffffULL;
111 offset[i] = 0;
112 }
113
114 //
115 // Count the number of codes, the min/max code lengths, the number of
116 // codes with each length, and record symbols with non-zero code
117 // length as we find them.
118 //
119
120 const char *currByte = table;
121 Int64 currBits = 0;
122 int currBitCount = 0;
123
124 const int SHORT_ZEROCODE_RUN = 59;
125 const int LONG_ZEROCODE_RUN = 63;
126 const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
127
128 for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++)
129 {
130 if (currByte - table > numBytes)
131 {
132 throw Iex::InputExc ("Error decoding Huffman table "
133 "(Truncated table data).");
134 }
135
136 //
137 // Next code length - either:
138 // 0-58 (literal code length)
139 // 59-62 (various lengths runs of 0)
140 // 63 (run of n 0's, with n is the next 8 bits)
141 //
142
143 Int64 codeLen = readBits (6, currBits, currBitCount, currByte);
144
145 if (codeLen == (Int64) LONG_ZEROCODE_RUN)
146 {
147 if (currByte - table > numBytes)
148 {
149 throw Iex::InputExc ("Error decoding Huffman table "
150 "(Truncated table data).");
151 }
152
153 int runLen = readBits (8, currBits, currBitCount, currByte) +
154 SHORTEST_LONG_RUN;
155
156 if (symbol + runLen > maxSymbol + 1)
157 {
158 throw Iex::InputExc ("Error decoding Huffman table "
159 "(Run beyond end of table).");
160 }
161
162 symbol += runLen - 1;
163
164 }
165 else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN)
166 {
167 int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;
168
169 if (symbol + runLen > maxSymbol + 1)
170 {
171 throw Iex::InputExc ("Error decoding Huffman table "
172 "(Run beyond end of table).");
173 }
174
175 symbol += runLen - 1;
176
177 }
178 else if (codeLen != 0)
179 {
180 symbols.push_back ((symbol << 6) | (codeLen & 63));
181
182 if (codeLen < _minCodeLength)
183 _minCodeLength = codeLen;
184
185 if (codeLen > _maxCodeLength)
186 _maxCodeLength = codeLen;
187
188 codeCount[codeLen]++;
189 }
190 }
191
192 for (int i = 0; i < MAX_CODE_LEN; ++i)
193 _numSymbols += codeCount[i];
194
195 table = currByte;
196
197 //
198 // Compute base - once we have the code length counts, there
199 // is a closed form solution for this
200 //
201
202 {
203 double* countTmp = new double[_maxCodeLength+1];
204
205 for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
206 {
207 countTmp[l] = (double)codeCount[l] *
208 (double)(2 << (_maxCodeLength-l));
209 }
210
211 for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
212 {
213 double tmp = 0;
214
215 for (int k =l + 1; k <= _maxCodeLength; ++k)
216 tmp += countTmp[k];
217
218 tmp /= (double)(2 << (_maxCodeLength - l));
219
220 base[l] = (Int64)ceil (tmp);
221 }
222
223 delete [] countTmp;
224 }
225
226 //
227 // Compute offset - these are the positions of the first
228 // id (not symbol) that has length [i]
229 //
230
231 offset[_maxCodeLength] = 0;
232
233 for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
234 offset[i] = offset[i + 1] + codeCount[i + 1];
235
236 //
237 // Allocate and fill the symbol-to-id mapping. Smaller Ids should be
238 // mapped to less-frequent symbols (which have longer codes). Use
239 // the offset table to tell us where the id's for a given code
240 // length start off.
241 //
242
243 _idToSymbol = new int[_numSymbols];
244
245 Int64 mapping[MAX_CODE_LEN + 1];
246 for (int i = 0; i < MAX_CODE_LEN + 1; ++i)
247 mapping[i] = -1;
248 for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
249 mapping[i] = offset[i];
250
251 for (std::vector<Int64>::const_iterator i = symbols.begin();
252 i != symbols.end();
253 ++i)
254 {
255 int codeLen = *i & 63;
256 int symbol = *i >> 6;
257
258 if (mapping[codeLen] >= _numSymbols)
259 throw Iex::InputExc ("Huffman decode error "
260 "(Invalid symbol in header).");
261
262 _idToSymbol[mapping[codeLen]] = symbol;
263 mapping[codeLen]++;
264 }
265
266 buildTables(base, offset);
267 }
268
269
~FastHufDecoder()270 FastHufDecoder::~FastHufDecoder()
271 {
272 delete[] _idToSymbol;
273 }
274
275
276 //
277 // Static check if the decoder is enabled.
278 //
279 // ATM, I only have access to little endian hardware for testing,
280 // so I'm not entirely sure that we are reading fom the bit stream
281 // properly on BE.
282 //
283 // If you happen to have more obscure hardware, check that the
284 // byte swapping in refill() is happening sensable, add an endian
285 // check if needed, and fix the preprocessor magic here.
286 //
287
288 #define READ64(c) \
289 ((Int64)(c)[0] << 56) | ((Int64)(c)[1] << 48) | ((Int64)(c)[2] << 40) | \
290 ((Int64)(c)[3] << 32) | ((Int64)(c)[4] << 24) | ((Int64)(c)[5] << 16) | \
291 ((Int64)(c)[6] << 8) | ((Int64)(c)[7] )
292
293 #ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
294 #if defined (__i386__) || defined(__x86_64__)
295 #undef READ64
296 #define READ64(c) _bswap64 (*(const Int64*)(c))
297 #endif
298 #endif
299
300
301 bool
enabled()302 FastHufDecoder::enabled()
303 {
304 #if defined(__INTEL_COMPILER) || defined(__GNUC__)
305
306 //
307 // Enabled for ICC, GCC:
308 // __i386__ -> x86
309 // __x86_64__ -> 64-bit x86
310 //
311
312 #if defined (__i386__) || defined(__x86_64__)
313 return true;
314 #else
315 return false;
316 #endif
317
318 #elif defined (_MSC_VER)
319
320 //
321 // Enabled for Visual Studio:
322 // _M_IX86 -> x86
323 // _M_X64 -> 64bit x86
324
325 #if defined (_M_IX86) || defined(_M_X64)
326 return true;
327 #else
328 return false;
329 #endif
330
331 #else
332
333 //
334 // Unknown compiler - Be safe and disable.
335 //
336 return false;
337 #endif
338 }
339
340 //
341 //
342 // Built the acceleration tables for lookups on the upper bits
343 // as well as the 'LJ' tables.
344 //
345
346 void
buildTables(Int64 * base,Int64 * offset)347 FastHufDecoder::buildTables (Int64 *base, Int64 *offset)
348 {
349 //
350 // Build the 'left justified' base table, by shifting base left..
351 //
352
353 for (int i = 0; i <= MAX_CODE_LEN; ++i)
354 {
355 if (base[i] != 0xffffffffffffffffULL)
356 {
357 _ljBase[i] = base[i] << (64 - i);
358 }
359 else
360 {
361 //
362 // Unused code length - insert dummy values
363 //
364
365 _ljBase[i] = 0xffffffffffffffffULL;
366 }
367 }
368
369 //
370 // Combine some terms into a big fat constant, which for
371 // lack of a better term we'll call the 'left justified'
372 // offset table (because it serves the same function
373 // as 'offset', when using the left justified base table.
374 //
375
376 for (int i = 0; i <= MAX_CODE_LEN; ++i)
377 _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));
378
379 //
380 // Build the acceleration tables for the lookups of
381 // short codes ( <= TABLE_LOOKUP_BITS long)
382 //
383
384 for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
385 {
386 Int64 value = i << (64 - TABLE_LOOKUP_BITS);
387
388 _tableSymbol[i] = 0xffff;
389 _tableCodeLen[i] = 0;
390
391 for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
392 {
393 if (_ljBase[codeLen] <= value)
394 {
395 _tableCodeLen[i] = codeLen;
396
397 Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen));
398 if (id < _numSymbols)
399 {
400 _tableSymbol[i] = _idToSymbol[id];
401 }
402 else
403 {
404 throw Iex::InputExc ("Huffman decode error "
405 "(Overrun).");
406 }
407 break;
408 }
409 }
410 }
411
412 //
413 // Store the smallest value in the table that points to real data.
414 // This should be the entry for the largest length that has
415 // valid data (in our case, non-dummy _ljBase)
416 //
417
418 int minIdx = TABLE_LOOKUP_BITS;
419
420 while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL)
421 minIdx--;
422
423 if (minIdx < 0)
424 {
425 //
426 // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
427 // Set the min value such that the table is never tested.
428 //
429
430 _tableMin = 0xffffffffffffffffULL;
431 }
432 else
433 {
434 _tableMin = _ljBase[minIdx];
435 }
436 }
437
438
439 //
440 // For decoding, we're holding onto 2 Int64's.
441 //
442 // The first (buffer), holds the next bits from the bitstream to be
443 // decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
444 // valid bits to decode the next symbol. For other paths, we need a full
445 // 64-bits to decode a symbol.
446 //
447 // When we need to refill 'buffer', we could pull bits straight from
448 // the bitstream. But this is very slow and requires lots of book keeping
449 // (what's the next bit in the next byte?). Instead, we keep another Int64
450 // around that we use to refill from. While this doesn't cut down on the
451 // book keeping (still need to know how many valid bits), it does cut
452 // down on some of the bit shifting crazy and byte access.
453 //
454 // The refill Int64 (bufferBack) gets left-shifted after we've pulled
455 // off bits. If we run out of bits in the input bit stream, we just
456 // shift in 0's to bufferBack.
457 //
458 // The refill act takes numBits from the top of bufferBack and sticks
459 // them in the bottom of buffer. If there arn't enough bits in bufferBack,
460 // it gets refilled (to 64-bits) from the input bitstream.
461 //
462
463 inline void
refill(Int64 & buffer,int numBits,Int64 & bufferBack,int & bufferBackNumBits,const unsigned char * & currByte,int & currBitsLeft)464 FastHufDecoder::refill
465 (Int64 &buffer,
466 int numBits, // number of bits to refill
467 Int64 &bufferBack, // the next 64-bits, to refill from
468 int &bufferBackNumBits, // number of bits left in bufferBack
469 const unsigned char *&currByte, // current byte in the bitstream
470 int &currBitsLeft) // number of bits left in the bitsream
471 {
472 //
473 // Refill bits into the bottom of buffer, from the top of bufferBack.
474 // Always top up buffer to be completely full.
475 //
476
477 buffer |= bufferBack >> (64 - numBits);
478
479 if (bufferBackNumBits < numBits)
480 {
481 numBits -= bufferBackNumBits;
482
483 //
484 // Refill all of bufferBack from the bitstream. Either grab
485 // a full 64-bit chunk, or whatever bytes are left. If we
486 // don't have 64-bits left, pad with 0's.
487 //
488
489 if (currBitsLeft >= 64)
490 {
491 bufferBack = READ64 (currByte);
492 bufferBackNumBits = 64;
493 currByte += sizeof (Int64);
494 currBitsLeft -= 8 * sizeof (Int64);
495
496 }
497 else
498 {
499 bufferBack = 0;
500 bufferBackNumBits = 64;
501
502 Int64 shift = 56;
503
504 while (currBitsLeft > 0)
505 {
506 bufferBack |= ((Int64)(*currByte)) << shift;
507
508 currByte++;
509 shift -= 8;
510 currBitsLeft -= 8;
511 }
512
513 //
514 // At this point, currBitsLeft might be negative, just because
515 // we're subtracting whole bytes. To keep anyone from freaking
516 // out, zero the counter.
517 //
518
519 if (currBitsLeft < 0)
520 currBitsLeft = 0;
521 }
522
523 buffer |= bufferBack >> (64 - numBits);
524 }
525
526 bufferBack = bufferBack << numBits;
527 bufferBackNumBits -= numBits;
528
529 //
530 // We can have cases where the previous shift of bufferBack is << 64 -
531 // in which case no shift occurs. The bit count math still works though,
532 // so if we don't have any bits left, zero out bufferBack.
533 //
534
535 if (bufferBackNumBits == 0)
536 bufferBack = 0;
537 }
538
539 //
540 // Read the next few bits out of a bitstream. Will be given a backing buffer
541 // (buffer) that may still have data left over from previous reads
542 // (bufferNumBits). Bitstream pointer (currByte) will be advanced when needed.
543 //
544
545 inline Int64
readBits(int numBits,Int64 & buffer,int & bufferNumBits,const char * & currByte)546 FastHufDecoder::readBits
547 (int numBits,
548 Int64 &buffer, // c
549 int &bufferNumBits, // lc
550 const char *&currByte) // in
551 {
552 while (bufferNumBits < numBits)
553 {
554 buffer = (buffer << 8) | *(unsigned char*)(currByte++);
555 bufferNumBits += 8;
556 }
557
558 bufferNumBits -= numBits;
559 return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
560 }
561
562
563 //
564 // Decode using a the 'One-Shift' strategy for decoding, with a
565 // small-ish table to accelerate decoding of short codes.
566 //
567 // If possible, try looking up codes into the acceleration table.
568 // This has a few benifits - there's no search involved; We don't
569 // need an additional lookup to map id to symbol; we don't need
570 // a full 64-bits (so less refilling).
571 //
572
573 void
decode(const unsigned char * src,int numSrcBits,unsigned short * dst,int numDstElems)574 FastHufDecoder::decode
575 (const unsigned char *src,
576 int numSrcBits,
577 unsigned short *dst,
578 int numDstElems)
579 {
580 if (numSrcBits < 128)
581 throw Iex::InputExc ("Error choosing Huffman decoder implementation "
582 "(insufficient number of bits).");
583
584 //
585 // Current position (byte/bit) in the src data stream
586 // (after the first buffer fill)
587 //
588
589 const unsigned char *currByte = src + 2 * sizeof (Int64);
590
591 numSrcBits -= 8 * 2 * sizeof (Int64);
592
593 //
594 // 64-bit buffer holding the current bits in the stream
595 //
596
597 Int64 buffer = READ64 (src);
598 int bufferNumBits = 64;
599
600 //
601 // 64-bit buffer holding the next bits in the stream
602 //
603
604 Int64 bufferBack = READ64 ((src + sizeof (Int64)));
605 int bufferBackNumBits = 64;
606
607 int dstIdx = 0;
608
609 while (dstIdx < numDstElems)
610 {
611 int codeLen;
612 int symbol;
613
614 //
615 // Test if we can be table accelerated. If so, directly
616 // lookup the output symbol. Otherwise, we need to fall
617 // back to searching for the code.
618 //
619 // If we're doing table lookups, we don't really need
620 // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
621 // left. But for a search, we do need a refilled table.
622 //
623
624 if (_tableMin <= buffer)
625 {
626 int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);
627
628 //
629 // For invalid codes, _tableCodeLen[] should return 0. This
630 // will cause the decoder to get stuck in the current spot
631 // until we run out of elements, then barf that the codestream
632 // is bad. So we don't need to stick a condition like
633 // if (codeLen > _maxCodeLength) in this inner.
634 //
635
636 codeLen = _tableCodeLen[tableIdx];
637 symbol = _tableSymbol[tableIdx];
638 }
639 else
640 {
641 if (bufferNumBits < 64)
642 {
643 refill (buffer,
644 64 - bufferNumBits,
645 bufferBack,
646 bufferBackNumBits,
647 currByte,
648 numSrcBits);
649
650 bufferNumBits = 64;
651 }
652
653 //
654 // Brute force search:
655 // Find the smallest length where _ljBase[length] <= buffer
656 //
657
658 codeLen = TABLE_LOOKUP_BITS + 1;
659
660 while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
661 codeLen++;
662
663 if (codeLen > _maxCodeLength)
664 {
665 throw Iex::InputExc ("Huffman decode error "
666 "(Decoded an invalid symbol).");
667 }
668
669 Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
670 if (id < _numSymbols)
671 {
672 symbol = _idToSymbol[id];
673 }
674 else
675 {
676 throw Iex::InputExc ("Huffman decode error "
677 "(Decoded an invalid symbol).");
678 }
679 }
680
681 //
682 // Shift over bit stream, and update the bit count in the buffer
683 //
684
685 buffer = buffer << codeLen;
686 bufferNumBits -= codeLen;
687
688 //
689 // If we recieved a RLE symbol (_rleSymbol), then we need
690 // to read ahead 8 bits to know how many times to repeat
691 // the previous symbol. Need to ensure we at least have
692 // 8 bits of data in the buffer
693 //
694
695 if (symbol == _rleSymbol)
696 {
697 if (bufferNumBits < 8)
698 {
699 refill (buffer,
700 64 - bufferNumBits,
701 bufferBack,
702 bufferBackNumBits,
703 currByte,
704 numSrcBits);
705
706 bufferNumBits = 64;
707 }
708
709 int rleCount = buffer >> 56;
710
711 if (dstIdx < 1)
712 {
713 throw Iex::InputExc ("Huffman decode error (RLE code "
714 "with no previous symbol).");
715 }
716
717 if (dstIdx + rleCount > numDstElems)
718 {
719 throw Iex::InputExc ("Huffman decode error (Symbol run "
720 "beyond expected output buffer length).");
721 }
722
723 if (rleCount <= 0)
724 {
725 throw Iex::InputExc("Huffman decode error"
726 " (Invalid RLE length)");
727 }
728
729 for (int i = 0; i < rleCount; ++i)
730 dst[dstIdx + i] = dst[dstIdx - 1];
731
732 dstIdx += rleCount;
733
734 buffer = buffer << 8;
735 bufferNumBits -= 8;
736 }
737 else
738 {
739 dst[dstIdx] = symbol;
740 dstIdx++;
741 }
742
743 //
744 // refill bit stream buffer if we're below the number of
745 // bits needed for a table lookup
746 //
747
748 if (bufferNumBits < TABLE_LOOKUP_BITS)
749 {
750 refill (buffer,
751 64 - bufferNumBits,
752 bufferBack,
753 bufferBackNumBits,
754 currByte,
755 numSrcBits);
756
757 bufferNumBits = 64;
758 }
759 }
760
761 if (numSrcBits != 0)
762 {
763 throw Iex::InputExc ("Huffman decode error (Compressed data remains "
764 "after filling expected output buffer).");
765 }
766 }
767
768 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
769