1 //
2 // SPDX-License-Identifier: BSD-3-Clause
3 // Copyright (c) Contributors to the OpenEXR Project.
4 //
5
6
7
8 //-----------------------------------------------------------------------------
9 //
10 // 16-bit Huffman compression and decompression.
11 //
12 // The source code in this file is derived from the 8-bit
13 // Huffman compression and decompression routines written
14 // by Christian Rouet for his PIZ image file format.
15 //
16 //-----------------------------------------------------------------------------
17
18 #include <ImfHuf.h>
19 #include "ImfAutoArray.h"
20 #include "ImfFastHuf.h"
21 #include "Iex.h"
22 #include <cstring>
23 #include <cassert>
24 #include <algorithm>
25 #include <cstdint>
26
27
28
29 using namespace std;
30 using namespace IEX_NAMESPACE;
31 #include "ImfNamespace.h"
32
33 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
34
35 namespace {
36
37
38 const int HUF_ENCBITS = 16; // literal (value) bit length
39 const int HUF_DECBITS = 14; // decoding bit size (>= 8)
40
41 const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size
42 const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size
43 const int HUF_DECMASK = HUF_DECSIZE - 1;
44
45
46 struct HufDec
47 { // short code long code
48 //-------------------------------
49 int len:8; // code length 0
50 int lit:24; // lit p size
51 int * p; // 0 lits
52 };
53
54
55 void
invalidNBits()56 invalidNBits ()
57 {
58 throw InputExc ("Error in header for Huffman-encoded data "
59 "(invalid number of bits).");
60 }
61
62
63 void
tooMuchData()64 tooMuchData ()
65 {
66 throw InputExc ("Error in Huffman-encoded data "
67 "(decoded data are longer than expected).");
68 }
69
70
71 void
notEnoughData()72 notEnoughData ()
73 {
74 throw InputExc ("Error in Huffman-encoded data "
75 "(decoded data are shorter than expected).");
76 }
77
78
79 void
invalidCode()80 invalidCode ()
81 {
82 throw InputExc ("Error in Huffman-encoded data "
83 "(invalid code).");
84 }
85
86
87 void
invalidTableSize()88 invalidTableSize ()
89 {
90 throw InputExc ("Error in Huffman-encoded data "
91 "(invalid code table size).");
92 }
93
94
95 void
unexpectedEndOfTable()96 unexpectedEndOfTable ()
97 {
98 throw InputExc ("Error in Huffman-encoded data "
99 "(unexpected end of code table data).");
100 }
101
102
103 void
tableTooLong()104 tableTooLong ()
105 {
106 throw InputExc ("Error in Huffman-encoded data "
107 "(code table is longer than expected).");
108 }
109
110
111 void
invalidTableEntry()112 invalidTableEntry ()
113 {
114 throw InputExc ("Error in Huffman-encoded data "
115 "(invalid code table entry).");
116 }
117
118
119 inline uint64_t
hufLength(uint64_t code)120 hufLength (uint64_t code)
121 {
122 return code & 63;
123 }
124
125
126 inline uint64_t
hufCode(uint64_t code)127 hufCode (uint64_t code)
128 {
129 return code >> 6;
130 }
131
132
133 inline void
outputBits(int nBits,uint64_t bits,uint64_t & c,int & lc,char * & out)134 outputBits (int nBits, uint64_t bits, uint64_t &c, int &lc, char *&out)
135 {
136 c <<= nBits;
137 lc += nBits;
138
139 c |= bits;
140
141 while (lc >= 8)
142 *out++ = (c >> (lc -= 8));
143 }
144
145
146 inline uint64_t
getBits(int nBits,uint64_t & c,int & lc,const char * & in)147 getBits (int nBits, uint64_t &c, int &lc, const char *&in)
148 {
149 while (lc < nBits)
150 {
151 c = (c << 8) | *(unsigned char *)(in++);
152 lc += 8;
153 }
154
155 lc -= nBits;
156 return (c >> lc) & ((1 << nBits) - 1);
157 }
158
159
160 //
161 // ENCODING TABLE BUILDING & (UN)PACKING
162 //
163
164 //
165 // Build a "canonical" Huffman code table:
166 // - for each (uncompressed) symbol, hcode contains the length
167 // of the corresponding code (in the compressed data)
168 // - canonical codes are computed and stored in hcode
169 // - the rules for constructing canonical codes are as follows:
170 // * shorter codes (if filled with zeroes to the right)
171 // have a numerically higher value than longer codes
172 // * for codes with the same length, numerical values
173 // increase with numerical symbol values
174 // - because the canonical code table can be constructed from
175 // symbol lengths alone, the code table can be transmitted
176 // without sending the actual code values
177 // - see http://www.compressconsult.com/huffman/
178 //
179
180 #if !defined (OPENEXR_IMF_HAVE_LARGE_STACK)
181 void
hufCanonicalCodeTable(uint64_t * hcode)182 hufCanonicalCodeTable (uint64_t *hcode)
183 #else
184 void
185 hufCanonicalCodeTable (uint64_t hcode[HUF_ENCSIZE])
186 #endif
187 {
188 uint64_t n[59];
189
190 //
191 // For each i from 0 through 58, count the
192 // number of different codes of length i, and
193 // store the count in n[i].
194 //
195
196 for (int i = 0; i <= 58; ++i)
197 n[i] = 0;
198
199 for (int i = 0; i < HUF_ENCSIZE; ++i)
200 n[hcode[i]] += 1;
201
202 //
203 // For each i from 58 through 1, compute the
204 // numerically lowest code with length i, and
205 // store that code in n[i].
206 //
207
208 uint64_t c = 0;
209
210 for (int i = 58; i > 0; --i)
211 {
212 uint64_t nc = ((c + n[i]) >> 1);
213 n[i] = c;
214 c = nc;
215 }
216
217 //
218 // hcode[i] contains the length, l, of the
219 // code for symbol i. Assign the next available
220 // code of length l to the symbol and store both
221 // l and the code in hcode[i].
222 //
223
224 for (int i = 0; i < HUF_ENCSIZE; ++i)
225 {
226 int l = hcode[i];
227
228 if (l > 0)
229 hcode[i] = l | (n[l]++ << 6);
230 }
231 }
232
233
234 //
235 // Compute Huffman codes (based on frq input) and store them in frq:
236 // - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
237 // - max code length is 58 bits;
238 // - codes outside the range [im-iM] have a null length (unused values);
239 // - original frequencies are destroyed;
240 // - encoding tables are used by hufEncode() and hufBuildDecTable();
241 //
242 // NB: The following code "(*a == *b) && (a > b))" was added to ensure
243 // elements in the heap with the same value are sorted by index.
244 // This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
245 // produced a resultant sorted heap that is identical across OSes.
246 //
247
248
249 struct FHeapCompare
250 {
operator ()__anone1ca35a80111::FHeapCompare251 bool operator () (uint64_t *a, uint64_t *b)
252 {
253 return ((*a > *b) || ((*a == *b) && (a > b)));
254 }
255 };
256
257
258 void
hufBuildEncTable(uint64_t * frq,int * im,int * iM)259 hufBuildEncTable
260 (uint64_t* frq, // io: input frequencies [HUF_ENCSIZE], output table
261 int* im, // o: min frq index
262 int* iM) // o: max frq index
263 {
264 //
265 // This function assumes that when it is called, array frq
266 // indicates the frequency of all possible symbols in the data
267 // that are to be Huffman-encoded. (frq[i] contains the number
268 // of occurrences of symbol i in the data.)
269 //
270 // The loop below does three things:
271 //
272 // 1) Finds the minimum and maximum indices that point
273 // to non-zero entries in frq:
274 //
275 // frq[im] != 0, and frq[i] == 0 for all i < im
276 // frq[iM] != 0, and frq[i] == 0 for all i > iM
277 //
278 // 2) Fills array fHeap with pointers to all non-zero
279 // entries in frq.
280 //
281 // 3) Initializes array hlink such that hlink[i] == i
282 // for all array entries.
283 //
284
285 AutoArray <int, HUF_ENCSIZE> hlink;
286 AutoArray <uint64_t *, HUF_ENCSIZE> fHeap;
287
288 *im = 0;
289
290 while (!frq[*im])
291 (*im)++;
292
293 int nf = 0;
294
295 for (int i = *im; i < HUF_ENCSIZE; i++)
296 {
297 hlink[i] = i;
298
299 if (frq[i])
300 {
301 fHeap[nf] = &frq[i];
302 nf++;
303 *iM = i;
304 }
305 }
306
307 //
308 // Add a pseudo-symbol, with a frequency count of 1, to frq;
309 // adjust the fHeap and hlink array accordingly. Function
310 // hufEncode() uses the pseudo-symbol for run-length encoding.
311 //
312
313 (*iM)++;
314 frq[*iM] = 1;
315 fHeap[nf] = &frq[*iM];
316 nf++;
317
318 //
319 // Build an array, scode, such that scode[i] contains the number
320 // of bits assigned to symbol i. Conceptually this is done by
321 // constructing a tree whose leaves are the symbols with non-zero
322 // frequency:
323 //
324 // Make a heap that contains all symbols with a non-zero frequency,
325 // with the least frequent symbol on top.
326 //
327 // Repeat until only one symbol is left on the heap:
328 //
329 // Take the two least frequent symbols off the top of the heap.
330 // Create a new node that has first two nodes as children, and
331 // whose frequency is the sum of the frequencies of the first
332 // two nodes. Put the new node back into the heap.
333 //
334 // The last node left on the heap is the root of the tree. For each
335 // leaf node, the distance between the root and the leaf is the length
336 // of the code for the corresponding symbol.
337 //
338 // The loop below doesn't actually build the tree; instead we compute
339 // the distances of the leaves from the root on the fly. When a new
340 // node is added to the heap, then that node's descendants are linked
341 // into a single linear list that starts at the new node, and the code
342 // lengths of the descendants (that is, their distance from the root
343 // of the tree) are incremented by one.
344 //
345
346 make_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
347
348 AutoArray <uint64_t, HUF_ENCSIZE> scode;
349 memset (scode, 0, sizeof (uint64_t) * HUF_ENCSIZE);
350
351 while (nf > 1)
352 {
353 //
354 // Find the indices, mm and m, of the two smallest non-zero frq
355 // values in fHeap, add the smallest frq to the second-smallest
356 // frq, and remove the smallest frq value from fHeap.
357 //
358
359 int mm = fHeap[0] - frq;
360 pop_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
361 --nf;
362
363 int m = fHeap[0] - frq;
364 pop_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
365
366 frq[m ] += frq[mm];
367 push_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
368
369 //
370 // The entries in scode are linked into lists with the
371 // entries in hlink serving as "next" pointers and with
372 // the end of a list marked by hlink[j] == j.
373 //
374 // Traverse the lists that start at scode[m] and scode[mm].
375 // For each element visited, increment the length of the
376 // corresponding code by one bit. (If we visit scode[j]
377 // during the traversal, then the code for symbol j becomes
378 // one bit longer.)
379 //
380 // Merge the lists that start at scode[m] and scode[mm]
381 // into a single list that starts at scode[m].
382 //
383
384 //
385 // Add a bit to all codes in the first list.
386 //
387
388 for (int j = m; true; j = hlink[j])
389 {
390 scode[j]++;
391
392 assert (scode[j] <= 58);
393
394 if (hlink[j] == j)
395 {
396 //
397 // Merge the two lists.
398 //
399
400 hlink[j] = mm;
401 break;
402 }
403 }
404
405 //
406 // Add a bit to all codes in the second list
407 //
408
409 for (int j = mm; true; j = hlink[j])
410 {
411 scode[j]++;
412
413 assert (scode[j] <= 58);
414
415 if (hlink[j] == j)
416 break;
417 }
418 }
419
420 //
421 // Build a canonical Huffman code table, replacing the code
422 // lengths in scode with (code, code length) pairs. Copy the
423 // code table from scode into frq.
424 //
425
426 hufCanonicalCodeTable (scode);
427 memcpy (frq, scode, sizeof (uint64_t) * HUF_ENCSIZE);
428 }
429
430
431 //
432 // Pack an encoding table:
433 // - only code lengths, not actual codes, are stored
434 // - runs of zeroes are compressed as follows:
435 //
436 // unpacked packed
437 // --------------------------------
438 // 1 zero 0 (6 bits)
439 // 2 zeroes 59
440 // 3 zeroes 60
441 // 4 zeroes 61
442 // 5 zeroes 62
443 // n zeroes (6 or more) 63 n-6 (6 + 8 bits)
444 //
445
446 const int SHORT_ZEROCODE_RUN = 59;
447 const int LONG_ZEROCODE_RUN = 63;
448 const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
449 const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN;
450
451
452 void
hufPackEncTable(const uint64_t * hcode,int im,int iM,char ** pcode)453 hufPackEncTable
454 (const uint64_t* hcode, // i : encoding table [HUF_ENCSIZE]
455 int im, // i : min hcode index
456 int iM, // i : max hcode index
457 char** pcode) // o: ptr to packed table (updated)
458 {
459 char *p = *pcode;
460 uint64_t c = 0;
461 int lc = 0;
462
463 for (; im <= iM; im++)
464 {
465 int l = hufLength (hcode[im]);
466
467 if (l == 0)
468 {
469 int zerun = 1;
470
471 while ((im < iM) && (zerun < LONGEST_LONG_RUN))
472 {
473 if (hufLength (hcode[im+1]) > 0 )
474 break;
475 im++;
476 zerun++;
477 }
478
479 if (zerun >= 2)
480 {
481 if (zerun >= SHORTEST_LONG_RUN)
482 {
483 outputBits (6, LONG_ZEROCODE_RUN, c, lc, p);
484 outputBits (8, zerun - SHORTEST_LONG_RUN, c, lc, p);
485 }
486 else
487 {
488 outputBits (6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p);
489 }
490 continue;
491 }
492 }
493
494 outputBits (6, l, c, lc, p);
495 }
496
497 if (lc > 0)
498 *p++ = (unsigned char) (c << (8 - lc));
499
500 *pcode = p;
501 }
502
503
504 //
505 // Unpack an encoding table packed by hufPackEncTable():
506 //
507
508 void
hufUnpackEncTable(const char ** pcode,int ni,int im,int iM,uint64_t * hcode)509 hufUnpackEncTable
510 (const char** pcode, // io: ptr to packed table (updated)
511 int ni, // i : input size (in bytes)
512 int im, // i : min hcode index
513 int iM, // i : max hcode index
514 uint64_t* hcode) // o: encoding table [HUF_ENCSIZE]
515 {
516 memset (hcode, 0, sizeof (uint64_t) * HUF_ENCSIZE);
517
518 const char *p = *pcode;
519 uint64_t c = 0;
520 int lc = 0;
521
522 for (; im <= iM; im++)
523 {
524 if (p - *pcode > ni)
525 unexpectedEndOfTable();
526
527 uint64_t l = hcode[im] = getBits (6, c, lc, p); // code length
528
529 if (l == (uint64_t) LONG_ZEROCODE_RUN)
530 {
531 if (p - *pcode > ni)
532 unexpectedEndOfTable();
533
534 int zerun = getBits (8, c, lc, p) + SHORTEST_LONG_RUN;
535
536 if (im + zerun > iM + 1)
537 tableTooLong();
538
539 while (zerun--)
540 hcode[im++] = 0;
541
542 im--;
543 }
544 else if (l >= (uint64_t) SHORT_ZEROCODE_RUN)
545 {
546 int zerun = l - SHORT_ZEROCODE_RUN + 2;
547
548 if (im + zerun > iM + 1)
549 tableTooLong();
550
551 while (zerun--)
552 hcode[im++] = 0;
553
554 im--;
555 }
556 }
557
558 *pcode = const_cast<char *>(p);
559
560 hufCanonicalCodeTable (hcode);
561 }
562
563
564 //
565 // DECODING TABLE BUILDING
566 //
567
568 //
569 // Clear a newly allocated decoding table so that it contains only zeroes.
570 //
571
572 void
hufClearDecTable(HufDec * hdecod)573 hufClearDecTable
574 (HufDec * hdecod) // io: (allocated by caller)
575 // decoding table [HUF_DECSIZE]
576 {
577 memset (hdecod, 0, sizeof (HufDec) * HUF_DECSIZE);
578 }
579
580
581 //
582 // Build a decoding hash table based on the encoding table hcode:
583 // - short codes (<= HUF_DECBITS) are resolved with a single table access;
584 // - long code entry allocations are not optimized, because long codes are
585 // unfrequent;
586 // - decoding tables are used by hufDecode();
587 //
588
589 void
hufBuildDecTable(const uint64_t * hcode,int im,int iM,HufDec * hdecod)590 hufBuildDecTable
591 (const uint64_t* hcode, // i : encoding table
592 int im, // i : min index in hcode
593 int iM, // i : max index in hcode
594 HufDec * hdecod) // o: (allocated by caller)
595 // decoding table [HUF_DECSIZE]
596 {
597 //
598 // Init hashtable & loop on all codes.
599 // Assumes that hufClearDecTable(hdecod) has already been called.
600 //
601
602 for (; im <= iM; im++)
603 {
604 uint64_t c = hufCode (hcode[im]);
605 int l = hufLength (hcode[im]);
606
607 if (c >> l)
608 {
609 //
610 // Error: c is supposed to be an l-bit code,
611 // but c contains a value that is greater
612 // than the largest l-bit number.
613 //
614
615 invalidTableEntry();
616 }
617
618 if (l > HUF_DECBITS)
619 {
620 //
621 // Long code: add a secondary entry
622 //
623
624 HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));
625
626 if (pl->len)
627 {
628 //
629 // Error: a short code has already
630 // been stored in table entry *pl.
631 //
632
633 invalidTableEntry();
634 }
635
636 pl->lit++;
637
638 if (pl->p)
639 {
640 int *p = pl->p;
641 pl->p = new int [pl->lit];
642
643 for (int i = 0; i < pl->lit - 1; ++i)
644 pl->p[i] = p[i];
645
646 delete [] p;
647 }
648 else
649 {
650 pl->p = new int [1];
651 }
652
653 pl->p[pl->lit - 1]= im;
654 }
655 else if (l)
656 {
657 //
658 // Short code: init all primary entries
659 //
660
661 HufDec *pl = hdecod + (c << (HUF_DECBITS - l));
662
663 for (uint64_t i = 1 << (HUF_DECBITS - l); i > 0; i--, pl++)
664 {
665 if (pl->len || pl->p)
666 {
667 //
668 // Error: a short code or a long code has
669 // already been stored in table entry *pl.
670 //
671
672 invalidTableEntry();
673 }
674
675 pl->len = l;
676 pl->lit = im;
677 }
678 }
679 }
680 }
681
682
683 //
684 // Free the long code entries of a decoding table built by hufBuildDecTable()
685 //
686
687 void
hufFreeDecTable(HufDec * hdecod)688 hufFreeDecTable (HufDec *hdecod) // io: Decoding table
689 {
690 for (int i = 0; i < HUF_DECSIZE; i++)
691 {
692 if (hdecod[i].p)
693 {
694 delete [] hdecod[i].p;
695 hdecod[i].p = 0;
696 }
697 }
698 }
699
700
701 //
702 // ENCODING
703 //
704
705 inline void
outputCode(uint64_t code,uint64_t & c,int & lc,char * & out)706 outputCode (uint64_t code, uint64_t &c, int &lc, char *&out)
707 {
708 outputBits (hufLength (code), hufCode (code), c, lc, out);
709 }
710
711
712 inline void
sendCode(uint64_t sCode,int runCount,uint64_t runCode,uint64_t & c,int & lc,char * & out)713 sendCode (uint64_t sCode, int runCount, uint64_t runCode,
714 uint64_t &c, int &lc, char *&out)
715 {
716 //
717 // Output a run of runCount instances of the symbol sCount.
718 // Output the symbols explicitly, or if that is shorter, output
719 // the sCode symbol once followed by a runCode symbol and runCount
720 // expressed as an 8-bit number.
721 //
722
723 if (hufLength (sCode) + hufLength (runCode) + 8 <
724 hufLength (sCode) * runCount)
725 {
726 outputCode (sCode, c, lc, out);
727 outputCode (runCode, c, lc, out);
728 outputBits (8, runCount, c, lc, out);
729 }
730 else
731 {
732 while (runCount-- >= 0)
733 outputCode (sCode, c, lc, out);
734 }
735 }
736
737
738 //
739 // Encode (compress) ni values based on the Huffman encoding table hcode:
740 //
741
742 int
hufEncode(const uint64_t * hcode,const unsigned short * in,const int ni,int rlc,char * out)743 hufEncode // return: output size (in bits)
744 (const uint64_t* hcode, // i : encoding table
745 const unsigned short* in, // i : uncompressed input buffer
746 const int ni, // i : input buffer size (in bytes)
747 int rlc, // i : rl code
748 char* out) // o: compressed output buffer
749 {
750 char *outStart = out;
751 uint64_t c = 0; // bits not yet written to out
752 int lc = 0; // number of valid bits in c (LSB)
753 int s = in[0];
754 int cs = 0;
755
756 //
757 // Loop on input values
758 //
759
760 for (int i = 1; i < ni; i++)
761 {
762 //
763 // Count same values or send code
764 //
765
766 if (s == in[i] && cs < 255)
767 {
768 cs++;
769 }
770 else
771 {
772 sendCode (hcode[s], cs, hcode[rlc], c, lc, out);
773 cs=0;
774 }
775
776 s = in[i];
777 }
778
779 //
780 // Send remaining code
781 //
782
783 sendCode (hcode[s], cs, hcode[rlc], c, lc, out);
784
785 if (lc)
786 *out = (c << (8 - lc)) & 0xff;
787
788 return (out - outStart) * 8 + lc;
789 }
790
791
792 //
793 // DECODING
794 //
795
796 //
797 // In order to force the compiler to inline them,
798 // getChar() and getCode() are implemented as macros
799 // instead of "inline" functions.
800 //
801
802 #define getChar(c, lc, in) \
803 { \
804 c = (c << 8) | *(unsigned char *)(in++); \
805 lc += 8; \
806 }
807
808
809 #define getCode(po, rlc, c, lc, in, out, ob, oe)\
810 { \
811 if (po == rlc) \
812 { \
813 if (lc < 8) \
814 getChar(c, lc, in); \
815 \
816 lc -= 8; \
817 \
818 unsigned char cs = (c >> lc); \
819 \
820 if (out + cs > oe) \
821 tooMuchData(); \
822 else if (out - 1 < ob) \
823 notEnoughData(); \
824 \
825 unsigned short s = out[-1]; \
826 \
827 while (cs-- > 0) \
828 *out++ = s; \
829 } \
830 else if (out < oe) \
831 { \
832 *out++ = po; \
833 } \
834 else \
835 { \
836 tooMuchData(); \
837 } \
838 }
839
840
841 //
842 // Decode (uncompress) ni bits based on encoding & decoding tables:
843 //
844
845 void
hufDecode(const uint64_t * hcode,const HufDec * hdecod,const char * in,int ni,int rlc,int no,unsigned short * out)846 hufDecode
847 (const uint64_t * hcode, // i : encoding table
848 const HufDec * hdecod, // i : decoding table
849 const char* in, // i : compressed input buffer
850 int ni, // i : input size (in bits)
851 int rlc, // i : run-length code
852 int no, // i : expected output size (in bytes)
853 unsigned short* out) // o: uncompressed output buffer
854 {
855 uint64_t c = 0;
856 int lc = 0;
857 unsigned short * outb = out;
858 unsigned short * oe = out + no;
859 const char * ie = in + (ni + 7) / 8; // input byte size
860
861 //
862 // Loop on input bytes
863 //
864
865 while (in < ie)
866 {
867 getChar (c, lc, in);
868
869 //
870 // Access decoding table
871 //
872
873 while (lc >= HUF_DECBITS)
874 {
875 const HufDec pl = hdecod[(c >> (lc-HUF_DECBITS)) & HUF_DECMASK];
876
877 if (pl.len)
878 {
879 //
880 // Get short code
881 //
882
883 lc -= pl.len;
884
885 if ( lc < 0 )
886 {
887 invalidCode(); // code length too long
888 }
889 getCode (pl.lit, rlc, c, lc, in, out, outb, oe);
890 }
891 else
892 {
893 if (!pl.p)
894 invalidCode(); // wrong code
895
896 //
897 // Search long code
898 //
899
900 int j;
901
902 for (j = 0; j < pl.lit; j++)
903 {
904 int l = hufLength (hcode[pl.p[j]]);
905
906 while (lc < l && in < ie) // get more bits
907 getChar (c, lc, in);
908
909 if (lc >= l)
910 {
911 if (hufCode (hcode[pl.p[j]]) ==
912 ((c >> (lc - l)) & ((uint64_t(1) << l) - 1)))
913 {
914 //
915 // Found : get long code
916 //
917
918 lc -= l;
919 getCode (pl.p[j], rlc, c, lc, in, out, outb, oe);
920 break;
921 }
922 }
923 }
924
925 if (j == pl.lit)
926 invalidCode(); // Not found
927 }
928 }
929 }
930
931 //
932 // Get remaining (short) codes
933 //
934
935 int i = (8 - ni) & 7;
936 c >>= i;
937 lc -= i;
938
939 while (lc > 0)
940 {
941 const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
942
943 if (pl.len)
944 {
945 lc -= pl.len;
946 if ( lc < 0 )
947 {
948 invalidCode(); // code length too long
949 }
950 getCode (pl.lit, rlc, c, lc, in, out, outb, oe);
951 }
952 else
953 {
954 invalidCode(); // wrong (long) code
955 }
956 }
957
958 if (out - outb != no)
959 notEnoughData ();
960 }
961
962
963 #if !defined (OPENEXR_IMF_HAVE_LARGE_STACK)
964 void
countFrequencies(uint64_t * freq,const unsigned short data[],int n)965 countFrequencies (uint64_t *freq,
966 const unsigned short data[/*n*/],
967 int n)
968 #else
969 void
970 countFrequencies (uint64_t freq[HUF_ENCSIZE],
971 const unsigned short data[/*n*/],
972 int n)
973 #endif
974 {
975 for (int i = 0; i < HUF_ENCSIZE; ++i)
976 freq[i] = 0;
977
978 for (int i = 0; i < n; ++i)
979 ++freq[data[i]];
980 }
981
982
983 void
writeUInt(char buf[4],unsigned int i)984 writeUInt (char buf[4], unsigned int i)
985 {
986 unsigned char *b = (unsigned char *) buf;
987
988 b[0] = i;
989 b[1] = i >> 8;
990 b[2] = i >> 16;
991 b[3] = i >> 24;
992 }
993
994
995 unsigned int
readUInt(const char buf[4])996 readUInt (const char buf[4])
997 {
998 const unsigned char *b = (const unsigned char *) buf;
999
1000 return ( b[0] & 0x000000ff) |
1001 ((b[1] << 8) & 0x0000ff00) |
1002 ((b[2] << 16) & 0x00ff0000) |
1003 ((b[3] << 24) & 0xff000000);
1004 }
1005
1006 } // namespace
1007
1008
1009 //
1010 // EXTERNAL INTERFACE
1011 //
1012
1013
1014 int
hufCompress(const unsigned short raw[],int nRaw,char compressed[])1015 hufCompress (const unsigned short raw[],
1016 int nRaw,
1017 char compressed[])
1018 {
1019 if (nRaw == 0)
1020 return 0;
1021
1022 AutoArray <uint64_t, HUF_ENCSIZE> freq;
1023
1024 countFrequencies (freq, raw, nRaw);
1025
1026 int im = 0;
1027 int iM = 0;
1028 hufBuildEncTable (freq, &im, &iM);
1029
1030 char *tableStart = compressed + 20;
1031 char *tableEnd = tableStart;
1032 hufPackEncTable (freq, im, iM, &tableEnd);
1033 int tableLength = tableEnd - tableStart;
1034
1035 char *dataStart = tableEnd;
1036 int nBits = hufEncode (freq, raw, nRaw, iM, dataStart);
1037 int dataLength = (nBits + 7) / 8;
1038
1039 writeUInt (compressed, im);
1040 writeUInt (compressed + 4, iM);
1041 writeUInt (compressed + 8, tableLength);
1042 writeUInt (compressed + 12, nBits);
1043 writeUInt (compressed + 16, 0); // room for future extensions
1044
1045 return dataStart + dataLength - compressed;
1046 }
1047
1048
1049 void
hufUncompress(const char compressed[],int nCompressed,unsigned short raw[],int nRaw)1050 hufUncompress (const char compressed[],
1051 int nCompressed,
1052 unsigned short raw[],
1053 int nRaw)
1054 {
1055 //
1056 // need at least 20 bytes for header
1057 //
1058 if (nCompressed < 20 )
1059 {
1060 if (nRaw != 0)
1061 notEnoughData();
1062
1063 return;
1064 }
1065
1066 int im = readUInt (compressed);
1067 int iM = readUInt (compressed + 4);
1068 // int tableLength = readUInt (compressed + 8);
1069 int nBits = readUInt (compressed + 12);
1070
1071 if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE)
1072 invalidTableSize();
1073
1074 const char *ptr = compressed + 20;
1075
1076 uint64_t nBytes = (static_cast<uint64_t>(nBits)+7) / 8 ;
1077
1078 if ( ptr + nBytes > compressed+nCompressed)
1079 {
1080 notEnoughData();
1081 return;
1082 }
1083
1084 //
1085 // Fast decoder needs at least 2x64-bits of compressed data, and
1086 // needs to be run-able on this platform. Otherwise, fall back
1087 // to the original decoder
1088 //
1089
1090 if (FastHufDecoder::enabled() && nBits > 128)
1091 {
1092 FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM);
1093
1094 // must be nBytes remaining in buffer
1095 if( ptr-compressed + nBytes > static_cast<uint64_t>(nCompressed))
1096 {
1097 notEnoughData();
1098 return;
1099 }
1100
1101 fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw);
1102 }
1103 else
1104 {
1105 AutoArray <uint64_t, HUF_ENCSIZE> freq;
1106 AutoArray <HufDec, HUF_DECSIZE> hdec;
1107
1108 hufClearDecTable (hdec);
1109
1110 hufUnpackEncTable (&ptr,
1111 nCompressed - (ptr - compressed),
1112 im,
1113 iM,
1114 freq);
1115
1116 try
1117 {
1118 if (nBits > 8 * (nCompressed - (ptr - compressed)))
1119 invalidNBits();
1120
1121 hufBuildDecTable (freq, im, iM, hdec);
1122 hufDecode (freq, hdec, ptr, nBits, iM, nRaw, raw);
1123 }
1124 catch (...)
1125 {
1126 hufFreeDecTable (hdec);
1127 throw;
1128 }
1129
1130 hufFreeDecTable (hdec);
1131 }
1132 }
1133
1134
1135 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
1136