1 /* DeflaterHuffman.java --
2    Copyright (C) 2001, 2004, 2005  Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 package java.util.zip;
39 
40 /**
41  * This is the DeflaterHuffman class.
42  *
43  * This class is <i>not</i> thread safe.  This is inherent in the API, due
44  * to the split of deflate and setInput.
45  *
46  * @author Jochen Hoenicke
47  * @date Jan 6, 2000
48  */
49 class DeflaterHuffman
50 {
51   private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
52   private static final int LITERAL_NUM = 286;
53   private static final int DIST_NUM = 30;
54   private static final int BITLEN_NUM = 19;
55   private static final int REP_3_6    = 16;
56   private static final int REP_3_10   = 17;
57   private static final int REP_11_138 = 18;
58   private static final int EOF_SYMBOL = 256;
59   private static final int[] BL_ORDER =
60   { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
61 
62   private static final String bit4Reverse =
63     "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";
64 
65   class Tree {
66     short[] freqs;
67     short[] codes;
68     byte[]  length;
69     int[]   bl_counts;
70     int     minNumCodes, numCodes;
71     int     maxLength;
72 
Tree(int elems, int minCodes, int maxLength)73     Tree(int elems, int minCodes, int maxLength) {
74       this.minNumCodes = minCodes;
75       this.maxLength  = maxLength;
76       freqs  = new short[elems];
77       bl_counts = new int[maxLength];
78     }
79 
reset()80     void reset() {
81       for (int i = 0; i < freqs.length; i++)
82         freqs[i] = 0;
83       codes = null;
84       length = null;
85     }
86 
writeSymbol(int code)87     final void writeSymbol(int code)
88     {
89       if (DeflaterConstants.DEBUGGING)
90         {
91           freqs[code]--;
92 //        System.err.print("writeSymbol("+freqs.length+","+code+"): ");
93         }
94       pending.writeBits(codes[code] & 0xffff, length[code]);
95     }
96 
checkEmpty()97     final void checkEmpty()
98     {
99       boolean empty = true;
100       for (int i = 0; i < freqs.length; i++)
101         if (freqs[i] != 0)
102           {
103             System.err.println("freqs["+i+"] == "+freqs[i]);
104             empty = false;
105           }
106       if (!empty)
107         throw new InternalError();
108       System.err.println("checkEmpty suceeded!");
109     }
110 
setStaticCodes(short[] stCodes, byte[] stLength)111     void setStaticCodes(short[] stCodes, byte[] stLength)
112     {
113       codes = stCodes;
114       length = stLength;
115     }
116 
buildCodes()117     public void buildCodes() {
118       int[] nextCode = new int[maxLength];
119       int code = 0;
120       codes = new short[freqs.length];
121 
122       if (DeflaterConstants.DEBUGGING)
123         System.err.println("buildCodes: "+freqs.length);
124       for (int bits = 0; bits < maxLength; bits++)
125         {
126           nextCode[bits] = code;
127           code += bl_counts[bits] << (15 - bits);
128           if (DeflaterConstants.DEBUGGING)
129             System.err.println("bits: "+(bits+1)+" count: "+bl_counts[bits]
130                                +" nextCode: "+Integer.toHexString(code));
131         }
132       if (DeflaterConstants.DEBUGGING && code != 65536)
133         throw new RuntimeException("Inconsistent bl_counts!");
134 
135       for (int i=0; i < numCodes; i++)
136         {
137           int bits = length[i];
138           if (bits > 0)
139             {
140               if (DeflaterConstants.DEBUGGING)
141                 System.err.println("codes["+i+"] = rev("
142                                    +Integer.toHexString(nextCode[bits-1])+"),"
143                                    +bits);
144               codes[i] = bitReverse(nextCode[bits-1]);
145               nextCode[bits-1] += 1 << (16 - bits);
146             }
147         }
148     }
149 
buildLength(int childs[])150     private void buildLength(int childs[])
151     {
152       this.length = new byte [freqs.length];
153       int numNodes = childs.length / 2;
154       int numLeafs = (numNodes + 1) / 2;
155       int overflow = 0;
156 
157       for (int i = 0; i < maxLength; i++)
158         bl_counts[i] = 0;
159 
160       /* First calculate optimal bit lengths */
161       int lengths[] = new int[numNodes];
162       lengths[numNodes-1] = 0;
163       for (int i = numNodes - 1; i >= 0; i--)
164         {
165           if (childs[2*i+1] != -1)
166             {
167               int bitLength = lengths[i] + 1;
168               if (bitLength > maxLength)
169                 {
170                   bitLength = maxLength;
171                   overflow++;
172                 }
173               lengths[childs[2*i]] = lengths[childs[2*i+1]] = bitLength;
174             }
175           else
176             {
177               /* A leaf node */
178               int bitLength = lengths[i];
179               bl_counts[bitLength - 1]++;
180               this.length[childs[2*i]] = (byte) lengths[i];
181             }
182         }
183 
184       if (DeflaterConstants.DEBUGGING)
185         {
186           System.err.println("Tree "+freqs.length+" lengths:");
187           for (int i=0; i < numLeafs; i++)
188             System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
189                                + " len: "+length[childs[2*i]]);
190         }
191 
192       if (overflow == 0)
193         return;
194 
195       int incrBitLen = maxLength - 1;
196       do
197         {
198           /* Find the first bit length which could increase: */
199           while (bl_counts[--incrBitLen] == 0)
200             ;
201 
202           /* Move this node one down and remove a corresponding
203            * amount of overflow nodes.
204            */
205           do
206             {
207               bl_counts[incrBitLen]--;
208               bl_counts[++incrBitLen]++;
209               overflow -= 1 << (maxLength - 1 - incrBitLen);
210             }
211           while (overflow > 0 && incrBitLen < maxLength - 1);
212         }
213       while (overflow > 0);
214 
215       /* We may have overshot above.  Move some nodes from maxLength to
216        * maxLength-1 in that case.
217        */
218       bl_counts[maxLength-1] += overflow;
219       bl_counts[maxLength-2] -= overflow;
220 
221       /* Now recompute all bit lengths, scanning in increasing
222        * frequency.  It is simpler to reconstruct all lengths instead of
223        * fixing only the wrong ones. This idea is taken from 'ar'
224        * written by Haruhiko Okumura.
225        *
226        * The nodes were inserted with decreasing frequency into the childs
227        * array.
228        */
229       int nodePtr = 2 * numLeafs;
230       for (int bits = maxLength; bits != 0; bits--)
231         {
232           int n = bl_counts[bits-1];
233           while (n > 0)
234             {
235               int childPtr = 2*childs[nodePtr++];
236               if (childs[childPtr + 1] == -1)
237                 {
238                   /* We found another leaf */
239                   length[childs[childPtr]] = (byte) bits;
240                   n--;
241                 }
242             }
243         }
244       if (DeflaterConstants.DEBUGGING)
245         {
246           System.err.println("*** After overflow elimination. ***");
247           for (int i=0; i < numLeafs; i++)
248             System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
249                                + " len: "+length[childs[2*i]]);
250         }
251     }
252 
buildTree()253     void buildTree()
254     {
255       int numSymbols = freqs.length;
256 
257       /* heap is a priority queue, sorted by frequency, least frequent
258        * nodes first.  The heap is a binary tree, with the property, that
259        * the parent node is smaller than both child nodes.  This assures
260        * that the smallest node is the first parent.
261        *
262        * The binary tree is encoded in an array:  0 is root node and
263        * the nodes 2*n+1, 2*n+2 are the child nodes of node n.
264        */
265       int[] heap = new int[numSymbols];
266       int heapLen = 0;
267       int maxCode = 0;
268       for (int n = 0; n < numSymbols; n++)
269         {
270           int freq = freqs[n];
271           if (freq != 0)
272             {
273               /* Insert n into heap */
274               int pos = heapLen++;
275               int ppos;
276               while (pos > 0 &&
277                      freqs[heap[ppos = (pos - 1) / 2]] > freq) {
278                 heap[pos] = heap[ppos];
279                 pos = ppos;
280               }
281               heap[pos] = n;
282               maxCode = n;
283             }
284         }
285 
286       /* We could encode a single literal with 0 bits but then we
287        * don't see the literals.  Therefore we force at least two
288        * literals to avoid this case.  We don't care about order in
289        * this case, both literals get a 1 bit code.
290        */
291       while (heapLen < 2)
292         {
293           int node = maxCode < 2 ? ++maxCode : 0;
294           heap[heapLen++] = node;
295         }
296 
297       numCodes = Math.max(maxCode + 1, minNumCodes);
298 
299       int numLeafs = heapLen;
300       int[] childs = new int[4*heapLen - 2];
301       int[] values = new int[2*heapLen - 1];
302       int numNodes = numLeafs;
303       for (int i = 0; i < heapLen; i++)
304         {
305           int node = heap[i];
306           childs[2*i]   = node;
307           childs[2*i+1] = -1;
308           values[i] = freqs[node] << 8;
309           heap[i] = i;
310         }
311 
312       /* Construct the Huffman tree by repeatedly combining the least two
313        * frequent nodes.
314        */
315       do
316         {
317           int first = heap[0];
318           int last  = heap[--heapLen];
319 
320           /* Propagate the hole to the leafs of the heap */
321           int ppos = 0;
322           int path = 1;
323           while (path < heapLen)
324             {
325               if (path + 1 < heapLen
326                   && values[heap[path]] > values[heap[path+1]])
327                 path++;
328 
329               heap[ppos] = heap[path];
330               ppos = path;
331               path = path * 2 + 1;
332             }
333 
334           /* Now propagate the last element down along path.  Normally
335            * it shouldn't go too deep.
336            */
337           int lastVal = values[last];
338           while ((path = ppos) > 0
339                  && values[heap[ppos = (path - 1)/2]] > lastVal)
340             heap[path] = heap[ppos];
341           heap[path] = last;
342 
343 
344           int second = heap[0];
345 
346           /* Create a new node father of first and second */
347           last = numNodes++;
348           childs[2*last] = first;
349           childs[2*last+1] = second;
350           int mindepth = Math.min(values[first] & 0xff, values[second] & 0xff);
351           values[last] = lastVal = values[first] + values[second] - mindepth + 1;
352 
353           /* Again, propagate the hole to the leafs */
354           ppos = 0;
355           path = 1;
356           while (path < heapLen)
357             {
358               if (path + 1 < heapLen
359                   && values[heap[path]] > values[heap[path+1]])
360                 path++;
361 
362               heap[ppos] = heap[path];
363               ppos = path;
364               path = ppos * 2 + 1;
365             }
366 
367           /* Now propagate the new element down along path */
368           while ((path = ppos) > 0
369                  && values[heap[ppos = (path - 1)/2]] > lastVal)
370             heap[path] = heap[ppos];
371           heap[path] = last;
372         }
373       while (heapLen > 1);
374 
375       if (heap[0] != childs.length / 2 - 1)
376         throw new RuntimeException("Weird!");
377 
378       buildLength(childs);
379     }
380 
381     int getEncodedLength()
382     {
383       int len = 0;
384       for (int i = 0; i < freqs.length; i++)
385         len += freqs[i] * length[i];
386       return len;
387     }
388 
389     void calcBLFreq(Tree blTree) {
390       int max_count;               /* max repeat count */
391       int min_count;               /* min repeat count */
392       int count;                   /* repeat count of the current code */
393       int curlen = -1;             /* length of current code */
394 
395       int i = 0;
396       while (i < numCodes)
397         {
398           count = 1;
399           int nextlen = length[i];
400           if (nextlen == 0)
401             {
402               max_count = 138;
403               min_count = 3;
404             }
405           else
406             {
407               max_count = 6;
408               min_count = 3;
409               if (curlen != nextlen)
410                 {
411                   blTree.freqs[nextlen]++;
412                   count = 0;
413                 }
414             }
415           curlen = nextlen;
416           i++;
417 
418           while (i < numCodes && curlen == length[i])
419             {
420               i++;
421               if (++count >= max_count)
422                 break;
423             }
424 
425           if (count < min_count)
426             blTree.freqs[curlen] += count;
427           else if (curlen != 0)
428             blTree.freqs[REP_3_6]++;
429           else if (count <= 10)
430             blTree.freqs[REP_3_10]++;
431           else
432             blTree.freqs[REP_11_138]++;
433         }
434     }
435 
436     void writeTree(Tree blTree)
437     {
438       int max_count;               /* max repeat count */
439       int min_count;               /* min repeat count */
440       int count;                   /* repeat count of the current code */
441       int curlen = -1;             /* length of current code */
442 
443       int i = 0;
444       while (i < numCodes)
445         {
446           count = 1;
447           int nextlen = length[i];
448           if (nextlen == 0)
449             {
450               max_count = 138;
451               min_count = 3;
452             }
453           else
454             {
455               max_count = 6;
456               min_count = 3;
457               if (curlen != nextlen)
458                 {
459                   blTree.writeSymbol(nextlen);
460                   count = 0;
461                 }
462             }
463           curlen = nextlen;
464           i++;
465 
466           while (i < numCodes && curlen == length[i])
467             {
468               i++;
469               if (++count >= max_count)
470                 break;
471             }
472 
473           if (count < min_count)
474             {
475               while (count-- > 0)
476                 blTree.writeSymbol(curlen);
477             }
478           else if (curlen != 0)
479             {
480               blTree.writeSymbol(REP_3_6);
481               pending.writeBits(count - 3, 2);
482             }
483           else if (count <= 10)
484             {
485               blTree.writeSymbol(REP_3_10);
486               pending.writeBits(count - 3, 3);
487             }
488           else
489             {
490               blTree.writeSymbol(REP_11_138);
491               pending.writeBits(count - 11, 7);
492             }
493         }
494     }
495   }
496 
497 
498 
499   DeflaterPending pending;
500   private Tree literalTree, distTree, blTree;
501 
502   private short d_buf[];
503   private byte l_buf[];
504   private int last_lit;
505   private int extra_bits;
506 
507   private static short staticLCodes[];
508   private static byte  staticLLength[];
509   private static short staticDCodes[];
510   private static byte  staticDLength[];
511 
512   /**
513    * Reverse the bits of a 16 bit value.
514    */
515   static short bitReverse(int value) {
516     return (short) (bit4Reverse.charAt(value & 0xf) << 12
517                     | bit4Reverse.charAt((value >> 4) & 0xf) << 8
518                     | bit4Reverse.charAt((value >> 8) & 0xf) << 4
519                     | bit4Reverse.charAt(value >> 12));
520   }
521 
522   static {
523     /* See RFC 1951 3.2.6 */
524     /* Literal codes */
525     staticLCodes = new short[LITERAL_NUM];
526     staticLLength = new byte[LITERAL_NUM];
527     int i = 0;
528     while (i < 144) {
529       staticLCodes[i] = bitReverse((0x030 + i) << 8);
530       staticLLength[i++] = 8;
531     }
532     while (i < 256) {
533       staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7);
534       staticLLength[i++] = 9;
535     }
536     while (i < 280) {
537       staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9);
538       staticLLength[i++] = 7;
539     }
540     while (i < LITERAL_NUM) {
541       staticLCodes[i] = bitReverse((0x0c0 - 280 + i)  << 8);
542       staticLLength[i++] = 8;
543     }
544 
545     /* Distant codes */
546     staticDCodes = new short[DIST_NUM];
547     staticDLength = new byte[DIST_NUM];
548     for (i = 0; i < DIST_NUM; i++) {
549       staticDCodes[i] = bitReverse(i << 11);
550       staticDLength[i] = 5;
551     }
552   }
553 
554   public DeflaterHuffman(DeflaterPending pending)
555   {
556     this.pending = pending;
557 
558     literalTree = new Tree(LITERAL_NUM, 257, 15);
559     distTree    = new Tree(DIST_NUM, 1, 15);
560     blTree      = new Tree(BITLEN_NUM, 4, 7);
561 
562     d_buf = new short[BUFSIZE];
563     l_buf = new byte [BUFSIZE];
564   }
565 
566   public final void reset() {
567     last_lit = 0;
568     extra_bits = 0;
569     literalTree.reset();
570     distTree.reset();
571     blTree.reset();
572   }
573 
574   private int l_code(int len) {
575     if (len == 255)
576       return 285;
577 
578     int code = 257;
579     while (len >= 8)
580       {
581         code += 4;
582         len >>= 1;
583       }
584     return code + len;
585   }
586 
587   private int d_code(int distance) {
588     int code = 0;
589     while (distance >= 4)
590       {
591         code += 2;
592         distance >>= 1;
593       }
594     return code + distance;
595   }
596 
597   public void sendAllTrees(int blTreeCodes) {
598     blTree.buildCodes();
599     literalTree.buildCodes();
600     distTree.buildCodes();
601     pending.writeBits(literalTree.numCodes - 257, 5);
602     pending.writeBits(distTree.numCodes - 1, 5);
603     pending.writeBits(blTreeCodes - 4, 4);
604     for (int rank = 0; rank < blTreeCodes; rank++)
605       pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
606     literalTree.writeTree(blTree);
607     distTree.writeTree(blTree);
608     if (DeflaterConstants.DEBUGGING)
609       blTree.checkEmpty();
610   }
611 
612   public void compressBlock() {
613     for (int i = 0; i < last_lit; i++)
614       {
615         int litlen = l_buf[i] & 0xff;
616         int dist = d_buf[i];
617         if (dist-- != 0)
618           {
619             if (DeflaterConstants.DEBUGGING)
620               System.err.print("["+(dist+1)+","+(litlen+3)+"]: ");
621 
622             int lc = l_code(litlen);
623             literalTree.writeSymbol(lc);
624 
625             int bits = (lc - 261) / 4;
626             if (bits > 0 && bits <= 5)
627               pending.writeBits(litlen & ((1 << bits) - 1), bits);
628 
629             int dc = d_code(dist);
630             distTree.writeSymbol(dc);
631 
632             bits = dc / 2 - 1;
633             if (bits > 0)
634               pending.writeBits(dist & ((1 << bits) - 1), bits);
635           }
636         else
637           {
638             if (DeflaterConstants.DEBUGGING)
639               {
640                 if (litlen > 32 && litlen < 127)
641                   System.err.print("("+(char)litlen+"): ");
642                 else
643                   System.err.print("{"+litlen+"}: ");
644               }
645             literalTree.writeSymbol(litlen);
646           }
647       }
648     if (DeflaterConstants.DEBUGGING)
649       System.err.print("EOF: ");
650     literalTree.writeSymbol(EOF_SYMBOL);
651     if (DeflaterConstants.DEBUGGING)
652       {
653         literalTree.checkEmpty();
654         distTree.checkEmpty();
655       }
656   }
657 
658   public void flushStoredBlock(byte[] stored,
659                                int stored_offset, int stored_len,
660                                boolean lastBlock) {
661     if (DeflaterConstants.DEBUGGING)
662       System.err.println("Flushing stored block "+ stored_len);
663     pending.writeBits((DeflaterConstants.STORED_BLOCK << 1)
664                       + (lastBlock ? 1 : 0), 3);
665     pending.alignToByte();
666     pending.writeShort(stored_len);
667     pending.writeShort(~stored_len);
668     pending.writeBlock(stored, stored_offset, stored_len);
669     reset();
670   }
671 
672   public void flushBlock(byte[] stored, int stored_offset, int stored_len,
673                          boolean lastBlock) {
674     literalTree.freqs[EOF_SYMBOL]++;
675 
676     /* Build trees */
677     literalTree.buildTree();
678     distTree.buildTree();
679 
680     /* Calculate bitlen frequency */
681     literalTree.calcBLFreq(blTree);
682     distTree.calcBLFreq(blTree);
683 
684     /* Build bitlen tree */
685     blTree.buildTree();
686 
687     int blTreeCodes = 4;
688     for (int i = 18; i > blTreeCodes; i--)
689       {
690         if (blTree.length[BL_ORDER[i]] > 0)
691           blTreeCodes = i+1;
692       }
693     int opt_len = 14 + blTreeCodes * 3 + blTree.getEncodedLength()
694       + literalTree.getEncodedLength() + distTree.getEncodedLength()
695       + extra_bits;
696 
697     int static_len = extra_bits;
698     for (int i = 0; i < LITERAL_NUM; i++)
699       static_len += literalTree.freqs[i] * staticLLength[i];
700     for (int i = 0; i < DIST_NUM; i++)
701       static_len += distTree.freqs[i] * staticDLength[i];
702     if (opt_len >= static_len)
703       {
704         /* Force static trees */
705         opt_len = static_len;
706       }
707 
708     if (stored_offset >= 0 && stored_len+4 < opt_len >> 3)
709       {
710         /* Store Block */
711         if (DeflaterConstants.DEBUGGING)
712           System.err.println("Storing, since " + stored_len + " < " + opt_len
713                              + " <= " + static_len);
714         flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
715       }
716     else if (opt_len == static_len)
717       {
718         /* Encode with static tree */
719         pending.writeBits((DeflaterConstants.STATIC_TREES << 1)
720                           + (lastBlock ? 1 : 0), 3);
721         literalTree.setStaticCodes(staticLCodes, staticLLength);
722         distTree.setStaticCodes(staticDCodes, staticDLength);
723         compressBlock();
724         reset();
725       }
726     else
727       {
728         /* Encode with dynamic tree */
729         pending.writeBits((DeflaterConstants.DYN_TREES << 1)
730                           + (lastBlock ? 1 : 0), 3);
731         sendAllTrees(blTreeCodes);
732         compressBlock();
733         reset();
734       }
735   }
736 
737   public final boolean isFull()
738   {
739     return last_lit == BUFSIZE;
740   }
741 
742   public final boolean tallyLit(int lit)
743   {
744     if (DeflaterConstants.DEBUGGING)
745       {
746         if (lit > 32 && lit < 127)
747           System.err.println("("+(char)lit+")");
748         else
749           System.err.println("{"+lit+"}");
750       }
751     d_buf[last_lit] = 0;
752     l_buf[last_lit++] = (byte) lit;
753     literalTree.freqs[lit]++;
754     return last_lit == BUFSIZE;
755   }
756 
757   public final boolean tallyDist(int dist, int len)
758   {
759     if (DeflaterConstants.DEBUGGING)
760       System.err.println("["+dist+","+len+"]");
761 
762     d_buf[last_lit] = (short) dist;
763     l_buf[last_lit++] = (byte) (len - 3);
764 
765     int lc = l_code(len-3);
766     literalTree.freqs[lc]++;
767     if (lc >= 265 && lc < 285)
768       extra_bits += (lc - 261) / 4;
769 
770     int dc = d_code(dist-1);
771     distTree.freqs[dc]++;
772     if (dc >= 4)
773       extra_bits += dc / 2 - 1;
774     return last_lit == BUFSIZE;
775   }
776 }
777