1 /* DeflaterHuffman.java -- 2 Copyright (C) 2001, 2004, 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 package java.util.zip; 39 40 /** 41 * This is the DeflaterHuffman class. 42 * 43 * This class is <i>not</i> thread safe. This is inherent in the API, due 44 * to the split of deflate and setInput. 45 * 46 * @author Jochen Hoenicke 47 * @date Jan 6, 2000 48 */ 49 class DeflaterHuffman 50 { 51 private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6); 52 private static final int LITERAL_NUM = 286; 53 private static final int DIST_NUM = 30; 54 private static final int BITLEN_NUM = 19; 55 private static final int REP_3_6 = 16; 56 private static final int REP_3_10 = 17; 57 private static final int REP_11_138 = 18; 58 private static final int EOF_SYMBOL = 256; 59 private static final int[] BL_ORDER = 60 { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; 61 62 private static final String bit4Reverse = 63 "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017"; 64 65 class Tree { 66 short[] freqs; 67 short[] codes; 68 byte[] length; 69 int[] bl_counts; 70 int minNumCodes, numCodes; 71 int maxLength; 72 Tree(int elems, int minCodes, int maxLength)73 Tree(int elems, int minCodes, int maxLength) { 74 this.minNumCodes = minCodes; 75 this.maxLength = maxLength; 76 freqs = new short[elems]; 77 bl_counts = new int[maxLength]; 78 } 79 reset()80 void reset() { 81 for (int i = 0; i < freqs.length; i++) 82 freqs[i] = 0; 83 codes = null; 84 length = null; 85 } 86 writeSymbol(int code)87 final void writeSymbol(int code) 88 { 89 if (DeflaterConstants.DEBUGGING) 90 { 91 freqs[code]--; 92 // System.err.print("writeSymbol("+freqs.length+","+code+"): "); 93 } 94 pending.writeBits(codes[code] & 0xffff, length[code]); 95 } 96 checkEmpty()97 final void checkEmpty() 98 { 99 boolean empty = true; 100 for (int i = 0; i < freqs.length; i++) 101 if (freqs[i] != 0) 102 { 103 System.err.println("freqs["+i+"] == "+freqs[i]); 104 empty = false; 105 } 106 if (!empty) 107 throw new InternalError(); 108 System.err.println("checkEmpty suceeded!"); 109 } 110 setStaticCodes(short[] stCodes, byte[] stLength)111 void setStaticCodes(short[] stCodes, byte[] stLength) 112 { 113 codes = stCodes; 114 length = stLength; 115 } 116 buildCodes()117 public void buildCodes() { 118 int[] nextCode = new int[maxLength]; 119 int code = 0; 120 codes = new short[freqs.length]; 121 122 if (DeflaterConstants.DEBUGGING) 123 System.err.println("buildCodes: "+freqs.length); 124 for (int bits = 0; bits < maxLength; bits++) 125 { 126 nextCode[bits] = code; 127 code += bl_counts[bits] << (15 - bits); 128 if (DeflaterConstants.DEBUGGING) 129 System.err.println("bits: "+(bits+1)+" count: "+bl_counts[bits] 130 +" nextCode: "+Integer.toHexString(code)); 131 } 132 if (DeflaterConstants.DEBUGGING && code != 65536) 133 throw new RuntimeException("Inconsistent bl_counts!"); 134 135 for (int i=0; i < numCodes; i++) 136 { 137 int bits = length[i]; 138 if (bits > 0) 139 { 140 if (DeflaterConstants.DEBUGGING) 141 System.err.println("codes["+i+"] = rev(" 142 +Integer.toHexString(nextCode[bits-1])+")," 143 +bits); 144 codes[i] = bitReverse(nextCode[bits-1]); 145 nextCode[bits-1] += 1 << (16 - bits); 146 } 147 } 148 } 149 buildLength(int childs[])150 private void buildLength(int childs[]) 151 { 152 this.length = new byte [freqs.length]; 153 int numNodes = childs.length / 2; 154 int numLeafs = (numNodes + 1) / 2; 155 int overflow = 0; 156 157 for (int i = 0; i < maxLength; i++) 158 bl_counts[i] = 0; 159 160 /* First calculate optimal bit lengths */ 161 int lengths[] = new int[numNodes]; 162 lengths[numNodes-1] = 0; 163 for (int i = numNodes - 1; i >= 0; i--) 164 { 165 if (childs[2*i+1] != -1) 166 { 167 int bitLength = lengths[i] + 1; 168 if (bitLength > maxLength) 169 { 170 bitLength = maxLength; 171 overflow++; 172 } 173 lengths[childs[2*i]] = lengths[childs[2*i+1]] = bitLength; 174 } 175 else 176 { 177 /* A leaf node */ 178 int bitLength = lengths[i]; 179 bl_counts[bitLength - 1]++; 180 this.length[childs[2*i]] = (byte) lengths[i]; 181 } 182 } 183 184 if (DeflaterConstants.DEBUGGING) 185 { 186 System.err.println("Tree "+freqs.length+" lengths:"); 187 for (int i=0; i < numLeafs; i++) 188 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]] 189 + " len: "+length[childs[2*i]]); 190 } 191 192 if (overflow == 0) 193 return; 194 195 int incrBitLen = maxLength - 1; 196 do 197 { 198 /* Find the first bit length which could increase: */ 199 while (bl_counts[--incrBitLen] == 0) 200 ; 201 202 /* Move this node one down and remove a corresponding 203 * amount of overflow nodes. 204 */ 205 do 206 { 207 bl_counts[incrBitLen]--; 208 bl_counts[++incrBitLen]++; 209 overflow -= 1 << (maxLength - 1 - incrBitLen); 210 } 211 while (overflow > 0 && incrBitLen < maxLength - 1); 212 } 213 while (overflow > 0); 214 215 /* We may have overshot above. Move some nodes from maxLength to 216 * maxLength-1 in that case. 217 */ 218 bl_counts[maxLength-1] += overflow; 219 bl_counts[maxLength-2] -= overflow; 220 221 /* Now recompute all bit lengths, scanning in increasing 222 * frequency. It is simpler to reconstruct all lengths instead of 223 * fixing only the wrong ones. This idea is taken from 'ar' 224 * written by Haruhiko Okumura. 225 * 226 * The nodes were inserted with decreasing frequency into the childs 227 * array. 228 */ 229 int nodePtr = 2 * numLeafs; 230 for (int bits = maxLength; bits != 0; bits--) 231 { 232 int n = bl_counts[bits-1]; 233 while (n > 0) 234 { 235 int childPtr = 2*childs[nodePtr++]; 236 if (childs[childPtr + 1] == -1) 237 { 238 /* We found another leaf */ 239 length[childs[childPtr]] = (byte) bits; 240 n--; 241 } 242 } 243 } 244 if (DeflaterConstants.DEBUGGING) 245 { 246 System.err.println("*** After overflow elimination. ***"); 247 for (int i=0; i < numLeafs; i++) 248 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]] 249 + " len: "+length[childs[2*i]]); 250 } 251 } 252 buildTree()253 void buildTree() 254 { 255 int numSymbols = freqs.length; 256 257 /* heap is a priority queue, sorted by frequency, least frequent 258 * nodes first. The heap is a binary tree, with the property, that 259 * the parent node is smaller than both child nodes. This assures 260 * that the smallest node is the first parent. 261 * 262 * The binary tree is encoded in an array: 0 is root node and 263 * the nodes 2*n+1, 2*n+2 are the child nodes of node n. 264 */ 265 int[] heap = new int[numSymbols]; 266 int heapLen = 0; 267 int maxCode = 0; 268 for (int n = 0; n < numSymbols; n++) 269 { 270 int freq = freqs[n]; 271 if (freq != 0) 272 { 273 /* Insert n into heap */ 274 int pos = heapLen++; 275 int ppos; 276 while (pos > 0 && 277 freqs[heap[ppos = (pos - 1) / 2]] > freq) { 278 heap[pos] = heap[ppos]; 279 pos = ppos; 280 } 281 heap[pos] = n; 282 maxCode = n; 283 } 284 } 285 286 /* We could encode a single literal with 0 bits but then we 287 * don't see the literals. Therefore we force at least two 288 * literals to avoid this case. We don't care about order in 289 * this case, both literals get a 1 bit code. 290 */ 291 while (heapLen < 2) 292 { 293 int node = maxCode < 2 ? ++maxCode : 0; 294 heap[heapLen++] = node; 295 } 296 297 numCodes = Math.max(maxCode + 1, minNumCodes); 298 299 int numLeafs = heapLen; 300 int[] childs = new int[4*heapLen - 2]; 301 int[] values = new int[2*heapLen - 1]; 302 int numNodes = numLeafs; 303 for (int i = 0; i < heapLen; i++) 304 { 305 int node = heap[i]; 306 childs[2*i] = node; 307 childs[2*i+1] = -1; 308 values[i] = freqs[node] << 8; 309 heap[i] = i; 310 } 311 312 /* Construct the Huffman tree by repeatedly combining the least two 313 * frequent nodes. 314 */ 315 do 316 { 317 int first = heap[0]; 318 int last = heap[--heapLen]; 319 320 /* Propagate the hole to the leafs of the heap */ 321 int ppos = 0; 322 int path = 1; 323 while (path < heapLen) 324 { 325 if (path + 1 < heapLen 326 && values[heap[path]] > values[heap[path+1]]) 327 path++; 328 329 heap[ppos] = heap[path]; 330 ppos = path; 331 path = path * 2 + 1; 332 } 333 334 /* Now propagate the last element down along path. Normally 335 * it shouldn't go too deep. 336 */ 337 int lastVal = values[last]; 338 while ((path = ppos) > 0 339 && values[heap[ppos = (path - 1)/2]] > lastVal) 340 heap[path] = heap[ppos]; 341 heap[path] = last; 342 343 344 int second = heap[0]; 345 346 /* Create a new node father of first and second */ 347 last = numNodes++; 348 childs[2*last] = first; 349 childs[2*last+1] = second; 350 int mindepth = Math.min(values[first] & 0xff, values[second] & 0xff); 351 values[last] = lastVal = values[first] + values[second] - mindepth + 1; 352 353 /* Again, propagate the hole to the leafs */ 354 ppos = 0; 355 path = 1; 356 while (path < heapLen) 357 { 358 if (path + 1 < heapLen 359 && values[heap[path]] > values[heap[path+1]]) 360 path++; 361 362 heap[ppos] = heap[path]; 363 ppos = path; 364 path = ppos * 2 + 1; 365 } 366 367 /* Now propagate the new element down along path */ 368 while ((path = ppos) > 0 369 && values[heap[ppos = (path - 1)/2]] > lastVal) 370 heap[path] = heap[ppos]; 371 heap[path] = last; 372 } 373 while (heapLen > 1); 374 375 if (heap[0] != childs.length / 2 - 1) 376 throw new RuntimeException("Weird!"); 377 378 buildLength(childs); 379 } 380 381 int getEncodedLength() 382 { 383 int len = 0; 384 for (int i = 0; i < freqs.length; i++) 385 len += freqs[i] * length[i]; 386 return len; 387 } 388 389 void calcBLFreq(Tree blTree) { 390 int max_count; /* max repeat count */ 391 int min_count; /* min repeat count */ 392 int count; /* repeat count of the current code */ 393 int curlen = -1; /* length of current code */ 394 395 int i = 0; 396 while (i < numCodes) 397 { 398 count = 1; 399 int nextlen = length[i]; 400 if (nextlen == 0) 401 { 402 max_count = 138; 403 min_count = 3; 404 } 405 else 406 { 407 max_count = 6; 408 min_count = 3; 409 if (curlen != nextlen) 410 { 411 blTree.freqs[nextlen]++; 412 count = 0; 413 } 414 } 415 curlen = nextlen; 416 i++; 417 418 while (i < numCodes && curlen == length[i]) 419 { 420 i++; 421 if (++count >= max_count) 422 break; 423 } 424 425 if (count < min_count) 426 blTree.freqs[curlen] += count; 427 else if (curlen != 0) 428 blTree.freqs[REP_3_6]++; 429 else if (count <= 10) 430 blTree.freqs[REP_3_10]++; 431 else 432 blTree.freqs[REP_11_138]++; 433 } 434 } 435 436 void writeTree(Tree blTree) 437 { 438 int max_count; /* max repeat count */ 439 int min_count; /* min repeat count */ 440 int count; /* repeat count of the current code */ 441 int curlen = -1; /* length of current code */ 442 443 int i = 0; 444 while (i < numCodes) 445 { 446 count = 1; 447 int nextlen = length[i]; 448 if (nextlen == 0) 449 { 450 max_count = 138; 451 min_count = 3; 452 } 453 else 454 { 455 max_count = 6; 456 min_count = 3; 457 if (curlen != nextlen) 458 { 459 blTree.writeSymbol(nextlen); 460 count = 0; 461 } 462 } 463 curlen = nextlen; 464 i++; 465 466 while (i < numCodes && curlen == length[i]) 467 { 468 i++; 469 if (++count >= max_count) 470 break; 471 } 472 473 if (count < min_count) 474 { 475 while (count-- > 0) 476 blTree.writeSymbol(curlen); 477 } 478 else if (curlen != 0) 479 { 480 blTree.writeSymbol(REP_3_6); 481 pending.writeBits(count - 3, 2); 482 } 483 else if (count <= 10) 484 { 485 blTree.writeSymbol(REP_3_10); 486 pending.writeBits(count - 3, 3); 487 } 488 else 489 { 490 blTree.writeSymbol(REP_11_138); 491 pending.writeBits(count - 11, 7); 492 } 493 } 494 } 495 } 496 497 498 499 DeflaterPending pending; 500 private Tree literalTree, distTree, blTree; 501 502 private short d_buf[]; 503 private byte l_buf[]; 504 private int last_lit; 505 private int extra_bits; 506 507 private static short staticLCodes[]; 508 private static byte staticLLength[]; 509 private static short staticDCodes[]; 510 private static byte staticDLength[]; 511 512 /** 513 * Reverse the bits of a 16 bit value. 514 */ 515 static short bitReverse(int value) { 516 return (short) (bit4Reverse.charAt(value & 0xf) << 12 517 | bit4Reverse.charAt((value >> 4) & 0xf) << 8 518 | bit4Reverse.charAt((value >> 8) & 0xf) << 4 519 | bit4Reverse.charAt(value >> 12)); 520 } 521 522 static { 523 /* See RFC 1951 3.2.6 */ 524 /* Literal codes */ 525 staticLCodes = new short[LITERAL_NUM]; 526 staticLLength = new byte[LITERAL_NUM]; 527 int i = 0; 528 while (i < 144) { 529 staticLCodes[i] = bitReverse((0x030 + i) << 8); 530 staticLLength[i++] = 8; 531 } 532 while (i < 256) { 533 staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7); 534 staticLLength[i++] = 9; 535 } 536 while (i < 280) { 537 staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9); 538 staticLLength[i++] = 7; 539 } 540 while (i < LITERAL_NUM) { 541 staticLCodes[i] = bitReverse((0x0c0 - 280 + i) << 8); 542 staticLLength[i++] = 8; 543 } 544 545 /* Distant codes */ 546 staticDCodes = new short[DIST_NUM]; 547 staticDLength = new byte[DIST_NUM]; 548 for (i = 0; i < DIST_NUM; i++) { 549 staticDCodes[i] = bitReverse(i << 11); 550 staticDLength[i] = 5; 551 } 552 } 553 554 public DeflaterHuffman(DeflaterPending pending) 555 { 556 this.pending = pending; 557 558 literalTree = new Tree(LITERAL_NUM, 257, 15); 559 distTree = new Tree(DIST_NUM, 1, 15); 560 blTree = new Tree(BITLEN_NUM, 4, 7); 561 562 d_buf = new short[BUFSIZE]; 563 l_buf = new byte [BUFSIZE]; 564 } 565 566 public final void reset() { 567 last_lit = 0; 568 extra_bits = 0; 569 literalTree.reset(); 570 distTree.reset(); 571 blTree.reset(); 572 } 573 574 private int l_code(int len) { 575 if (len == 255) 576 return 285; 577 578 int code = 257; 579 while (len >= 8) 580 { 581 code += 4; 582 len >>= 1; 583 } 584 return code + len; 585 } 586 587 private int d_code(int distance) { 588 int code = 0; 589 while (distance >= 4) 590 { 591 code += 2; 592 distance >>= 1; 593 } 594 return code + distance; 595 } 596 597 public void sendAllTrees(int blTreeCodes) { 598 blTree.buildCodes(); 599 literalTree.buildCodes(); 600 distTree.buildCodes(); 601 pending.writeBits(literalTree.numCodes - 257, 5); 602 pending.writeBits(distTree.numCodes - 1, 5); 603 pending.writeBits(blTreeCodes - 4, 4); 604 for (int rank = 0; rank < blTreeCodes; rank++) 605 pending.writeBits(blTree.length[BL_ORDER[rank]], 3); 606 literalTree.writeTree(blTree); 607 distTree.writeTree(blTree); 608 if (DeflaterConstants.DEBUGGING) 609 blTree.checkEmpty(); 610 } 611 612 public void compressBlock() { 613 for (int i = 0; i < last_lit; i++) 614 { 615 int litlen = l_buf[i] & 0xff; 616 int dist = d_buf[i]; 617 if (dist-- != 0) 618 { 619 if (DeflaterConstants.DEBUGGING) 620 System.err.print("["+(dist+1)+","+(litlen+3)+"]: "); 621 622 int lc = l_code(litlen); 623 literalTree.writeSymbol(lc); 624 625 int bits = (lc - 261) / 4; 626 if (bits > 0 && bits <= 5) 627 pending.writeBits(litlen & ((1 << bits) - 1), bits); 628 629 int dc = d_code(dist); 630 distTree.writeSymbol(dc); 631 632 bits = dc / 2 - 1; 633 if (bits > 0) 634 pending.writeBits(dist & ((1 << bits) - 1), bits); 635 } 636 else 637 { 638 if (DeflaterConstants.DEBUGGING) 639 { 640 if (litlen > 32 && litlen < 127) 641 System.err.print("("+(char)litlen+"): "); 642 else 643 System.err.print("{"+litlen+"}: "); 644 } 645 literalTree.writeSymbol(litlen); 646 } 647 } 648 if (DeflaterConstants.DEBUGGING) 649 System.err.print("EOF: "); 650 literalTree.writeSymbol(EOF_SYMBOL); 651 if (DeflaterConstants.DEBUGGING) 652 { 653 literalTree.checkEmpty(); 654 distTree.checkEmpty(); 655 } 656 } 657 658 public void flushStoredBlock(byte[] stored, 659 int stored_offset, int stored_len, 660 boolean lastBlock) { 661 if (DeflaterConstants.DEBUGGING) 662 System.err.println("Flushing stored block "+ stored_len); 663 pending.writeBits((DeflaterConstants.STORED_BLOCK << 1) 664 + (lastBlock ? 1 : 0), 3); 665 pending.alignToByte(); 666 pending.writeShort(stored_len); 667 pending.writeShort(~stored_len); 668 pending.writeBlock(stored, stored_offset, stored_len); 669 reset(); 670 } 671 672 public void flushBlock(byte[] stored, int stored_offset, int stored_len, 673 boolean lastBlock) { 674 literalTree.freqs[EOF_SYMBOL]++; 675 676 /* Build trees */ 677 literalTree.buildTree(); 678 distTree.buildTree(); 679 680 /* Calculate bitlen frequency */ 681 literalTree.calcBLFreq(blTree); 682 distTree.calcBLFreq(blTree); 683 684 /* Build bitlen tree */ 685 blTree.buildTree(); 686 687 int blTreeCodes = 4; 688 for (int i = 18; i > blTreeCodes; i--) 689 { 690 if (blTree.length[BL_ORDER[i]] > 0) 691 blTreeCodes = i+1; 692 } 693 int opt_len = 14 + blTreeCodes * 3 + blTree.getEncodedLength() 694 + literalTree.getEncodedLength() + distTree.getEncodedLength() 695 + extra_bits; 696 697 int static_len = extra_bits; 698 for (int i = 0; i < LITERAL_NUM; i++) 699 static_len += literalTree.freqs[i] * staticLLength[i]; 700 for (int i = 0; i < DIST_NUM; i++) 701 static_len += distTree.freqs[i] * staticDLength[i]; 702 if (opt_len >= static_len) 703 { 704 /* Force static trees */ 705 opt_len = static_len; 706 } 707 708 if (stored_offset >= 0 && stored_len+4 < opt_len >> 3) 709 { 710 /* Store Block */ 711 if (DeflaterConstants.DEBUGGING) 712 System.err.println("Storing, since " + stored_len + " < " + opt_len 713 + " <= " + static_len); 714 flushStoredBlock(stored, stored_offset, stored_len, lastBlock); 715 } 716 else if (opt_len == static_len) 717 { 718 /* Encode with static tree */ 719 pending.writeBits((DeflaterConstants.STATIC_TREES << 1) 720 + (lastBlock ? 1 : 0), 3); 721 literalTree.setStaticCodes(staticLCodes, staticLLength); 722 distTree.setStaticCodes(staticDCodes, staticDLength); 723 compressBlock(); 724 reset(); 725 } 726 else 727 { 728 /* Encode with dynamic tree */ 729 pending.writeBits((DeflaterConstants.DYN_TREES << 1) 730 + (lastBlock ? 1 : 0), 3); 731 sendAllTrees(blTreeCodes); 732 compressBlock(); 733 reset(); 734 } 735 } 736 737 public final boolean isFull() 738 { 739 return last_lit == BUFSIZE; 740 } 741 742 public final boolean tallyLit(int lit) 743 { 744 if (DeflaterConstants.DEBUGGING) 745 { 746 if (lit > 32 && lit < 127) 747 System.err.println("("+(char)lit+")"); 748 else 749 System.err.println("{"+lit+"}"); 750 } 751 d_buf[last_lit] = 0; 752 l_buf[last_lit++] = (byte) lit; 753 literalTree.freqs[lit]++; 754 return last_lit == BUFSIZE; 755 } 756 757 public final boolean tallyDist(int dist, int len) 758 { 759 if (DeflaterConstants.DEBUGGING) 760 System.err.println("["+dist+","+len+"]"); 761 762 d_buf[last_lit] = (short) dist; 763 l_buf[last_lit++] = (byte) (len - 3); 764 765 int lc = l_code(len-3); 766 literalTree.freqs[lc]++; 767 if (lc >= 265 && lc < 285) 768 extra_bits += (lc - 261) / 4; 769 770 int dc = d_code(dist-1); 771 distTree.freqs[dc]++; 772 if (dc >= 4) 773 extra_bits += dc / 2 - 1; 774 return last_lit == BUFSIZE; 775 } 776 } 777