1 /* 2 * $Header: /home/cvspublic/jakarta-commons-sandbox/compress/src/java/org/apache/commons/compress/bzip2/CBZip2OutputStream.java,v 1.1 2003/12/02 20:43:04 dirkv Exp $ 3 * $Revision: 1.1 $ 4 * $Date: 2003/12/02 20:43:04 $ 5 * 6 * ==================================================================== 7 * 8 * The Apache Software License, Version 1.1 9 * 10 * Copyright (c) 2002 The Apache Software Foundation. All rights 11 * reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in 22 * the documentation and/or other materials provided with the 23 * distribution. 24 * 25 * 3. The end-user documentation included with the redistribution, if 26 * any, must include the following acknowledgement: 27 * "This product includes software developed by the 28 * Apache Software Foundation (http://www.apache.org/)." 29 * Alternately, this acknowledgement may appear in the software itself, 30 * if and wherever such third-party acknowledgements normally appear. 31 * 32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software 33 * Foundation" must not be used to endorse or promote products derived 34 * from this software without prior written permission. For written 35 * permission, please contact apache@apache.org. 36 * 37 * 5. Products derived from this software may not be called "Apache" 38 * nor may "Apache" appear in their names without prior written 39 * permission of the Apache Software Foundation. 40 * 41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * ==================================================================== 54 * 55 * This software consists of voluntary contributions made by many 56 * individuals on behalf of the Apache Software Foundation. For more 57 * information on the Apache Software Foundation, please see 58 * <http://www.apache.org/>. 59 * 60 */ 61 package org.apache.commons.compress.bzip2; 62 63 import java.io.IOException; 64 import java.io.OutputStream; 65 66 /* 67 * This package is based on the work done by Keiron Liddle, Aftex Software 68 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 69 * great code. 70 */ 71 72 /** 73 * An output stream that compresses into the BZip2 format (without the file 74 * header chars) into another stream. TODO: Update to BZip2 1.0.1 75 * 76 * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a> 77 */ 78 public class CBZip2OutputStream 79 extends OutputStream 80 implements BZip2Constants 81 { 82 private static final int LOWER_BYTE_MASK = 0x000000ff; 83 private static final int UPPER_BYTE_MASK = 0xffffff00; 84 private static final int SETMASK = ( 1 << 21 ); 85 private static final int CLEARMASK = ( ~SETMASK ); 86 private static final int GREATER_ICOST = 15; 87 private static final int LESSER_ICOST = 0; 88 private static final int SMALL_THRESH = 20; 89 private static final int DEPTH_THRESH = 10; 90 91 /* 92 * If you are ever unlucky/improbable enough 93 * to get a stack overflow whilst sorting, 94 * increase the following constant and try 95 * again. In practice I have never seen the 96 * stack go above 27 elems, so the following 97 * limit seems very generous. 98 */ 99 private static final int QSORT_STACK_SIZE = 1000; 100 101 private CRC m_crc = new CRC(); 102 103 private boolean[] m_inUse = new boolean[ 256 ]; 104 105 private char[] m_seqToUnseq = new char[ 256 ]; 106 private char[] m_unseqToSeq = new char[ 256 ]; 107 108 private char[] m_selector = new char[ MAX_SELECTORS ]; 109 private char[] m_selectorMtf = new char[ MAX_SELECTORS ]; 110 111 private int[] m_mtfFreq = new int[ MAX_ALPHA_SIZE ]; 112 113 private int m_currentChar = -1; 114 private int m_runLength; 115 116 private boolean m_closed; 117 118 /* 119 * Knuth's increments seem to work better 120 * than Incerpi-Sedgewick here. Possibly 121 * because the number of elems to sort is 122 * usually small, typically <= 20. 123 */ 124 private int[] m_incs = new int[] 125 { 126 1, 4, 13, 40, 121, 364, 1093, 3280, 127 9841, 29524, 88573, 265720, 128 797161, 2391484 129 }; 130 131 private boolean m_blockRandomised; 132 133 /* 134 * always: in the range 0 .. 9. 135 * The current block size is 100000 * this number. 136 */ 137 private int m_blockSize100k; 138 private int m_bsBuff; 139 private int m_bsLive; 140 141 /* 142 * index of the last char in the block, so 143 * the block size == last + 1. 144 */ 145 private int m_last; 146 147 /* 148 * index in zptr[] of original string after sorting. 149 */ 150 private int m_origPtr; 151 152 private int m_allowableBlockSize; 153 154 private char[] m_block; 155 156 private int m_blockCRC; 157 private int m_combinedCRC; 158 159 private OutputStream m_bsStream; 160 private boolean m_firstAttempt; 161 private int[] m_ftab; 162 private int m_nInUse; 163 164 private int m_nMTF; 165 private int[] m_quadrant; 166 private short[] m_szptr; 167 private int m_workDone; 168 169 /* 170 * Used when sorting. If too many long comparisons 171 * happen, we stop sorting, randomise the block 172 * slightly, and try again. 173 */ 174 private int m_workFactor; 175 private int m_workLimit; 176 private int[] m_zptr; 177 CBZip2OutputStream( final OutputStream output )178 public CBZip2OutputStream( final OutputStream output ) 179 throws IOException 180 { 181 this( output, 9 ); 182 } 183 CBZip2OutputStream( final OutputStream output, final int blockSize )184 public CBZip2OutputStream( final OutputStream output, final int blockSize ) 185 throws IOException 186 { 187 bsSetStream( output ); 188 m_workFactor = 50; 189 190 int outBlockSize = blockSize; 191 if( outBlockSize > 9 ) 192 { 193 outBlockSize = 9; 194 } 195 if( outBlockSize < 1 ) 196 { 197 outBlockSize = 1; 198 } 199 m_blockSize100k = outBlockSize; 200 allocateCompressStructures(); 201 initialize(); 202 initBlock(); 203 } 204 hbMakeCodeLengths( char[] len, int[] freq, int alphaSize, int maxLen )205 private static void hbMakeCodeLengths( char[] len, int[] freq, 206 int alphaSize, int maxLen ) 207 { 208 /* 209 * Nodes and heap entries run from 1. Entry 0 210 * for both the heap and nodes is a sentinel. 211 */ 212 int nNodes; 213 /* 214 * Nodes and heap entries run from 1. Entry 0 215 * for both the heap and nodes is a sentinel. 216 */ 217 int nHeap; 218 /* 219 * Nodes and heap entries run from 1. Entry 0 220 * for both the heap and nodes is a sentinel. 221 */ 222 int n1; 223 /* 224 * Nodes and heap entries run from 1. Entry 0 225 * for both the heap and nodes is a sentinel. 226 */ 227 int n2; 228 /* 229 * Nodes and heap entries run from 1. Entry 0 230 * for both the heap and nodes is a sentinel. 231 */ 232 int i; 233 /* 234 * Nodes and heap entries run from 1. Entry 0 235 * for both the heap and nodes is a sentinel. 236 */ 237 int j; 238 /* 239 * Nodes and heap entries run from 1. Entry 0 240 * for both the heap and nodes is a sentinel. 241 */ 242 int k; 243 boolean tooLong; 244 245 int[] heap = new int[ MAX_ALPHA_SIZE + 2 ]; 246 int[] weights = new int[ MAX_ALPHA_SIZE * 2 ]; 247 int[] parent = new int[ MAX_ALPHA_SIZE * 2 ]; 248 249 for( i = 0; i < alphaSize; i++ ) 250 { 251 weights[ i + 1 ] = ( freq[ i ] == 0 ? 1 : freq[ i ] ) << 8; 252 } 253 254 while( true ) 255 { 256 nNodes = alphaSize; 257 nHeap = 0; 258 259 heap[ 0 ] = 0; 260 weights[ 0 ] = 0; 261 parent[ 0 ] = -2; 262 263 for( i = 1; i <= alphaSize; i++ ) 264 { 265 parent[ i ] = -1; 266 nHeap++; 267 heap[ nHeap ] = i; 268 { 269 int zz; 270 int tmp; 271 zz = nHeap; 272 tmp = heap[ zz ]; 273 while( weights[ tmp ] < weights[ heap[ zz >> 1 ] ] ) 274 { 275 heap[ zz ] = heap[ zz >> 1 ]; 276 zz >>= 1; 277 } 278 heap[ zz ] = tmp; 279 } 280 } 281 if( !( nHeap < ( MAX_ALPHA_SIZE + 2 ) ) ) 282 { 283 panic(); 284 } 285 286 while( nHeap > 1 ) 287 { 288 n1 = heap[ 1 ]; 289 heap[ 1 ] = heap[ nHeap ]; 290 nHeap--; 291 { 292 int zz = 0; 293 int yy = 0; 294 int tmp = 0; 295 zz = 1; 296 tmp = heap[ zz ]; 297 while( true ) 298 { 299 yy = zz << 1; 300 if( yy > nHeap ) 301 { 302 break; 303 } 304 if( yy < nHeap && 305 weights[ heap[ yy + 1 ] ] < weights[ heap[ yy ] ] ) 306 { 307 yy++; 308 } 309 if( weights[ tmp ] < weights[ heap[ yy ] ] ) 310 { 311 break; 312 } 313 heap[ zz ] = heap[ yy ]; 314 zz = yy; 315 } 316 heap[ zz ] = tmp; 317 } 318 n2 = heap[ 1 ]; 319 heap[ 1 ] = heap[ nHeap ]; 320 nHeap--; 321 { 322 int zz = 0; 323 int yy = 0; 324 int tmp = 0; 325 zz = 1; 326 tmp = heap[ zz ]; 327 while( true ) 328 { 329 yy = zz << 1; 330 if( yy > nHeap ) 331 { 332 break; 333 } 334 if( yy < nHeap && 335 weights[ heap[ yy + 1 ] ] < weights[ heap[ yy ] ] ) 336 { 337 yy++; 338 } 339 if( weights[ tmp ] < weights[ heap[ yy ] ] ) 340 { 341 break; 342 } 343 heap[ zz ] = heap[ yy ]; 344 zz = yy; 345 } 346 heap[ zz ] = tmp; 347 } 348 nNodes++; 349 parent[ n1 ] = nNodes; 350 parent[ n2 ] = nNodes; 351 352 final int v1 = weights[ n1 ]; 353 final int v2 = weights[ n2 ]; 354 final int weight = calculateWeight( v1, v2 ); 355 weights[ nNodes ] = weight; 356 357 parent[ nNodes ] = -1; 358 nHeap++; 359 heap[ nHeap ] = nNodes; 360 { 361 int zz = 0; 362 int tmp = 0; 363 zz = nHeap; 364 tmp = heap[ zz ]; 365 while( weights[ tmp ] < weights[ heap[ zz >> 1 ] ] ) 366 { 367 heap[ zz ] = heap[ zz >> 1 ]; 368 zz >>= 1; 369 } 370 heap[ zz ] = tmp; 371 } 372 } 373 if( !( nNodes < ( MAX_ALPHA_SIZE * 2 ) ) ) 374 { 375 panic(); 376 } 377 378 tooLong = false; 379 for( i = 1; i <= alphaSize; i++ ) 380 { 381 j = 0; 382 k = i; 383 while( parent[ k ] >= 0 ) 384 { 385 k = parent[ k ]; 386 j++; 387 } 388 len[ i - 1 ] = (char)j; 389 if( j > maxLen ) 390 { 391 tooLong = true; 392 } 393 } 394 395 if( !tooLong ) 396 { 397 break; 398 } 399 400 for( i = 1; i < alphaSize; i++ ) 401 { 402 j = weights[ i ] >> 8; 403 j = 1 + ( j / 2 ); 404 weights[ i ] = j << 8; 405 } 406 } 407 } 408 calculateWeight( final int v1, final int v2 )409 private static int calculateWeight( final int v1, final int v2 ) 410 { 411 final int upper = ( v1 & UPPER_BYTE_MASK ) + ( v2 & UPPER_BYTE_MASK ); 412 final int v1Lower = ( v1 & LOWER_BYTE_MASK ); 413 final int v2Lower = ( v2 & LOWER_BYTE_MASK ); 414 final int nnnn = ( v1Lower > v2Lower ) ? v1Lower : v2Lower; 415 return upper | ( 1 + nnnn ); 416 } 417 panic()418 private static void panic() 419 { 420 System.out.println( "panic" ); 421 //throw new CError(); 422 } 423 close()424 public void close() 425 throws IOException 426 { 427 if( m_closed ) 428 { 429 return; 430 } 431 432 if( m_runLength > 0 ) 433 { 434 writeRun(); 435 } 436 m_currentChar = -1; 437 endBlock(); 438 endCompression(); 439 m_closed = true; 440 super.close(); 441 m_bsStream.close(); 442 } 443 finalize()444 public void finalize() 445 throws Throwable 446 { 447 close(); 448 } 449 flush()450 public void flush() 451 throws IOException 452 { 453 super.flush(); 454 m_bsStream.flush(); 455 } 456 457 /** 458 * modified by Oliver Merkel, 010128 459 * 460 * @param bv Description of Parameter 461 * @exception java.io.IOException Description of Exception 462 */ write( int bv )463 public void write( int bv ) 464 throws IOException 465 { 466 int b = ( 256 + bv ) % 256; 467 if( m_currentChar != -1 ) 468 { 469 if( m_currentChar == b ) 470 { 471 m_runLength++; 472 if( m_runLength > 254 ) 473 { 474 writeRun(); 475 m_currentChar = -1; 476 m_runLength = 0; 477 } 478 } 479 else 480 { 481 writeRun(); 482 m_runLength = 1; 483 m_currentChar = b; 484 } 485 } 486 else 487 { 488 m_currentChar = b; 489 m_runLength++; 490 } 491 } 492 allocateCompressStructures()493 private void allocateCompressStructures() 494 { 495 int n = BASE_BLOCK_SIZE * m_blockSize100k; 496 m_block = new char[ ( n + 1 + NUM_OVERSHOOT_BYTES ) ]; 497 m_quadrant = new int[ ( n + NUM_OVERSHOOT_BYTES ) ]; 498 m_zptr = new int[ n ]; 499 m_ftab = new int[ 65537 ]; 500 501 if( m_block == null || m_quadrant == null || m_zptr == null 502 || m_ftab == null ) 503 { 504 //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; 505 //compressOutOfMemory ( totalDraw, n ); 506 } 507 508 /* 509 * The back end needs a place to store the MTF values 510 * whilst it calculates the coding tables. We could 511 * put them in the zptr array. However, these values 512 * will fit in a short, so we overlay szptr at the 513 * start of zptr, in the hope of reducing the number 514 * of cache misses induced by the multiple traversals 515 * of the MTF values when calculating coding tables. 516 * Seems to improve compression speed by about 1%. 517 */ 518 // szptr = zptr; 519 520 m_szptr = new short[ 2 * n ]; 521 } 522 bsFinishedWithStream()523 private void bsFinishedWithStream() 524 throws IOException 525 { 526 while( m_bsLive > 0 ) 527 { 528 int ch = ( m_bsBuff >> 24 ); 529 try 530 { 531 m_bsStream.write( ch );// write 8-bit 532 } 533 catch( IOException e ) 534 { 535 throw e; 536 } 537 m_bsBuff <<= 8; 538 m_bsLive -= 8; 539 } 540 } 541 bsPutIntVS( int numBits, int c )542 private void bsPutIntVS( int numBits, int c ) 543 throws IOException 544 { 545 bsW( numBits, c ); 546 } 547 bsPutUChar( int c )548 private void bsPutUChar( int c ) 549 throws IOException 550 { 551 bsW( 8, c ); 552 } 553 bsPutint( int u )554 private void bsPutint( int u ) 555 throws IOException 556 { 557 bsW( 8, ( u >> 24 ) & 0xff ); 558 bsW( 8, ( u >> 16 ) & 0xff ); 559 bsW( 8, ( u >> 8 ) & 0xff ); 560 bsW( 8, u & 0xff ); 561 } 562 bsSetStream( OutputStream f )563 private void bsSetStream( OutputStream f ) 564 { 565 m_bsStream = f; 566 m_bsLive = 0; 567 m_bsBuff = 0; 568 } 569 bsW( int n, int v )570 private void bsW( int n, int v ) 571 throws IOException 572 { 573 while( m_bsLive >= 8 ) 574 { 575 int ch = ( m_bsBuff >> 24 ); 576 try 577 { 578 m_bsStream.write( ch );// write 8-bit 579 } 580 catch( IOException e ) 581 { 582 throw e; 583 } 584 m_bsBuff <<= 8; 585 m_bsLive -= 8; 586 } 587 m_bsBuff |= ( v << ( 32 - m_bsLive - n ) ); 588 m_bsLive += n; 589 } 590 doReversibleTransformation()591 private void doReversibleTransformation() 592 { 593 int i; 594 595 m_workLimit = m_workFactor * m_last; 596 m_workDone = 0; 597 m_blockRandomised = false; 598 m_firstAttempt = true; 599 600 mainSort(); 601 602 if( m_workDone > m_workLimit && m_firstAttempt ) 603 { 604 randomiseBlock(); 605 m_workLimit = 0; 606 m_workDone = 0; 607 m_blockRandomised = true; 608 m_firstAttempt = false; 609 mainSort(); 610 } 611 612 m_origPtr = -1; 613 for( i = 0; i <= m_last; i++ ) 614 { 615 if( m_zptr[ i ] == 0 ) 616 { 617 m_origPtr = i; 618 break; 619 } 620 } 621 ; 622 623 if( m_origPtr == -1 ) 624 { 625 panic(); 626 } 627 } 628 endBlock()629 private void endBlock() 630 throws IOException 631 { 632 m_blockCRC = m_crc.getFinalCRC(); 633 m_combinedCRC = ( m_combinedCRC << 1 ) | ( m_combinedCRC >>> 31 ); 634 m_combinedCRC ^= m_blockCRC; 635 636 /* 637 * sort the block and establish posn of original string 638 */ 639 doReversibleTransformation(); 640 641 /* 642 * A 6-byte block header, the value chosen arbitrarily 643 * as 0x314159265359 :-). A 32 bit value does not really 644 * give a strong enough guarantee that the value will not 645 * appear by chance in the compressed datastream. Worst-case 646 * probability of this event, for a 900k block, is about 647 * 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. 648 * For a compressed file of size 100Gb -- about 100000 blocks -- 649 * only a 48-bit marker will do. NB: normal compression/ 650 * decompression do *not* rely on these statistical properties. 651 * They are only important when trying to recover blocks from 652 * damaged files. 653 */ 654 bsPutUChar( 0x31 ); 655 bsPutUChar( 0x41 ); 656 bsPutUChar( 0x59 ); 657 bsPutUChar( 0x26 ); 658 bsPutUChar( 0x53 ); 659 bsPutUChar( 0x59 ); 660 661 /* 662 * Now the block's CRC, so it is in a known place. 663 */ 664 bsPutint( m_blockCRC ); 665 666 /* 667 * Now a single bit indicating randomisation. 668 */ 669 if( m_blockRandomised ) 670 { 671 bsW( 1, 1 ); 672 } 673 else 674 { 675 bsW( 1, 0 ); 676 } 677 678 /* 679 * Finally, block's contents proper. 680 */ 681 moveToFrontCodeAndSend(); 682 } 683 endCompression()684 private void endCompression() 685 throws IOException 686 { 687 /* 688 * Now another magic 48-bit number, 0x177245385090, to 689 * indicate the end of the last block. (sqrt(pi), if 690 * you want to know. I did want to use e, but it contains 691 * too much repetition -- 27 18 28 18 28 46 -- for me 692 * to feel statistically comfortable. Call me paranoid.) 693 */ 694 bsPutUChar( 0x17 ); 695 bsPutUChar( 0x72 ); 696 bsPutUChar( 0x45 ); 697 bsPutUChar( 0x38 ); 698 bsPutUChar( 0x50 ); 699 bsPutUChar( 0x90 ); 700 701 bsPutint( m_combinedCRC ); 702 703 bsFinishedWithStream(); 704 } 705 fullGtU( int i1, int i2 )706 private boolean fullGtU( int i1, int i2 ) 707 { 708 int k; 709 char c1; 710 char c2; 711 int s1; 712 int s2; 713 714 c1 = m_block[ i1 + 1 ]; 715 c2 = m_block[ i2 + 1 ]; 716 if( c1 != c2 ) 717 { 718 return ( c1 > c2 ); 719 } 720 i1++; 721 i2++; 722 723 c1 = m_block[ i1 + 1 ]; 724 c2 = m_block[ i2 + 1 ]; 725 if( c1 != c2 ) 726 { 727 return ( c1 > c2 ); 728 } 729 i1++; 730 i2++; 731 732 c1 = m_block[ i1 + 1 ]; 733 c2 = m_block[ i2 + 1 ]; 734 if( c1 != c2 ) 735 { 736 return ( c1 > c2 ); 737 } 738 i1++; 739 i2++; 740 741 c1 = m_block[ i1 + 1 ]; 742 c2 = m_block[ i2 + 1 ]; 743 if( c1 != c2 ) 744 { 745 return ( c1 > c2 ); 746 } 747 i1++; 748 i2++; 749 750 c1 = m_block[ i1 + 1 ]; 751 c2 = m_block[ i2 + 1 ]; 752 if( c1 != c2 ) 753 { 754 return ( c1 > c2 ); 755 } 756 i1++; 757 i2++; 758 759 c1 = m_block[ i1 + 1 ]; 760 c2 = m_block[ i2 + 1 ]; 761 if( c1 != c2 ) 762 { 763 return ( c1 > c2 ); 764 } 765 i1++; 766 i2++; 767 768 k = m_last + 1; 769 770 do 771 { 772 c1 = m_block[ i1 + 1 ]; 773 c2 = m_block[ i2 + 1 ]; 774 if( c1 != c2 ) 775 { 776 return ( c1 > c2 ); 777 } 778 s1 = m_quadrant[ i1 ]; 779 s2 = m_quadrant[ i2 ]; 780 if( s1 != s2 ) 781 { 782 return ( s1 > s2 ); 783 } 784 i1++; 785 i2++; 786 787 c1 = m_block[ i1 + 1 ]; 788 c2 = m_block[ i2 + 1 ]; 789 if( c1 != c2 ) 790 { 791 return ( c1 > c2 ); 792 } 793 s1 = m_quadrant[ i1 ]; 794 s2 = m_quadrant[ i2 ]; 795 if( s1 != s2 ) 796 { 797 return ( s1 > s2 ); 798 } 799 i1++; 800 i2++; 801 802 c1 = m_block[ i1 + 1 ]; 803 c2 = m_block[ i2 + 1 ]; 804 if( c1 != c2 ) 805 { 806 return ( c1 > c2 ); 807 } 808 s1 = m_quadrant[ i1 ]; 809 s2 = m_quadrant[ i2 ]; 810 if( s1 != s2 ) 811 { 812 return ( s1 > s2 ); 813 } 814 i1++; 815 i2++; 816 817 c1 = m_block[ i1 + 1 ]; 818 c2 = m_block[ i2 + 1 ]; 819 if( c1 != c2 ) 820 { 821 return ( c1 > c2 ); 822 } 823 s1 = m_quadrant[ i1 ]; 824 s2 = m_quadrant[ i2 ]; 825 if( s1 != s2 ) 826 { 827 return ( s1 > s2 ); 828 } 829 i1++; 830 i2++; 831 832 if( i1 > m_last ) 833 { 834 i1 -= m_last; 835 i1--; 836 } 837 ; 838 if( i2 > m_last ) 839 { 840 i2 -= m_last; 841 i2--; 842 } 843 ; 844 845 k -= 4; 846 m_workDone++; 847 } while( k >= 0 ); 848 849 return false; 850 } 851 generateMTFValues()852 private void generateMTFValues() 853 { 854 char[] yy = new char[ 256 ]; 855 int i; 856 int j; 857 char tmp; 858 char tmp2; 859 int zPend; 860 int wr; 861 int EOB; 862 863 makeMaps(); 864 EOB = m_nInUse + 1; 865 866 for( i = 0; i <= EOB; i++ ) 867 { 868 m_mtfFreq[ i ] = 0; 869 } 870 871 wr = 0; 872 zPend = 0; 873 for( i = 0; i < m_nInUse; i++ ) 874 { 875 yy[ i ] = (char)i; 876 } 877 878 for( i = 0; i <= m_last; i++ ) 879 { 880 char ll_i; 881 882 ll_i = m_unseqToSeq[ m_block[ m_zptr[ i ] ] ]; 883 884 j = 0; 885 tmp = yy[ j ]; 886 while( ll_i != tmp ) 887 { 888 j++; 889 tmp2 = tmp; 890 tmp = yy[ j ]; 891 yy[ j ] = tmp2; 892 } 893 ; 894 yy[ 0 ] = tmp; 895 896 if( j == 0 ) 897 { 898 zPend++; 899 } 900 else 901 { 902 if( zPend > 0 ) 903 { 904 zPend--; 905 while( true ) 906 { 907 switch( zPend % 2 ) 908 { 909 case 0: 910 m_szptr[ wr ] = (short)RUNA; 911 wr++; 912 m_mtfFreq[ RUNA ]++; 913 break; 914 case 1: 915 m_szptr[ wr ] = (short)RUNB; 916 wr++; 917 m_mtfFreq[ RUNB ]++; 918 break; 919 } 920 ; 921 if( zPend < 2 ) 922 { 923 break; 924 } 925 zPend = ( zPend - 2 ) / 2; 926 } 927 ; 928 zPend = 0; 929 } 930 m_szptr[ wr ] = (short)( j + 1 ); 931 wr++; 932 m_mtfFreq[ j + 1 ]++; 933 } 934 } 935 936 if( zPend > 0 ) 937 { 938 zPend--; 939 while( true ) 940 { 941 switch( zPend % 2 ) 942 { 943 case 0: 944 m_szptr[ wr ] = (short)RUNA; 945 wr++; 946 m_mtfFreq[ RUNA ]++; 947 break; 948 case 1: 949 m_szptr[ wr ] = (short)RUNB; 950 wr++; 951 m_mtfFreq[ RUNB ]++; 952 break; 953 } 954 if( zPend < 2 ) 955 { 956 break; 957 } 958 zPend = ( zPend - 2 ) / 2; 959 } 960 } 961 962 m_szptr[ wr ] = (short)EOB; 963 wr++; 964 m_mtfFreq[ EOB ]++; 965 966 m_nMTF = wr; 967 } 968 hbAssignCodes( int[] code, char[] length, int minLen, int maxLen, int alphaSize )969 private void hbAssignCodes( int[] code, char[] length, int minLen, 970 int maxLen, int alphaSize ) 971 { 972 int n; 973 int vec; 974 int i; 975 976 vec = 0; 977 for( n = minLen; n <= maxLen; n++ ) 978 { 979 for( i = 0; i < alphaSize; i++ ) 980 { 981 if( length[ i ] == n ) 982 { 983 code[ i ] = vec; 984 vec++; 985 } 986 } 987 ; 988 vec <<= 1; 989 } 990 } 991 initBlock()992 private void initBlock() 993 { 994 // blockNo++; 995 m_crc.initialiseCRC(); 996 m_last = -1; 997 // ch = 0; 998 999 for( int i = 0; i < 256; i++ ) 1000 { 1001 m_inUse[ i ] = false; 1002 } 1003 1004 /* 1005 * 20 is just a paranoia constant 1006 */ 1007 m_allowableBlockSize = BASE_BLOCK_SIZE * m_blockSize100k - 20; 1008 } 1009 initialize()1010 private void initialize() 1011 throws IOException 1012 { 1013 /* 1014 * Write `magic' bytes h indicating file-format == huffmanised, 1015 * followed by a digit indicating blockSize100k. 1016 */ 1017 bsPutUChar( 'h' ); 1018 bsPutUChar( '0' + m_blockSize100k ); 1019 1020 m_combinedCRC = 0; 1021 } 1022 mainSort()1023 private void mainSort() 1024 { 1025 int i; 1026 int j; 1027 int ss; 1028 int sb; 1029 int[] runningOrder = new int[ 256 ]; 1030 int[] copy = new int[ 256 ]; 1031 boolean[] bigDone = new boolean[ 256 ]; 1032 int c1; 1033 int c2; 1034 1035 /* 1036 * In the various block-sized structures, live data runs 1037 * from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, 1038 * set up the overshoot area for block. 1039 */ 1040 // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); 1041 for( i = 0; i < NUM_OVERSHOOT_BYTES; i++ ) 1042 { 1043 m_block[ m_last + i + 2 ] = m_block[ ( i % ( m_last + 1 ) ) + 1 ]; 1044 } 1045 for( i = 0; i <= m_last + NUM_OVERSHOOT_BYTES; i++ ) 1046 { 1047 m_quadrant[ i ] = 0; 1048 } 1049 1050 m_block[ 0 ] = m_block[ m_last + 1 ]; 1051 1052 if( m_last < 4000 ) 1053 { 1054 /* 1055 * Use simpleSort(), since the full sorting mechanism 1056 * has quite a large constant overhead. 1057 */ 1058 for( i = 0; i <= m_last; i++ ) 1059 { 1060 m_zptr[ i ] = i; 1061 } 1062 m_firstAttempt = false; 1063 m_workDone = 0; 1064 m_workLimit = 0; 1065 simpleSort( 0, m_last, 0 ); 1066 } 1067 else 1068 { 1069 for( i = 0; i <= 255; i++ ) 1070 { 1071 bigDone[ i ] = false; 1072 } 1073 1074 for( i = 0; i <= 65536; i++ ) 1075 { 1076 m_ftab[ i ] = 0; 1077 } 1078 1079 c1 = m_block[ 0 ]; 1080 for( i = 0; i <= m_last; i++ ) 1081 { 1082 c2 = m_block[ i + 1 ]; 1083 m_ftab[ ( c1 << 8 ) + c2 ]++; 1084 c1 = c2; 1085 } 1086 1087 for( i = 1; i <= 65536; i++ ) 1088 { 1089 m_ftab[ i ] += m_ftab[ i - 1 ]; 1090 } 1091 1092 c1 = m_block[ 1 ]; 1093 for( i = 0; i < m_last; i++ ) 1094 { 1095 c2 = m_block[ i + 2 ]; 1096 j = ( c1 << 8 ) + c2; 1097 c1 = c2; 1098 m_ftab[ j ]--; 1099 m_zptr[ m_ftab[ j ] ] = i; 1100 } 1101 1102 j = ( ( m_block[ m_last + 1 ] ) << 8 ) + ( m_block[ 1 ] ); 1103 m_ftab[ j ]--; 1104 m_zptr[ m_ftab[ j ] ] = m_last; 1105 1106 /* 1107 * Now ftab contains the first loc of every small bucket. 1108 * Calculate the running order, from smallest to largest 1109 * big bucket. 1110 */ 1111 for( i = 0; i <= 255; i++ ) 1112 { 1113 runningOrder[ i ] = i; 1114 } 1115 { 1116 int vv; 1117 int h = 1; 1118 do 1119 { 1120 h = 3 * h + 1; 1121 } while( h <= 256 ); 1122 do 1123 { 1124 h = h / 3; 1125 for( i = h; i <= 255; i++ ) 1126 { 1127 vv = runningOrder[ i ]; 1128 j = i; 1129 while( ( m_ftab[ ( ( runningOrder[ j - h ] ) + 1 ) << 8 ] 1130 - m_ftab[ ( runningOrder[ j - h ] ) << 8 ] ) > 1131 ( m_ftab[ ( ( vv ) + 1 ) << 8 ] - m_ftab[ ( vv ) << 8 ] ) ) 1132 { 1133 runningOrder[ j ] = runningOrder[ j - h ]; 1134 j = j - h; 1135 if( j <= ( h - 1 ) ) 1136 { 1137 break; 1138 } 1139 } 1140 runningOrder[ j ] = vv; 1141 } 1142 } while( h != 1 ); 1143 } 1144 1145 /* 1146 * The main sorting loop. 1147 */ 1148 for( i = 0; i <= 255; i++ ) 1149 { 1150 1151 /* 1152 * Process big buckets, starting with the least full. 1153 */ 1154 ss = runningOrder[ i ]; 1155 1156 /* 1157 * Complete the big bucket [ss] by quicksorting 1158 * any unsorted small buckets [ss, j]. Hopefully 1159 * previous pointer-scanning phases have already 1160 * completed many of the small buckets [ss, j], so 1161 * we don't have to sort them at all. 1162 */ 1163 for( j = 0; j <= 255; j++ ) 1164 { 1165 sb = ( ss << 8 ) + j; 1166 if( !( ( m_ftab[ sb ] & SETMASK ) == SETMASK ) ) 1167 { 1168 int lo = m_ftab[ sb ] & CLEARMASK; 1169 int hi = ( m_ftab[ sb + 1 ] & CLEARMASK ) - 1; 1170 if( hi > lo ) 1171 { 1172 qSort3( lo, hi, 2 ); 1173 if( m_workDone > m_workLimit && m_firstAttempt ) 1174 { 1175 return; 1176 } 1177 } 1178 m_ftab[ sb ] |= SETMASK; 1179 } 1180 } 1181 1182 /* 1183 * The ss big bucket is now done. Record this fact, 1184 * and update the quadrant descriptors. Remember to 1185 * update quadrants in the overshoot area too, if 1186 * necessary. The "if (i < 255)" test merely skips 1187 * this updating for the last bucket processed, since 1188 * updating for the last bucket is pointless. 1189 */ 1190 bigDone[ ss ] = true; 1191 1192 if( i < 255 ) 1193 { 1194 int bbStart = m_ftab[ ss << 8 ] & CLEARMASK; 1195 int bbSize = ( m_ftab[ ( ss + 1 ) << 8 ] & CLEARMASK ) - bbStart; 1196 int shifts = 0; 1197 1198 while( ( bbSize >> shifts ) > 65534 ) 1199 { 1200 shifts++; 1201 } 1202 1203 for( j = 0; j < bbSize; j++ ) 1204 { 1205 int a2update = m_zptr[ bbStart + j ]; 1206 int qVal = ( j >> shifts ); 1207 m_quadrant[ a2update ] = qVal; 1208 if( a2update < NUM_OVERSHOOT_BYTES ) 1209 { 1210 m_quadrant[ a2update + m_last + 1 ] = qVal; 1211 } 1212 } 1213 1214 if( !( ( ( bbSize - 1 ) >> shifts ) <= 65535 ) ) 1215 { 1216 panic(); 1217 } 1218 } 1219 1220 /* 1221 * Now scan this big bucket so as to synthesise the 1222 * sorted order for small buckets [t, ss] for all t != ss. 1223 */ 1224 for( j = 0; j <= 255; j++ ) 1225 { 1226 copy[ j ] = m_ftab[ ( j << 8 ) + ss ] & CLEARMASK; 1227 } 1228 1229 for( j = m_ftab[ ss << 8 ] & CLEARMASK; 1230 j < ( m_ftab[ ( ss + 1 ) << 8 ] & CLEARMASK ); j++ ) 1231 { 1232 c1 = m_block[ m_zptr[ j ] ]; 1233 if( !bigDone[ c1 ] ) 1234 { 1235 m_zptr[ copy[ c1 ] ] = m_zptr[ j ] == 0 ? m_last : m_zptr[ j ] - 1; 1236 copy[ c1 ]++; 1237 } 1238 } 1239 1240 for( j = 0; j <= 255; j++ ) 1241 { 1242 m_ftab[ ( j << 8 ) + ss ] |= SETMASK; 1243 } 1244 } 1245 } 1246 } 1247 makeMaps()1248 private void makeMaps() 1249 { 1250 int i; 1251 m_nInUse = 0; 1252 for( i = 0; i < 256; i++ ) 1253 { 1254 if( m_inUse[ i ] ) 1255 { 1256 m_seqToUnseq[ m_nInUse ] = (char)i; 1257 m_unseqToSeq[ i ] = (char)m_nInUse; 1258 m_nInUse++; 1259 } 1260 } 1261 } 1262 med3( char a, char b, char c )1263 private char med3( char a, char b, char c ) 1264 { 1265 char t; 1266 if( a > b ) 1267 { 1268 t = a; 1269 a = b; 1270 b = t; 1271 } 1272 if( b > c ) 1273 { 1274 t = b; 1275 b = c; 1276 c = t; 1277 } 1278 if( a > b ) 1279 { 1280 b = a; 1281 } 1282 return b; 1283 } 1284 moveToFrontCodeAndSend()1285 private void moveToFrontCodeAndSend() 1286 throws IOException 1287 { 1288 bsPutIntVS( 24, m_origPtr ); 1289 generateMTFValues(); 1290 sendMTFValues(); 1291 } 1292 qSort3( int loSt, int hiSt, int dSt )1293 private void qSort3( int loSt, int hiSt, int dSt ) 1294 { 1295 int unLo; 1296 int unHi; 1297 int ltLo; 1298 int gtHi; 1299 int med; 1300 int n; 1301 int m; 1302 int sp; 1303 int lo; 1304 int hi; 1305 int d; 1306 StackElem[] stack = new StackElem[ QSORT_STACK_SIZE ]; 1307 for( int count = 0; count < QSORT_STACK_SIZE; count++ ) 1308 { 1309 stack[ count ] = new StackElem(); 1310 } 1311 1312 sp = 0; 1313 1314 stack[ sp ].m_ll = loSt; 1315 stack[ sp ].m_hh = hiSt; 1316 stack[ sp ].m_dd = dSt; 1317 sp++; 1318 1319 while( sp > 0 ) 1320 { 1321 if( sp >= QSORT_STACK_SIZE ) 1322 { 1323 panic(); 1324 } 1325 1326 sp--; 1327 lo = stack[ sp ].m_ll; 1328 hi = stack[ sp ].m_hh; 1329 d = stack[ sp ].m_dd; 1330 1331 if( hi - lo < SMALL_THRESH || d > DEPTH_THRESH ) 1332 { 1333 simpleSort( lo, hi, d ); 1334 if( m_workDone > m_workLimit && m_firstAttempt ) 1335 { 1336 return; 1337 } 1338 continue; 1339 } 1340 1341 med = med3( m_block[ m_zptr[ lo ] + d + 1 ], 1342 m_block[ m_zptr[ hi ] + d + 1 ], 1343 m_block[ m_zptr[ ( lo + hi ) >> 1 ] + d + 1 ] ); 1344 1345 unLo = lo; 1346 ltLo = lo; 1347 unHi = hi; 1348 gtHi = hi; 1349 1350 while( true ) 1351 { 1352 while( true ) 1353 { 1354 if( unLo > unHi ) 1355 { 1356 break; 1357 } 1358 n = m_block[ m_zptr[ unLo ] + d + 1 ] - med; 1359 if( n == 0 ) 1360 { 1361 int temp = 0; 1362 temp = m_zptr[ unLo ]; 1363 m_zptr[ unLo ] = m_zptr[ ltLo ]; 1364 m_zptr[ ltLo ] = temp; 1365 ltLo++; 1366 unLo++; 1367 continue; 1368 } 1369 ; 1370 if( n > 0 ) 1371 { 1372 break; 1373 } 1374 unLo++; 1375 } 1376 while( true ) 1377 { 1378 if( unLo > unHi ) 1379 { 1380 break; 1381 } 1382 n = m_block[ m_zptr[ unHi ] + d + 1 ] - med; 1383 if( n == 0 ) 1384 { 1385 int temp = 0; 1386 temp = m_zptr[ unHi ]; 1387 m_zptr[ unHi ] = m_zptr[ gtHi ]; 1388 m_zptr[ gtHi ] = temp; 1389 gtHi--; 1390 unHi--; 1391 continue; 1392 } 1393 ; 1394 if( n < 0 ) 1395 { 1396 break; 1397 } 1398 unHi--; 1399 } 1400 if( unLo > unHi ) 1401 { 1402 break; 1403 } 1404 int temp = 0; 1405 temp = m_zptr[ unLo ]; 1406 m_zptr[ unLo ] = m_zptr[ unHi ]; 1407 m_zptr[ unHi ] = temp; 1408 unLo++; 1409 unHi--; 1410 } 1411 1412 if( gtHi < ltLo ) 1413 { 1414 stack[ sp ].m_ll = lo; 1415 stack[ sp ].m_hh = hi; 1416 stack[ sp ].m_dd = d + 1; 1417 sp++; 1418 continue; 1419 } 1420 1421 n = ( ( ltLo - lo ) < ( unLo - ltLo ) ) ? ( ltLo - lo ) : ( unLo - ltLo ); 1422 vswap( lo, unLo - n, n ); 1423 m = ( ( hi - gtHi ) < ( gtHi - unHi ) ) ? ( hi - gtHi ) : ( gtHi - unHi ); 1424 vswap( unLo, hi - m + 1, m ); 1425 1426 n = lo + unLo - ltLo - 1; 1427 m = hi - ( gtHi - unHi ) + 1; 1428 1429 stack[ sp ].m_ll = lo; 1430 stack[ sp ].m_hh = n; 1431 stack[ sp ].m_dd = d; 1432 sp++; 1433 1434 stack[ sp ].m_ll = n + 1; 1435 stack[ sp ].m_hh = m - 1; 1436 stack[ sp ].m_dd = d + 1; 1437 sp++; 1438 1439 stack[ sp ].m_ll = m; 1440 stack[ sp ].m_hh = hi; 1441 stack[ sp ].m_dd = d; 1442 sp++; 1443 } 1444 } 1445 randomiseBlock()1446 private void randomiseBlock() 1447 { 1448 int i; 1449 int rNToGo = 0; 1450 int rTPos = 0; 1451 for( i = 0; i < 256; i++ ) 1452 { 1453 m_inUse[ i ] = false; 1454 } 1455 1456 for( i = 0; i <= m_last; i++ ) 1457 { 1458 if( rNToGo == 0 ) 1459 { 1460 rNToGo = (char)RAND_NUMS[ rTPos ]; 1461 rTPos++; 1462 if( rTPos == 512 ) 1463 { 1464 rTPos = 0; 1465 } 1466 } 1467 rNToGo--; 1468 m_block[ i + 1 ] ^= ( ( rNToGo == 1 ) ? 1 : 0 ); 1469 // handle 16 bit signed numbers 1470 m_block[ i + 1 ] &= 0xFF; 1471 1472 m_inUse[ m_block[ i + 1 ] ] = true; 1473 } 1474 } 1475 sendMTFValues()1476 private void sendMTFValues() 1477 throws IOException 1478 { 1479 char[][] len = new char[ N_GROUPS ][ MAX_ALPHA_SIZE ]; 1480 1481 int v; 1482 1483 int t; 1484 1485 int i; 1486 1487 int j; 1488 1489 int gs; 1490 1491 int ge; 1492 1493 int bt; 1494 1495 int bc; 1496 1497 int iter; 1498 int nSelectors = 0; 1499 int alphaSize; 1500 int minLen; 1501 int maxLen; 1502 int selCtr; 1503 int nGroups; 1504 1505 alphaSize = m_nInUse + 2; 1506 for( t = 0; t < N_GROUPS; t++ ) 1507 { 1508 for( v = 0; v < alphaSize; v++ ) 1509 { 1510 len[ t ][ v ] = (char)GREATER_ICOST; 1511 } 1512 } 1513 1514 /* 1515 * Decide how many coding tables to use 1516 */ 1517 if( m_nMTF <= 0 ) 1518 { 1519 panic(); 1520 } 1521 1522 if( m_nMTF < 200 ) 1523 { 1524 nGroups = 2; 1525 } 1526 else if( m_nMTF < 600 ) 1527 { 1528 nGroups = 3; 1529 } 1530 else if( m_nMTF < 1200 ) 1531 { 1532 nGroups = 4; 1533 } 1534 else if( m_nMTF < 2400 ) 1535 { 1536 nGroups = 5; 1537 } 1538 else 1539 { 1540 nGroups = 6; 1541 } 1542 { 1543 /* 1544 * Generate an initial set of coding tables 1545 */ 1546 int nPart; 1547 int remF; 1548 int tFreq; 1549 int aFreq; 1550 1551 nPart = nGroups; 1552 remF = m_nMTF; 1553 gs = 0; 1554 while( nPart > 0 ) 1555 { 1556 tFreq = remF / nPart; 1557 ge = gs - 1; 1558 aFreq = 0; 1559 while( aFreq < tFreq && ge < alphaSize - 1 ) 1560 { 1561 ge++; 1562 aFreq += m_mtfFreq[ ge ]; 1563 } 1564 1565 if( ge > gs && nPart != nGroups && nPart != 1 1566 && ( ( nGroups - nPart ) % 2 == 1 ) ) 1567 { 1568 aFreq -= m_mtfFreq[ ge ]; 1569 ge--; 1570 } 1571 1572 for( v = 0; v < alphaSize; v++ ) 1573 { 1574 if( v >= gs && v <= ge ) 1575 { 1576 len[ nPart - 1 ][ v ] = (char)LESSER_ICOST; 1577 } 1578 else 1579 { 1580 len[ nPart - 1 ][ v ] = (char)GREATER_ICOST; 1581 } 1582 } 1583 1584 nPart--; 1585 gs = ge + 1; 1586 remF -= aFreq; 1587 } 1588 } 1589 1590 int[][] rfreq = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; 1591 int[] fave = new int[ N_GROUPS ]; 1592 short[] cost = new short[ N_GROUPS ]; 1593 /* 1594 * Iterate up to N_ITERS times to improve the tables. 1595 */ 1596 for( iter = 0; iter < N_ITERS; iter++ ) 1597 { 1598 for( t = 0; t < nGroups; t++ ) 1599 { 1600 fave[ t ] = 0; 1601 } 1602 1603 for( t = 0; t < nGroups; t++ ) 1604 { 1605 for( v = 0; v < alphaSize; v++ ) 1606 { 1607 rfreq[ t ][ v ] = 0; 1608 } 1609 } 1610 1611 nSelectors = 0; 1612 gs = 0; 1613 while( true ) 1614 { 1615 1616 /* 1617 * Set group start & end marks. 1618 */ 1619 if( gs >= m_nMTF ) 1620 { 1621 break; 1622 } 1623 ge = gs + G_SIZE - 1; 1624 if( ge >= m_nMTF ) 1625 { 1626 ge = m_nMTF - 1; 1627 } 1628 1629 /* 1630 * Calculate the cost of this group as coded 1631 * by each of the coding tables. 1632 */ 1633 for( t = 0; t < nGroups; t++ ) 1634 { 1635 cost[ t ] = 0; 1636 } 1637 1638 if( nGroups == 6 ) 1639 { 1640 short cost0 = 0; 1641 short cost1 = 0; 1642 short cost2 = 0; 1643 short cost3 = 0; 1644 short cost4 = 0; 1645 short cost5 = 0; 1646 1647 for( i = gs; i <= ge; i++ ) 1648 { 1649 short icv = m_szptr[ i ]; 1650 cost0 += len[ 0 ][ icv ]; 1651 cost1 += len[ 1 ][ icv ]; 1652 cost2 += len[ 2 ][ icv ]; 1653 cost3 += len[ 3 ][ icv ]; 1654 cost4 += len[ 4 ][ icv ]; 1655 cost5 += len[ 5 ][ icv ]; 1656 } 1657 cost[ 0 ] = cost0; 1658 cost[ 1 ] = cost1; 1659 cost[ 2 ] = cost2; 1660 cost[ 3 ] = cost3; 1661 cost[ 4 ] = cost4; 1662 cost[ 5 ] = cost5; 1663 } 1664 else 1665 { 1666 for( i = gs; i <= ge; i++ ) 1667 { 1668 short icv = m_szptr[ i ]; 1669 for( t = 0; t < nGroups; t++ ) 1670 { 1671 cost[ t ] += len[ t ][ icv ]; 1672 } 1673 } 1674 } 1675 1676 /* 1677 * Find the coding table which is best for this group, 1678 * and record its identity in the selector table. 1679 */ 1680 bc = 999999999; 1681 bt = -1; 1682 for( t = 0; t < nGroups; t++ ) 1683 { 1684 if( cost[ t ] < bc ) 1685 { 1686 bc = cost[ t ]; 1687 bt = t; 1688 } 1689 } 1690 ; 1691 fave[ bt ]++; 1692 m_selector[ nSelectors ] = (char)bt; 1693 nSelectors++; 1694 1695 /* 1696 * Increment the symbol frequencies for the selected table. 1697 */ 1698 for( i = gs; i <= ge; i++ ) 1699 { 1700 rfreq[ bt ][ m_szptr[ i ] ]++; 1701 } 1702 1703 gs = ge + 1; 1704 } 1705 1706 /* 1707 * Recompute the tables based on the accumulated frequencies. 1708 */ 1709 for( t = 0; t < nGroups; t++ ) 1710 { 1711 hbMakeCodeLengths( len[ t ], rfreq[ t ], alphaSize, 20 ); 1712 } 1713 } 1714 1715 rfreq = null; 1716 fave = null; 1717 cost = null; 1718 1719 if( !( nGroups < 8 ) ) 1720 { 1721 panic(); 1722 } 1723 if( !( nSelectors < 32768 && nSelectors <= ( 2 + ( 900000 / G_SIZE ) ) ) ) 1724 { 1725 panic(); 1726 } 1727 { 1728 /* 1729 * Compute MTF values for the selectors. 1730 */ 1731 char[] pos = new char[ N_GROUPS ]; 1732 char ll_i; 1733 char tmp2; 1734 char tmp; 1735 for( i = 0; i < nGroups; i++ ) 1736 { 1737 pos[ i ] = (char)i; 1738 } 1739 for( i = 0; i < nSelectors; i++ ) 1740 { 1741 ll_i = m_selector[ i ]; 1742 j = 0; 1743 tmp = pos[ j ]; 1744 while( ll_i != tmp ) 1745 { 1746 j++; 1747 tmp2 = tmp; 1748 tmp = pos[ j ]; 1749 pos[ j ] = tmp2; 1750 } 1751 pos[ 0 ] = tmp; 1752 m_selectorMtf[ i ] = (char)j; 1753 } 1754 } 1755 1756 int[][] code = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; 1757 1758 /* 1759 * Assign actual codes for the tables. 1760 */ 1761 for( t = 0; t < nGroups; t++ ) 1762 { 1763 minLen = 32; 1764 maxLen = 0; 1765 for( i = 0; i < alphaSize; i++ ) 1766 { 1767 if( len[ t ][ i ] > maxLen ) 1768 { 1769 maxLen = len[ t ][ i ]; 1770 } 1771 if( len[ t ][ i ] < minLen ) 1772 { 1773 minLen = len[ t ][ i ]; 1774 } 1775 } 1776 if( maxLen > 20 ) 1777 { 1778 panic(); 1779 } 1780 if( minLen < 1 ) 1781 { 1782 panic(); 1783 } 1784 hbAssignCodes( code[ t ], len[ t ], minLen, maxLen, alphaSize ); 1785 } 1786 { 1787 /* 1788 * Transmit the mapping table. 1789 */ 1790 boolean[] inUse16 = new boolean[ 16 ]; 1791 for( i = 0; i < 16; i++ ) 1792 { 1793 inUse16[ i ] = false; 1794 for( j = 0; j < 16; j++ ) 1795 { 1796 if( m_inUse[ i * 16 + j ] ) 1797 { 1798 inUse16[ i ] = true; 1799 } 1800 } 1801 } 1802 1803 for( i = 0; i < 16; i++ ) 1804 { 1805 if( inUse16[ i ] ) 1806 { 1807 bsW( 1, 1 ); 1808 } 1809 else 1810 { 1811 bsW( 1, 0 ); 1812 } 1813 } 1814 1815 for( i = 0; i < 16; i++ ) 1816 { 1817 if( inUse16[ i ] ) 1818 { 1819 for( j = 0; j < 16; j++ ) 1820 { 1821 if( m_inUse[ i * 16 + j ] ) 1822 { 1823 bsW( 1, 1 ); 1824 } 1825 else 1826 { 1827 bsW( 1, 0 ); 1828 } 1829 } 1830 } 1831 } 1832 1833 } 1834 1835 /* 1836 * Now the selectors. 1837 */ 1838 bsW( 3, nGroups ); 1839 bsW( 15, nSelectors ); 1840 for( i = 0; i < nSelectors; i++ ) 1841 { 1842 for( j = 0; j < m_selectorMtf[ i ]; j++ ) 1843 { 1844 bsW( 1, 1 ); 1845 } 1846 bsW( 1, 0 ); 1847 } 1848 1849 for( t = 0; t < nGroups; t++ ) 1850 { 1851 int curr = len[ t ][ 0 ]; 1852 bsW( 5, curr ); 1853 for( i = 0; i < alphaSize; i++ ) 1854 { 1855 while( curr < len[ t ][ i ] ) 1856 { 1857 bsW( 2, 2 ); 1858 curr++; 1859 /* 1860 * 10 1861 */ 1862 } 1863 while( curr > len[ t ][ i ] ) 1864 { 1865 bsW( 2, 3 ); 1866 curr--; 1867 /* 1868 * 11 1869 */ 1870 } 1871 bsW( 1, 0 ); 1872 } 1873 } 1874 1875 /* 1876 * And finally, the block data proper 1877 */ 1878 selCtr = 0; 1879 gs = 0; 1880 while( true ) 1881 { 1882 if( gs >= m_nMTF ) 1883 { 1884 break; 1885 } 1886 ge = gs + G_SIZE - 1; 1887 if( ge >= m_nMTF ) 1888 { 1889 ge = m_nMTF - 1; 1890 } 1891 for( i = gs; i <= ge; i++ ) 1892 { 1893 bsW( len[ m_selector[ selCtr ] ][ m_szptr[ i ] ], 1894 code[ m_selector[ selCtr ] ][ m_szptr[ i ] ] ); 1895 } 1896 1897 gs = ge + 1; 1898 selCtr++; 1899 } 1900 if( !( selCtr == nSelectors ) ) 1901 { 1902 panic(); 1903 } 1904 } 1905 simpleSort( int lo, int hi, int d )1906 private void simpleSort( int lo, int hi, int d ) 1907 { 1908 int i; 1909 int j; 1910 int h; 1911 int bigN; 1912 int hp; 1913 int v; 1914 1915 bigN = hi - lo + 1; 1916 if( bigN < 2 ) 1917 { 1918 return; 1919 } 1920 1921 hp = 0; 1922 while( m_incs[ hp ] < bigN ) 1923 { 1924 hp++; 1925 } 1926 hp--; 1927 1928 for( ; hp >= 0; hp-- ) 1929 { 1930 h = m_incs[ hp ]; 1931 1932 i = lo + h; 1933 while( true ) 1934 { 1935 /* 1936 * copy 1 1937 */ 1938 if( i > hi ) 1939 { 1940 break; 1941 } 1942 v = m_zptr[ i ]; 1943 j = i; 1944 while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) 1945 { 1946 m_zptr[ j ] = m_zptr[ j - h ]; 1947 j = j - h; 1948 if( j <= ( lo + h - 1 ) ) 1949 { 1950 break; 1951 } 1952 } 1953 m_zptr[ j ] = v; 1954 i++; 1955 1956 /* 1957 * copy 2 1958 */ 1959 if( i > hi ) 1960 { 1961 break; 1962 } 1963 v = m_zptr[ i ]; 1964 j = i; 1965 while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) 1966 { 1967 m_zptr[ j ] = m_zptr[ j - h ]; 1968 j = j - h; 1969 if( j <= ( lo + h - 1 ) ) 1970 { 1971 break; 1972 } 1973 } 1974 m_zptr[ j ] = v; 1975 i++; 1976 1977 /* 1978 * copy 3 1979 */ 1980 if( i > hi ) 1981 { 1982 break; 1983 } 1984 v = m_zptr[ i ]; 1985 j = i; 1986 while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) 1987 { 1988 m_zptr[ j ] = m_zptr[ j - h ]; 1989 j = j - h; 1990 if( j <= ( lo + h - 1 ) ) 1991 { 1992 break; 1993 } 1994 } 1995 m_zptr[ j ] = v; 1996 i++; 1997 1998 if( m_workDone > m_workLimit && m_firstAttempt ) 1999 { 2000 return; 2001 } 2002 } 2003 } 2004 } 2005 vswap( int p1, int p2, int n )2006 private void vswap( int p1, int p2, int n ) 2007 { 2008 int temp = 0; 2009 while( n > 0 ) 2010 { 2011 temp = m_zptr[ p1 ]; 2012 m_zptr[ p1 ] = m_zptr[ p2 ]; 2013 m_zptr[ p2 ] = temp; 2014 p1++; 2015 p2++; 2016 n--; 2017 } 2018 } 2019 writeRun()2020 private void writeRun() 2021 throws IOException 2022 { 2023 if( m_last < m_allowableBlockSize ) 2024 { 2025 m_inUse[ m_currentChar ] = true; 2026 for( int i = 0; i < m_runLength; i++ ) 2027 { 2028 m_crc.updateCRC( (char)m_currentChar ); 2029 } 2030 switch( m_runLength ) 2031 { 2032 case 1: 2033 m_last++; 2034 m_block[ m_last + 1 ] = (char)m_currentChar; 2035 break; 2036 case 2: 2037 m_last++; 2038 m_block[ m_last + 1 ] = (char)m_currentChar; 2039 m_last++; 2040 m_block[ m_last + 1 ] = (char)m_currentChar; 2041 break; 2042 case 3: 2043 m_last++; 2044 m_block[ m_last + 1 ] = (char)m_currentChar; 2045 m_last++; 2046 m_block[ m_last + 1 ] = (char)m_currentChar; 2047 m_last++; 2048 m_block[ m_last + 1 ] = (char)m_currentChar; 2049 break; 2050 default: 2051 m_inUse[ m_runLength - 4 ] = true; 2052 m_last++; 2053 m_block[ m_last + 1 ] = (char)m_currentChar; 2054 m_last++; 2055 m_block[ m_last + 1 ] = (char)m_currentChar; 2056 m_last++; 2057 m_block[ m_last + 1 ] = (char)m_currentChar; 2058 m_last++; 2059 m_block[ m_last + 1 ] = (char)m_currentChar; 2060 m_last++; 2061 m_block[ m_last + 1 ] = (char)( m_runLength - 4 ); 2062 break; 2063 } 2064 } 2065 else 2066 { 2067 endBlock(); 2068 initBlock(); 2069 writeRun(); 2070 } 2071 } 2072 2073 private static class StackElem 2074 { 2075 int m_dd; 2076 int m_hh; 2077 int m_ll; 2078 } 2079 } 2080 2081