1 /* $Id: tif_lzw.c,v 1.57 2017-07-11 10:54:29 erouault Exp $ */ 2 3 /* 4 * Copyright (c) 1988-1997 Sam Leffler 5 * Copyright (c) 1991-1997 Silicon Graphics, Inc. 6 * 7 * Permission to use, copy, modify, distribute, and sell this software and 8 * its documentation for any purpose is hereby granted without fee, provided 9 * that (i) the above copyright notices and this permission notice appear in 10 * all copies of the software and related documentation, and (ii) the names of 11 * Sam Leffler and Silicon Graphics may not be used in any advertising or 12 * publicity relating to the software without the specific, prior written 13 * permission of Sam Leffler and Silicon Graphics. 14 * 15 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 17 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 18 * 19 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR 20 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, 21 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 22 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 23 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 24 * OF THIS SOFTWARE. 25 */ 26 27 #include <precomp.h> 28 #ifdef LZW_SUPPORT 29 /* 30 * TIFF Library. 31 * Rev 5.0 Lempel-Ziv & Welch Compression Support 32 * 33 * This code is derived from the compress program whose code is 34 * derived from software contributed to Berkeley by James A. Woods, 35 * derived from original work by Spencer Thomas and Joseph Orost. 36 * 37 * The original Berkeley copyright notice appears below in its entirety. 38 */ 39 #include "tif_predict.h" 40 41 #include <stdio.h> 42 43 /* 44 * NB: The 5.0 spec describes a different algorithm than Aldus 45 * implements. Specifically, Aldus does code length transitions 46 * one code earlier than should be done (for real LZW). 47 * Earlier versions of this library implemented the correct 48 * LZW algorithm, but emitted codes in a bit order opposite 49 * to the TIFF spec. Thus, to maintain compatibility w/ Aldus 50 * we interpret MSB-LSB ordered codes to be images written w/ 51 * old versions of this library, but otherwise adhere to the 52 * Aldus "off by one" algorithm. 53 * 54 * Future revisions to the TIFF spec are expected to "clarify this issue". 55 */ 56 #define LZW_COMPAT /* include backwards compatibility code */ 57 /* 58 * Each strip of data is supposed to be terminated by a CODE_EOI. 59 * If the following #define is included, the decoder will also 60 * check for end-of-strip w/o seeing this code. This makes the 61 * library more robust, but also slower. 62 */ 63 #define LZW_CHECKEOS /* include checks for strips w/o EOI code */ 64 65 #define MAXCODE(n) ((1L<<(n))-1) 66 /* 67 * The TIFF spec specifies that encoded bit 68 * strings range from 9 to 12 bits. 69 */ 70 #define BITS_MIN 9 /* start with 9 bits */ 71 #define BITS_MAX 12 /* max of 12 bit strings */ 72 /* predefined codes */ 73 #define CODE_CLEAR 256 /* code to clear string table */ 74 #define CODE_EOI 257 /* end-of-information code */ 75 #define CODE_FIRST 258 /* first free code entry */ 76 #define CODE_MAX MAXCODE(BITS_MAX) 77 #define HSIZE 9001L /* 91% occupancy */ 78 #define HSHIFT (13-8) 79 #ifdef LZW_COMPAT 80 /* NB: +1024 is for compatibility with old files */ 81 #define CSIZE (MAXCODE(BITS_MAX)+1024L) 82 #else 83 #define CSIZE (MAXCODE(BITS_MAX)+1L) 84 #endif 85 86 /* 87 * State block for each open TIFF file using LZW 88 * compression/decompression. Note that the predictor 89 * state block must be first in this data structure. 90 */ 91 typedef struct { 92 TIFFPredictorState predict; /* predictor super class */ 93 94 unsigned short nbits; /* # of bits/code */ 95 unsigned short maxcode; /* maximum code for lzw_nbits */ 96 unsigned short free_ent; /* next free entry in hash table */ 97 unsigned long nextdata; /* next bits of i/o */ 98 long nextbits; /* # of valid bits in lzw_nextdata */ 99 100 int rw_mode; /* preserve rw_mode from init */ 101 } LZWBaseState; 102 103 #define lzw_nbits base.nbits 104 #define lzw_maxcode base.maxcode 105 #define lzw_free_ent base.free_ent 106 #define lzw_nextdata base.nextdata 107 #define lzw_nextbits base.nextbits 108 109 /* 110 * Encoding-specific state. 111 */ 112 typedef uint16 hcode_t; /* codes fit in 16 bits */ 113 typedef struct { 114 long hash; 115 hcode_t code; 116 } hash_t; 117 118 /* 119 * Decoding-specific state. 120 */ 121 typedef struct code_ent { 122 struct code_ent *next; 123 unsigned short length; /* string len, including this token */ 124 unsigned char value; /* data value */ 125 unsigned char firstchar; /* first token of string */ 126 } code_t; 127 128 typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16); 129 130 typedef struct { 131 LZWBaseState base; 132 133 /* Decoding specific data */ 134 long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ 135 long dec_restart; /* restart count */ 136 #ifdef LZW_CHECKEOS 137 uint64 dec_bitsleft; /* available bits in raw data */ 138 #endif 139 decodeFunc dec_decode; /* regular or backwards compatible */ 140 code_t* dec_codep; /* current recognized code */ 141 code_t* dec_oldcodep; /* previously recognized code */ 142 code_t* dec_free_entp; /* next free entry */ 143 code_t* dec_maxcodep; /* max available entry */ 144 code_t* dec_codetab; /* kept separate for small machines */ 145 146 /* Encoding specific data */ 147 int enc_oldcode; /* last code encountered */ 148 long enc_checkpoint; /* point at which to clear table */ 149 #define CHECK_GAP 10000 /* enc_ratio check interval */ 150 long enc_ratio; /* current compression ratio */ 151 long enc_incount; /* (input) data bytes encoded */ 152 long enc_outcount; /* encoded (output) bytes */ 153 uint8* enc_rawlimit; /* bound on tif_rawdata buffer */ 154 hash_t* enc_hashtab; /* kept separate for small machines */ 155 } LZWCodecState; 156 157 #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) 158 #define DecoderState(tif) ((LZWCodecState*) LZWState(tif)) 159 #define EncoderState(tif) ((LZWCodecState*) LZWState(tif)) 160 161 static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 162 #ifdef LZW_COMPAT 163 static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 164 #endif 165 static void cl_hash(LZWCodecState*); 166 167 /* 168 * LZW Decoder. 169 */ 170 171 #ifdef LZW_CHECKEOS 172 /* 173 * This check shouldn't be necessary because each 174 * strip is suppose to be terminated with CODE_EOI. 175 */ 176 #define NextCode(_tif, _sp, _bp, _code, _get) { \ 177 if ((_sp)->dec_bitsleft < (uint64)nbits) { \ 178 TIFFWarningExt(_tif->tif_clientdata, module, \ 179 "LZWDecode: Strip %d not terminated with EOI code", \ 180 _tif->tif_curstrip); \ 181 _code = CODE_EOI; \ 182 } else { \ 183 _get(_sp,_bp,_code); \ 184 (_sp)->dec_bitsleft -= nbits; \ 185 } \ 186 } 187 #else 188 #define NextCode(tif, sp, bp, code, get) get(sp, bp, code) 189 #endif 190 191 static int 192 LZWFixupTags(TIFF* tif) 193 { 194 (void) tif; 195 return (1); 196 } 197 198 static int 199 LZWSetupDecode(TIFF* tif) 200 { 201 static const char module[] = "LZWSetupDecode"; 202 LZWCodecState* sp = DecoderState(tif); 203 int code; 204 205 if( sp == NULL ) 206 { 207 /* 208 * Allocate state block so tag methods have storage to record 209 * values. 210 */ 211 tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState)); 212 if (tif->tif_data == NULL) 213 { 214 TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block"); 215 return (0); 216 } 217 218 DecoderState(tif)->dec_codetab = NULL; 219 DecoderState(tif)->dec_decode = NULL; 220 221 /* 222 * Setup predictor setup. 223 */ 224 (void) TIFFPredictorInit(tif); 225 226 sp = DecoderState(tif); 227 } 228 229 assert(sp != NULL); 230 231 if (sp->dec_codetab == NULL) { 232 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); 233 if (sp->dec_codetab == NULL) { 234 TIFFErrorExt(tif->tif_clientdata, module, 235 "No space for LZW code table"); 236 return (0); 237 } 238 /* 239 * Pre-load the table. 240 */ 241 code = 255; 242 do { 243 sp->dec_codetab[code].value = (unsigned char)code; 244 sp->dec_codetab[code].firstchar = (unsigned char)code; 245 sp->dec_codetab[code].length = 1; 246 sp->dec_codetab[code].next = NULL; 247 } while (code--); 248 /* 249 * Zero-out the unused entries 250 */ 251 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0, 252 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t)); 253 } 254 return (1); 255 } 256 257 /* 258 * Setup state for decoding a strip. 259 */ 260 static int 261 LZWPreDecode(TIFF* tif, uint16 s) 262 { 263 static const char module[] = "LZWPreDecode"; 264 LZWCodecState *sp = DecoderState(tif); 265 266 (void) s; 267 assert(sp != NULL); 268 if( sp->dec_codetab == NULL ) 269 { 270 tif->tif_setupdecode( tif ); 271 if( sp->dec_codetab == NULL ) 272 return (0); 273 } 274 275 /* 276 * Check for old bit-reversed codes. 277 */ 278 if (tif->tif_rawcc >= 2 && 279 tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { 280 #ifdef LZW_COMPAT 281 if (!sp->dec_decode) { 282 TIFFWarningExt(tif->tif_clientdata, module, 283 "Old-style LZW codes, convert file"); 284 /* 285 * Override default decoding methods with 286 * ones that deal with the old coding. 287 * Otherwise the predictor versions set 288 * above will call the compatibility routines 289 * through the dec_decode method. 290 */ 291 tif->tif_decoderow = LZWDecodeCompat; 292 tif->tif_decodestrip = LZWDecodeCompat; 293 tif->tif_decodetile = LZWDecodeCompat; 294 /* 295 * If doing horizontal differencing, must 296 * re-setup the predictor logic since we 297 * switched the basic decoder methods... 298 */ 299 (*tif->tif_setupdecode)(tif); 300 sp->dec_decode = LZWDecodeCompat; 301 } 302 sp->lzw_maxcode = MAXCODE(BITS_MIN); 303 #else /* !LZW_COMPAT */ 304 if (!sp->dec_decode) { 305 TIFFErrorExt(tif->tif_clientdata, module, 306 "Old-style LZW codes not supported"); 307 sp->dec_decode = LZWDecode; 308 } 309 return (0); 310 #endif/* !LZW_COMPAT */ 311 } else { 312 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; 313 sp->dec_decode = LZWDecode; 314 } 315 sp->lzw_nbits = BITS_MIN; 316 sp->lzw_nextbits = 0; 317 sp->lzw_nextdata = 0; 318 319 sp->dec_restart = 0; 320 sp->dec_nbitsmask = MAXCODE(BITS_MIN); 321 #ifdef LZW_CHECKEOS 322 sp->dec_bitsleft = 0; 323 #endif 324 sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; 325 /* 326 * Zero entries that are not yet filled in. We do 327 * this to guard against bogus input data that causes 328 * us to index into undefined entries. If you can 329 * come up with a way to safely bounds-check input codes 330 * while decoding then you can remove this operation. 331 */ 332 _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); 333 sp->dec_oldcodep = &sp->dec_codetab[-1]; 334 sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; 335 return (1); 336 } 337 338 /* 339 * Decode a "hunk of data". 340 */ 341 #define GetNextCode(sp, bp, code) { \ 342 nextdata = (nextdata<<8) | *(bp)++; \ 343 nextbits += 8; \ 344 if (nextbits < nbits) { \ 345 nextdata = (nextdata<<8) | *(bp)++; \ 346 nextbits += 8; \ 347 } \ 348 code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ 349 nextbits -= nbits; \ 350 } 351 352 static void 353 codeLoop(TIFF* tif, const char* module) 354 { 355 TIFFErrorExt(tif->tif_clientdata, module, 356 "Bogus encoding, loop in the code table; scanline %d", 357 tif->tif_row); 358 } 359 360 static int 361 LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 362 { 363 static const char module[] = "LZWDecode"; 364 LZWCodecState *sp = DecoderState(tif); 365 char *op = (char*) op0; 366 long occ = (long) occ0; 367 char *tp; 368 unsigned char *bp; 369 hcode_t code; 370 int len; 371 long nbits, nextbits, nbitsmask; 372 unsigned long nextdata; 373 code_t *codep, *free_entp, *maxcodep, *oldcodep; 374 375 (void) s; 376 assert(sp != NULL); 377 assert(sp->dec_codetab != NULL); 378 379 /* 380 Fail if value does not fit in long. 381 */ 382 if ((tmsize_t) occ != occ0) 383 return (0); 384 /* 385 * Restart interrupted output operation. 386 */ 387 if (sp->dec_restart) { 388 long residue; 389 390 codep = sp->dec_codep; 391 residue = codep->length - sp->dec_restart; 392 if (residue > occ) { 393 /* 394 * Residue from previous decode is sufficient 395 * to satisfy decode request. Skip to the 396 * start of the decoded string, place decoded 397 * values in the output buffer, and return. 398 */ 399 sp->dec_restart += occ; 400 do { 401 codep = codep->next; 402 } while (--residue > occ && codep); 403 if (codep) { 404 tp = op + occ; 405 do { 406 *--tp = codep->value; 407 codep = codep->next; 408 } while (--occ && codep); 409 } 410 return (1); 411 } 412 /* 413 * Residue satisfies only part of the decode request. 414 */ 415 op += residue; 416 occ -= residue; 417 tp = op; 418 do { 419 int t; 420 --tp; 421 t = codep->value; 422 codep = codep->next; 423 *tp = (char)t; 424 } while (--residue && codep); 425 sp->dec_restart = 0; 426 } 427 428 bp = (unsigned char *)tif->tif_rawcp; 429 #ifdef LZW_CHECKEOS 430 sp->dec_bitsleft = (((uint64)tif->tif_rawcc) << 3); 431 #endif 432 nbits = sp->lzw_nbits; 433 nextdata = sp->lzw_nextdata; 434 nextbits = sp->lzw_nextbits; 435 nbitsmask = sp->dec_nbitsmask; 436 oldcodep = sp->dec_oldcodep; 437 free_entp = sp->dec_free_entp; 438 maxcodep = sp->dec_maxcodep; 439 440 while (occ > 0) { 441 NextCode(tif, sp, bp, code, GetNextCode); 442 if (code == CODE_EOI) 443 break; 444 if (code == CODE_CLEAR) { 445 do { 446 free_entp = sp->dec_codetab + CODE_FIRST; 447 _TIFFmemset(free_entp, 0, 448 (CSIZE - CODE_FIRST) * sizeof (code_t)); 449 nbits = BITS_MIN; 450 nbitsmask = MAXCODE(BITS_MIN); 451 maxcodep = sp->dec_codetab + nbitsmask-1; 452 NextCode(tif, sp, bp, code, GetNextCode); 453 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 454 if (code == CODE_EOI) 455 break; 456 if (code > CODE_CLEAR) { 457 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 458 "LZWDecode: Corrupted LZW table at scanline %d", 459 tif->tif_row); 460 return (0); 461 } 462 *op++ = (char)code; 463 occ--; 464 oldcodep = sp->dec_codetab + code; 465 continue; 466 } 467 codep = sp->dec_codetab + code; 468 469 /* 470 * Add the new entry to the code table. 471 */ 472 if (free_entp < &sp->dec_codetab[0] || 473 free_entp >= &sp->dec_codetab[CSIZE]) { 474 TIFFErrorExt(tif->tif_clientdata, module, 475 "Corrupted LZW table at scanline %d", 476 tif->tif_row); 477 return (0); 478 } 479 480 free_entp->next = oldcodep; 481 if (free_entp->next < &sp->dec_codetab[0] || 482 free_entp->next >= &sp->dec_codetab[CSIZE]) { 483 TIFFErrorExt(tif->tif_clientdata, module, 484 "Corrupted LZW table at scanline %d", 485 tif->tif_row); 486 return (0); 487 } 488 free_entp->firstchar = free_entp->next->firstchar; 489 free_entp->length = free_entp->next->length+1; 490 free_entp->value = (codep < free_entp) ? 491 codep->firstchar : free_entp->firstchar; 492 if (++free_entp > maxcodep) { 493 if (++nbits > BITS_MAX) /* should not happen */ 494 nbits = BITS_MAX; 495 nbitsmask = MAXCODE(nbits); 496 maxcodep = sp->dec_codetab + nbitsmask-1; 497 } 498 oldcodep = codep; 499 if (code >= 256) { 500 /* 501 * Code maps to a string, copy string 502 * value to output (written in reverse). 503 */ 504 if(codep->length == 0) { 505 TIFFErrorExt(tif->tif_clientdata, module, 506 "Wrong length of decoded string: " 507 "data probably corrupted at scanline %d", 508 tif->tif_row); 509 return (0); 510 } 511 if (codep->length > occ) { 512 /* 513 * String is too long for decode buffer, 514 * locate portion that will fit, copy to 515 * the decode buffer, and setup restart 516 * logic for the next decoding call. 517 */ 518 sp->dec_codep = codep; 519 do { 520 codep = codep->next; 521 } while (codep && codep->length > occ); 522 if (codep) { 523 sp->dec_restart = (long)occ; 524 tp = op + occ; 525 do { 526 *--tp = codep->value; 527 codep = codep->next; 528 } while (--occ && codep); 529 if (codep) 530 codeLoop(tif, module); 531 } 532 break; 533 } 534 len = codep->length; 535 tp = op + len; 536 do { 537 int t; 538 --tp; 539 t = codep->value; 540 codep = codep->next; 541 *tp = (char)t; 542 } while (codep && tp > op); 543 if (codep) { 544 codeLoop(tif, module); 545 break; 546 } 547 assert(occ >= len); 548 op += len; 549 occ -= len; 550 } else { 551 *op++ = (char)code; 552 occ--; 553 } 554 } 555 556 tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp ); 557 tif->tif_rawcp = (uint8*) bp; 558 sp->lzw_nbits = (unsigned short) nbits; 559 sp->lzw_nextdata = nextdata; 560 sp->lzw_nextbits = nextbits; 561 sp->dec_nbitsmask = nbitsmask; 562 sp->dec_oldcodep = oldcodep; 563 sp->dec_free_entp = free_entp; 564 sp->dec_maxcodep = maxcodep; 565 566 if (occ > 0) { 567 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 568 TIFFErrorExt(tif->tif_clientdata, module, 569 "Not enough data at scanline %d (short %I64d bytes)", 570 tif->tif_row, (unsigned __int64) occ); 571 #else 572 TIFFErrorExt(tif->tif_clientdata, module, 573 "Not enough data at scanline %d (short %llu bytes)", 574 tif->tif_row, (unsigned long long) occ); 575 #endif 576 return (0); 577 } 578 return (1); 579 } 580 581 #ifdef LZW_COMPAT 582 /* 583 * Decode a "hunk of data" for old images. 584 */ 585 #define GetNextCodeCompat(sp, bp, code) { \ 586 nextdata |= (unsigned long) *(bp)++ << nextbits; \ 587 nextbits += 8; \ 588 if (nextbits < nbits) { \ 589 nextdata |= (unsigned long) *(bp)++ << nextbits;\ 590 nextbits += 8; \ 591 } \ 592 code = (hcode_t)(nextdata & nbitsmask); \ 593 nextdata >>= nbits; \ 594 nextbits -= nbits; \ 595 } 596 597 static int 598 LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 599 { 600 static const char module[] = "LZWDecodeCompat"; 601 LZWCodecState *sp = DecoderState(tif); 602 char *op = (char*) op0; 603 long occ = (long) occ0; 604 char *tp; 605 unsigned char *bp; 606 int code, nbits; 607 long nextbits, nextdata, nbitsmask; 608 code_t *codep, *free_entp, *maxcodep, *oldcodep; 609 610 (void) s; 611 assert(sp != NULL); 612 613 /* 614 Fail if value does not fit in long. 615 */ 616 if ((tmsize_t) occ != occ0) 617 return (0); 618 619 /* 620 * Restart interrupted output operation. 621 */ 622 if (sp->dec_restart) { 623 long residue; 624 625 codep = sp->dec_codep; 626 residue = codep->length - sp->dec_restart; 627 if (residue > occ) { 628 /* 629 * Residue from previous decode is sufficient 630 * to satisfy decode request. Skip to the 631 * start of the decoded string, place decoded 632 * values in the output buffer, and return. 633 */ 634 sp->dec_restart += occ; 635 do { 636 codep = codep->next; 637 } while (--residue > occ); 638 tp = op + occ; 639 do { 640 *--tp = codep->value; 641 codep = codep->next; 642 } while (--occ); 643 return (1); 644 } 645 /* 646 * Residue satisfies only part of the decode request. 647 */ 648 op += residue; 649 occ -= residue; 650 tp = op; 651 do { 652 *--tp = codep->value; 653 codep = codep->next; 654 } while (--residue); 655 sp->dec_restart = 0; 656 } 657 658 bp = (unsigned char *)tif->tif_rawcp; 659 #ifdef LZW_CHECKEOS 660 sp->dec_bitsleft = (((uint64)tif->tif_rawcc) << 3); 661 #endif 662 nbits = sp->lzw_nbits; 663 nextdata = sp->lzw_nextdata; 664 nextbits = sp->lzw_nextbits; 665 nbitsmask = sp->dec_nbitsmask; 666 oldcodep = sp->dec_oldcodep; 667 free_entp = sp->dec_free_entp; 668 maxcodep = sp->dec_maxcodep; 669 670 while (occ > 0) { 671 NextCode(tif, sp, bp, code, GetNextCodeCompat); 672 if (code == CODE_EOI) 673 break; 674 if (code == CODE_CLEAR) { 675 do { 676 free_entp = sp->dec_codetab + CODE_FIRST; 677 _TIFFmemset(free_entp, 0, 678 (CSIZE - CODE_FIRST) * sizeof (code_t)); 679 nbits = BITS_MIN; 680 nbitsmask = MAXCODE(BITS_MIN); 681 maxcodep = sp->dec_codetab + nbitsmask; 682 NextCode(tif, sp, bp, code, GetNextCodeCompat); 683 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 684 if (code == CODE_EOI) 685 break; 686 if (code > CODE_CLEAR) { 687 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 688 "LZWDecode: Corrupted LZW table at scanline %d", 689 tif->tif_row); 690 return (0); 691 } 692 *op++ = (char)code; 693 occ--; 694 oldcodep = sp->dec_codetab + code; 695 continue; 696 } 697 codep = sp->dec_codetab + code; 698 699 /* 700 * Add the new entry to the code table. 701 */ 702 if (free_entp < &sp->dec_codetab[0] || 703 free_entp >= &sp->dec_codetab[CSIZE]) { 704 TIFFErrorExt(tif->tif_clientdata, module, 705 "Corrupted LZW table at scanline %d", tif->tif_row); 706 return (0); 707 } 708 709 free_entp->next = oldcodep; 710 if (free_entp->next < &sp->dec_codetab[0] || 711 free_entp->next >= &sp->dec_codetab[CSIZE]) { 712 TIFFErrorExt(tif->tif_clientdata, module, 713 "Corrupted LZW table at scanline %d", tif->tif_row); 714 return (0); 715 } 716 free_entp->firstchar = free_entp->next->firstchar; 717 free_entp->length = free_entp->next->length+1; 718 free_entp->value = (codep < free_entp) ? 719 codep->firstchar : free_entp->firstchar; 720 if (++free_entp > maxcodep) { 721 if (++nbits > BITS_MAX) /* should not happen */ 722 nbits = BITS_MAX; 723 nbitsmask = MAXCODE(nbits); 724 maxcodep = sp->dec_codetab + nbitsmask; 725 } 726 oldcodep = codep; 727 if (code >= 256) { 728 /* 729 * Code maps to a string, copy string 730 * value to output (written in reverse). 731 */ 732 if(codep->length == 0) { 733 TIFFErrorExt(tif->tif_clientdata, module, 734 "Wrong length of decoded " 735 "string: data probably corrupted at scanline %d", 736 tif->tif_row); 737 return (0); 738 } 739 if (codep->length > occ) { 740 /* 741 * String is too long for decode buffer, 742 * locate portion that will fit, copy to 743 * the decode buffer, and setup restart 744 * logic for the next decoding call. 745 */ 746 sp->dec_codep = codep; 747 do { 748 codep = codep->next; 749 } while (codep->length > occ); 750 sp->dec_restart = occ; 751 tp = op + occ; 752 do { 753 *--tp = codep->value; 754 codep = codep->next; 755 } while (--occ); 756 break; 757 } 758 assert(occ >= codep->length); 759 op += codep->length; 760 occ -= codep->length; 761 tp = op; 762 do { 763 *--tp = codep->value; 764 } while( (codep = codep->next) != NULL ); 765 } else { 766 *op++ = (char)code; 767 occ--; 768 } 769 } 770 771 tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp ); 772 tif->tif_rawcp = (uint8*) bp; 773 sp->lzw_nbits = (unsigned short)nbits; 774 sp->lzw_nextdata = nextdata; 775 sp->lzw_nextbits = nextbits; 776 sp->dec_nbitsmask = nbitsmask; 777 sp->dec_oldcodep = oldcodep; 778 sp->dec_free_entp = free_entp; 779 sp->dec_maxcodep = maxcodep; 780 781 if (occ > 0) { 782 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 783 TIFFErrorExt(tif->tif_clientdata, module, 784 "Not enough data at scanline %d (short %I64d bytes)", 785 tif->tif_row, (unsigned __int64) occ); 786 #else 787 TIFFErrorExt(tif->tif_clientdata, module, 788 "Not enough data at scanline %d (short %llu bytes)", 789 tif->tif_row, (unsigned long long) occ); 790 #endif 791 return (0); 792 } 793 return (1); 794 } 795 #endif /* LZW_COMPAT */ 796 797 /* 798 * LZW Encoding. 799 */ 800 801 static int 802 LZWSetupEncode(TIFF* tif) 803 { 804 static const char module[] = "LZWSetupEncode"; 805 LZWCodecState* sp = EncoderState(tif); 806 807 assert(sp != NULL); 808 sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); 809 if (sp->enc_hashtab == NULL) { 810 TIFFErrorExt(tif->tif_clientdata, module, 811 "No space for LZW hash table"); 812 return (0); 813 } 814 return (1); 815 } 816 817 /* 818 * Reset encoding state at the start of a strip. 819 */ 820 static int 821 LZWPreEncode(TIFF* tif, uint16 s) 822 { 823 LZWCodecState *sp = EncoderState(tif); 824 825 (void) s; 826 assert(sp != NULL); 827 828 if( sp->enc_hashtab == NULL ) 829 { 830 tif->tif_setupencode( tif ); 831 } 832 833 sp->lzw_nbits = BITS_MIN; 834 sp->lzw_maxcode = MAXCODE(BITS_MIN); 835 sp->lzw_free_ent = CODE_FIRST; 836 sp->lzw_nextbits = 0; 837 sp->lzw_nextdata = 0; 838 sp->enc_checkpoint = CHECK_GAP; 839 sp->enc_ratio = 0; 840 sp->enc_incount = 0; 841 sp->enc_outcount = 0; 842 /* 843 * The 4 here insures there is space for 2 max-sized 844 * codes in LZWEncode and LZWPostDecode. 845 */ 846 sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; 847 cl_hash(sp); /* clear hash table */ 848 sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ 849 return (1); 850 } 851 852 #define CALCRATIO(sp, rat) { \ 853 if (incount > 0x007fffff) { /* NB: shift will overflow */\ 854 rat = outcount >> 8; \ 855 rat = (rat == 0 ? 0x7fffffff : incount/rat); \ 856 } else \ 857 rat = (incount<<8) / outcount; \ 858 } 859 860 /* Explicit 0xff masking to make icc -check=conversions happy */ 861 #define PutNextCode(op, c) { \ 862 nextdata = (nextdata << nbits) | c; \ 863 nextbits += nbits; \ 864 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 865 nextbits -= 8; \ 866 if (nextbits >= 8) { \ 867 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 868 nextbits -= 8; \ 869 } \ 870 outcount += nbits; \ 871 } 872 873 /* 874 * Encode a chunk of pixels. 875 * 876 * Uses an open addressing double hashing (no chaining) on the 877 * prefix code/next character combination. We do a variant of 878 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's 879 * relatively-prime secondary probe. Here, the modular division 880 * first probe is gives way to a faster exclusive-or manipulation. 881 * Also do block compression with an adaptive reset, whereby the 882 * code table is cleared when the compression ratio decreases, 883 * but after the table fills. The variable-length output codes 884 * are re-sized at this point, and a CODE_CLEAR is generated 885 * for the decoder. 886 */ 887 static int 888 LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) 889 { 890 register LZWCodecState *sp = EncoderState(tif); 891 register long fcode; 892 register hash_t *hp; 893 register int h, c; 894 hcode_t ent; 895 long disp; 896 long incount, outcount, checkpoint; 897 unsigned long nextdata; 898 long nextbits; 899 int free_ent, maxcode, nbits; 900 uint8* op; 901 uint8* limit; 902 903 (void) s; 904 if (sp == NULL) 905 return (0); 906 907 assert(sp->enc_hashtab != NULL); 908 909 /* 910 * Load local state. 911 */ 912 incount = sp->enc_incount; 913 outcount = sp->enc_outcount; 914 checkpoint = sp->enc_checkpoint; 915 nextdata = sp->lzw_nextdata; 916 nextbits = sp->lzw_nextbits; 917 free_ent = sp->lzw_free_ent; 918 maxcode = sp->lzw_maxcode; 919 nbits = sp->lzw_nbits; 920 op = tif->tif_rawcp; 921 limit = sp->enc_rawlimit; 922 ent = (hcode_t)sp->enc_oldcode; 923 924 if (ent == (hcode_t) -1 && cc > 0) { 925 /* 926 * NB: This is safe because it can only happen 927 * at the start of a strip where we know there 928 * is space in the data buffer. 929 */ 930 PutNextCode(op, CODE_CLEAR); 931 ent = *bp++; cc--; incount++; 932 } 933 while (cc > 0) { 934 c = *bp++; cc--; incount++; 935 fcode = ((long)c << BITS_MAX) + ent; 936 h = (c << HSHIFT) ^ ent; /* xor hashing */ 937 #ifdef _WINDOWS 938 /* 939 * Check hash index for an overflow. 940 */ 941 if (h >= HSIZE) 942 h -= HSIZE; 943 #endif 944 hp = &sp->enc_hashtab[h]; 945 if (hp->hash == fcode) { 946 ent = hp->code; 947 continue; 948 } 949 if (hp->hash >= 0) { 950 /* 951 * Primary hash failed, check secondary hash. 952 */ 953 disp = HSIZE - h; 954 if (h == 0) 955 disp = 1; 956 do { 957 /* 958 * Avoid pointer arithmetic because of 959 * wraparound problems with segments. 960 */ 961 if ((h -= disp) < 0) 962 h += HSIZE; 963 hp = &sp->enc_hashtab[h]; 964 if (hp->hash == fcode) { 965 ent = hp->code; 966 goto hit; 967 } 968 } while (hp->hash >= 0); 969 } 970 /* 971 * New entry, emit code and add to table. 972 */ 973 /* 974 * Verify there is space in the buffer for the code 975 * and any potential Clear code that might be emitted 976 * below. The value of limit is setup so that there 977 * are at least 4 bytes free--room for 2 codes. 978 */ 979 if (op > limit) { 980 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 981 if( !TIFFFlushData1(tif) ) 982 return 0; 983 op = tif->tif_rawdata; 984 } 985 PutNextCode(op, ent); 986 ent = (hcode_t)c; 987 hp->code = (hcode_t)(free_ent++); 988 hp->hash = fcode; 989 if (free_ent == CODE_MAX-1) { 990 /* table is full, emit clear code and reset */ 991 cl_hash(sp); 992 sp->enc_ratio = 0; 993 incount = 0; 994 outcount = 0; 995 free_ent = CODE_FIRST; 996 PutNextCode(op, CODE_CLEAR); 997 nbits = BITS_MIN; 998 maxcode = MAXCODE(BITS_MIN); 999 } else { 1000 /* 1001 * If the next entry is going to be too big for 1002 * the code size, then increase it, if possible. 1003 */ 1004 if (free_ent > maxcode) { 1005 nbits++; 1006 assert(nbits <= BITS_MAX); 1007 maxcode = (int) MAXCODE(nbits); 1008 } else if (incount >= checkpoint) { 1009 long rat; 1010 /* 1011 * Check compression ratio and, if things seem 1012 * to be slipping, clear the hash table and 1013 * reset state. The compression ratio is a 1014 * 24+8-bit fractional number. 1015 */ 1016 checkpoint = incount+CHECK_GAP; 1017 CALCRATIO(sp, rat); 1018 if (rat <= sp->enc_ratio) { 1019 cl_hash(sp); 1020 sp->enc_ratio = 0; 1021 incount = 0; 1022 outcount = 0; 1023 free_ent = CODE_FIRST; 1024 PutNextCode(op, CODE_CLEAR); 1025 nbits = BITS_MIN; 1026 maxcode = MAXCODE(BITS_MIN); 1027 } else 1028 sp->enc_ratio = rat; 1029 } 1030 } 1031 hit: 1032 ; 1033 } 1034 1035 /* 1036 * Restore global state. 1037 */ 1038 sp->enc_incount = incount; 1039 sp->enc_outcount = outcount; 1040 sp->enc_checkpoint = checkpoint; 1041 sp->enc_oldcode = ent; 1042 sp->lzw_nextdata = nextdata; 1043 sp->lzw_nextbits = nextbits; 1044 sp->lzw_free_ent = (unsigned short)free_ent; 1045 sp->lzw_maxcode = (unsigned short)maxcode; 1046 sp->lzw_nbits = (unsigned short)nbits; 1047 tif->tif_rawcp = op; 1048 return (1); 1049 } 1050 1051 /* 1052 * Finish off an encoded strip by flushing the last 1053 * string and tacking on an End Of Information code. 1054 */ 1055 static int 1056 LZWPostEncode(TIFF* tif) 1057 { 1058 register LZWCodecState *sp = EncoderState(tif); 1059 uint8* op = tif->tif_rawcp; 1060 long nextbits = sp->lzw_nextbits; 1061 unsigned long nextdata = sp->lzw_nextdata; 1062 long outcount = sp->enc_outcount; 1063 int nbits = sp->lzw_nbits; 1064 1065 if (op > sp->enc_rawlimit) { 1066 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1067 if( !TIFFFlushData1(tif) ) 1068 return 0; 1069 op = tif->tif_rawdata; 1070 } 1071 if (sp->enc_oldcode != (hcode_t) -1) { 1072 int free_ent = sp->lzw_free_ent; 1073 1074 PutNextCode(op, sp->enc_oldcode); 1075 sp->enc_oldcode = (hcode_t) -1; 1076 free_ent ++; 1077 1078 if (free_ent == CODE_MAX-1) { 1079 /* table is full, emit clear code and reset */ 1080 outcount = 0; 1081 PutNextCode(op, CODE_CLEAR); 1082 nbits = BITS_MIN; 1083 } else { 1084 /* 1085 * If the next entry is going to be too big for 1086 * the code size, then increase it, if possible. 1087 */ 1088 if (free_ent > sp->lzw_maxcode) { 1089 nbits++; 1090 assert(nbits <= BITS_MAX); 1091 } 1092 } 1093 } 1094 PutNextCode(op, CODE_EOI); 1095 /* Explicit 0xff masking to make icc -check=conversions happy */ 1096 if (nextbits > 0) 1097 *op++ = (unsigned char)((nextdata << (8-nextbits))&0xff); 1098 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1099 return (1); 1100 } 1101 1102 /* 1103 * Reset encoding hash table. 1104 */ 1105 static void 1106 cl_hash(LZWCodecState* sp) 1107 { 1108 register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; 1109 register long i = HSIZE-8; 1110 1111 do { 1112 i -= 8; 1113 hp[-7].hash = -1; 1114 hp[-6].hash = -1; 1115 hp[-5].hash = -1; 1116 hp[-4].hash = -1; 1117 hp[-3].hash = -1; 1118 hp[-2].hash = -1; 1119 hp[-1].hash = -1; 1120 hp[ 0].hash = -1; 1121 hp -= 8; 1122 } while (i >= 0); 1123 for (i += 8; i > 0; i--, hp--) 1124 hp->hash = -1; 1125 } 1126 1127 static void 1128 LZWCleanup(TIFF* tif) 1129 { 1130 (void)TIFFPredictorCleanup(tif); 1131 1132 assert(tif->tif_data != 0); 1133 1134 if (DecoderState(tif)->dec_codetab) 1135 _TIFFfree(DecoderState(tif)->dec_codetab); 1136 1137 if (EncoderState(tif)->enc_hashtab) 1138 _TIFFfree(EncoderState(tif)->enc_hashtab); 1139 1140 _TIFFfree(tif->tif_data); 1141 tif->tif_data = NULL; 1142 1143 _TIFFSetDefaultCompressionState(tif); 1144 } 1145 1146 int 1147 TIFFInitLZW(TIFF* tif, int scheme) 1148 { 1149 static const char module[] = "TIFFInitLZW"; 1150 assert(scheme == COMPRESSION_LZW); 1151 /* 1152 * Allocate state block so tag methods have storage to record values. 1153 */ 1154 tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState)); 1155 if (tif->tif_data == NULL) 1156 goto bad; 1157 DecoderState(tif)->dec_codetab = NULL; 1158 DecoderState(tif)->dec_decode = NULL; 1159 EncoderState(tif)->enc_hashtab = NULL; 1160 LZWState(tif)->rw_mode = tif->tif_mode; 1161 1162 /* 1163 * Install codec methods. 1164 */ 1165 tif->tif_fixuptags = LZWFixupTags; 1166 tif->tif_setupdecode = LZWSetupDecode; 1167 tif->tif_predecode = LZWPreDecode; 1168 tif->tif_decoderow = LZWDecode; 1169 tif->tif_decodestrip = LZWDecode; 1170 tif->tif_decodetile = LZWDecode; 1171 tif->tif_setupencode = LZWSetupEncode; 1172 tif->tif_preencode = LZWPreEncode; 1173 tif->tif_postencode = LZWPostEncode; 1174 tif->tif_encoderow = LZWEncode; 1175 tif->tif_encodestrip = LZWEncode; 1176 tif->tif_encodetile = LZWEncode; 1177 tif->tif_cleanup = LZWCleanup; 1178 /* 1179 * Setup predictor setup. 1180 */ 1181 (void) TIFFPredictorInit(tif); 1182 return (1); 1183 bad: 1184 TIFFErrorExt(tif->tif_clientdata, module, 1185 "No space for LZW state block"); 1186 return (0); 1187 } 1188 1189 /* 1190 * Copyright (c) 1985, 1986 The Regents of the University of California. 1191 * All rights reserved. 1192 * 1193 * This code is derived from software contributed to Berkeley by 1194 * James A. Woods, derived from original work by Spencer Thomas 1195 * and Joseph Orost. 1196 * 1197 * Redistribution and use in source and binary forms are permitted 1198 * provided that the above copyright notice and this paragraph are 1199 * duplicated in all such forms and that any documentation, 1200 * advertising materials, and other materials related to such 1201 * distribution and use acknowledge that the software was developed 1202 * by the University of California, Berkeley. The name of the 1203 * University may not be used to endorse or promote products derived 1204 * from this software without specific prior written permission. 1205 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1206 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1207 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1208 */ 1209 #endif /* LZW_SUPPORT */ 1210 1211 /* vim: set ts=8 sts=8 sw=8 noet: */ 1212 /* 1213 * Local Variables: 1214 * mode: c 1215 * c-basic-offset: 8 1216 * fill-column: 78 1217 * End: 1218 */ 1219