1 /*
2   Copyright (c) 1990-2008 Info-ZIP.  All rights reserved.
3 
4   See the accompanying file LICENSE, version 2000-Apr-09 or later
5   (the contents of which are also included in unzip.h) for terms of use.
6   If, for some reason, all these files are missing, the Info-ZIP license
7   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8 */
9 /*---------------------------------------------------------------------------
10 
11   unshrink.c                     version 1.22                     19 Mar 2008
12 
13 
14        NOTE:  This code may or may not infringe on the so-called "Welch
15        patent" owned by Unisys.  (From reading the patent, it appears
16        that a pure LZW decompressor is *not* covered, but this claim has
17        not been tested in court, and Unisys is reported to believe other-
18        wise.)  It is therefore the responsibility of the user to acquire
19        whatever license(s) may be required for legal use of this code.
20 
21        THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
22        IN VIOLATION OF APPLICABLE PATENT LAW.
23 
24 
25   Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
26   up to 13 bits; in addition, there is provision for partial clearing of
27   leaf nodes.  PKWARE uses the special code 256 (decimal) to indicate a
28   change in code size or a partial clear of the code tree:  256,1 for the
29   former and 256,2 for the latter.  [Note that partial clearing can "orphan"
30   nodes:  the parent-to-be can be cleared before its new child is added,
31   but the child is added anyway (as an orphan, as though the parent still
32   existed).  When the tree fills up to the point where the parent node is
33   reused, the orphan is effectively "adopted."  Versions prior to 1.05 were
34   affected more due to greater use of pointers (to children and siblings
35   as well as parents).]
36 
37   This replacement version of unshrink.c was written from scratch.  It is
38   based only on the algorithms described in Mark Nelson's _The Data Compres-
39   sion Book_ and in Terry Welch's original paper in the June 1984 issue of
40   IEEE _Computer_; no existing source code, including any in Nelson's book,
41   was used.
42 
43   Memory requirements have been reduced in this version and are now no more
44   than the original Sam Smith code.  This is still larger than any of the
45   other algorithms:  at a minimum, 8K+8K+16K (stack+values+parents) assuming
46   16-bit short ints, and this does not even include the output buffer (the
47   other algorithms leave the uncompressed data in the work area, typically
48   called slide[]).  For machines with a 64KB data space this is a problem,
49   particularly when text conversion is required and line endings have more
50   than one character.  UnZip's solution is to use two roughly equal halves
51   of outbuf for the ASCII conversion in such a case; the "unshrink" argument
52   to flush() signals that this is the case.
53 
54   For large-memory machines, a second outbuf is allocated for translations,
55   but only if unshrinking and only if translations are required.
56 
57               | binary mode  |        text mode
58     ---------------------------------------------------
59     big mem   |  big outbuf  | big outbuf + big outbuf2  <- malloc'd here
60     small mem | small outbuf | half + half small outbuf
61 
62   Copyright 1994, 1995 Greg Roelofs.  See the accompanying file "COPYING"
63   in UnZip 5.20 (or later) source or binary distributions.
64 
65   ---------------------------------------------------------------------------*/
66 
67 
68 #define __UNSHRINK_C    /* identifies this source module */
69 #define UNZIP_INTERNAL
70 #include "unzip.h"
71 
72 
73 #ifndef LZW_CLEAN
74 
75 static void  partial_clear  OF((__GPRO__ int lastcodeused));
76 
77 #ifdef DEBUG
78 #  define OUTDBG(c) \
79    if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
80 #else
81 #  define OUTDBG(c)
82 #endif
83 
84 /* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */
85 #define BOGUSCODE  256
86 #define FLAG_BITS  parent        /* upper bits of parent[] used as flag bits */
87 #define CODE_MASK  (HSIZE - 1)   /* 0x1fff (lower bits are parent's index) */
88 #define FREE_CODE  HSIZE         /* 0x2000 (code is unused or was cleared) */
89 #define HAS_CHILD  (HSIZE << 1)  /* 0x4000 (code has a child--do not clear) */
90 
91 #define parent G.area.shrink.Parent
92 #define Value  G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
93 #define stack  G.area.shrink.Stack
94 
95 
96 /***********************/
97 /* Function unshrink() */
98 /***********************/
99 
unshrink(__G)100 int unshrink(__G)
101      __GDEF
102 {
103     uch *stacktop = stack + (HSIZE - 1);
104     register uch *newstr;
105     uch finalval;
106     int codesize=9, len, error;
107     shrint code, oldcode, curcode;
108     shrint lastfreecode;
109     unsigned int outbufsiz;
110 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
111     /* Normally realbuf and outbuf will be the same.  However, if the data
112      * are redirected to a large memory buffer, realbuf will point to the
113      * new location while outbuf will remain pointing to the malloc'd
114      * memory buffer. */
115     uch *realbuf = G.outbuf;
116 #else
117 #   define realbuf G.outbuf
118 #endif
119 
120 
121 /*---------------------------------------------------------------------------
122     Initialize various variables.
123   ---------------------------------------------------------------------------*/
124 
125     lastfreecode = BOGUSCODE;
126 
127 #ifndef VMS     /* VMS uses its own buffer scheme for textmode flush(). */
128 #ifndef SMALL_MEM
129     /* non-memory-limited machines:  allocate second (large) buffer for
130      * textmode conversion in flush(), but only if needed */
131     if (G.pInfo->textmode && !G.outbuf2 &&
132         (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
133         return PK_MEM3;
134 #endif
135 #endif /* !VMS */
136 
137     for (code = 0;  code < BOGUSCODE;  ++code) {
138         Value[code] = (uch)code;
139         parent[code] = BOGUSCODE;
140     }
141     for (code = BOGUSCODE+1;  code < HSIZE;  ++code)
142         parent[code] = FREE_CODE;
143 
144 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
145     if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
146         realbuf = G.redirect_buffer;
147         outbufsiz = (unsigned)G.redirect_size;
148     } else
149 #endif
150 #ifdef DLL
151     if (G.pInfo->textmode && !G.redirect_data)
152 #else
153     if (G.pInfo->textmode)
154 #endif
155         outbufsiz = RAWBUFSIZ;
156     else
157         outbufsiz = OUTBUFSIZ;
158     G.outptr = realbuf;
159     G.outcnt = 0L;
160 
161 /*---------------------------------------------------------------------------
162     Get and output first code, then loop over remaining ones.
163   ---------------------------------------------------------------------------*/
164 
165     READBITS(codesize, oldcode)
166     if (G.zipeof)
167         return PK_OK;
168 
169     finalval = (uch)oldcode;
170     OUTDBG(finalval)
171     *G.outptr++ = finalval;
172     ++G.outcnt;
173 
174     while (TRUE) {
175         READBITS(codesize, code)
176         if (G.zipeof)
177             break;
178         if (code == BOGUSCODE) {   /* possible to have consecutive escapes? */
179             READBITS(codesize, code)
180             if (G.zipeof)
181                 break;
182             if (code == 1) {
183                 ++codesize;
184                 Trace((stderr, " (codesize now %d bits)\n", codesize));
185                 if (codesize > MAX_BITS) return PK_ERR;
186             } else if (code == 2) {
187                 Trace((stderr, " (partial clear code)\n"));
188                 /* clear leafs (nodes with no children) */
189                 partial_clear(__G__ lastfreecode);
190                 Trace((stderr, " (done with partial clear)\n"));
191                 lastfreecode = BOGUSCODE; /* reset start of free-node search */
192             }
193             continue;
194         }
195 
196     /*-----------------------------------------------------------------------
197         Translate code:  traverse tree from leaf back to root.
198       -----------------------------------------------------------------------*/
199 
200         newstr = stacktop;
201         curcode = code;
202 
203         if (parent[code] == FREE_CODE) {
204             /* or (FLAG_BITS[code] & FREE_CODE)? */
205             Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
206               oldcode));
207             *newstr-- = finalval;
208             code = oldcode;
209         }
210 
211         while (code != BOGUSCODE) {
212             if (newstr < stack) {
213                 /* Bogus compression stream caused buffer underflow! */
214                 Trace((stderr, "unshrink stack overflow!\n"));
215                 return PK_ERR;
216             }
217             if (parent[code] == FREE_CODE) {
218                 /* or (FLAG_BITS[code] & FREE_CODE)? */
219                 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n",
220                   code, oldcode));
221                 *newstr-- = finalval;
222                 code = oldcode;
223             } else {
224                 *newstr-- = Value[code];
225                 code = (shrint)(parent[code] & CODE_MASK);
226             }
227         }
228 
229         len = (int)(stacktop - newstr++);
230         finalval = *newstr;
231 
232     /*-----------------------------------------------------------------------
233         Write expanded string in reverse order to output buffer.
234       -----------------------------------------------------------------------*/
235 
236         Trace((stderr,
237           "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode,
238           oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr,
239           len));
240 
241         {
242             register uch *p;
243 
244             for (p = newstr;  p < newstr+len;  ++p) {
245                 *G.outptr++ = *p;
246                 OUTDBG(*p)
247                 if (++G.outcnt == outbufsiz) {
248                     Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
249                     if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
250                         Trace((stderr, "unshrink:  flush() error (%d)\n",
251                           error));
252                         return error;
253                     }
254                     G.outptr = realbuf;
255                     G.outcnt = 0L;
256                     Trace((stderr, "done with flush()\n"));
257                 }
258             }
259         }
260 
261     /*-----------------------------------------------------------------------
262         Add new leaf (first character of newstr) to tree as child of oldcode.
263       -----------------------------------------------------------------------*/
264 
265         /* search for freecode */
266         code = (shrint)(lastfreecode + 1);
267         /* add if-test before loop for speed? */
268         while ((code < HSIZE) && (parent[code] != FREE_CODE))
269             ++code;
270         lastfreecode = code;
271         Trace((stderr, "]; newcode %d\n", code));
272         if (code >= HSIZE)
273             /* invalid compressed data caused max-code overflow! */
274             return PK_ERR;
275 
276         Value[code] = finalval;
277         parent[code] = oldcode;
278         oldcode = curcode;
279 
280     }
281 
282 /*---------------------------------------------------------------------------
283     Flush any remaining data and return to sender...
284   ---------------------------------------------------------------------------*/
285 
286     if (G.outcnt > 0L) {
287         Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
288         if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
289             Trace((stderr, "unshrink:  flush() error (%d)\n", error));
290             return error;
291         }
292         Trace((stderr, "done with flush()\n"));
293     }
294 
295     return PK_OK;
296 
297 } /* end function unshrink() */
298 
299 
300 
301 
302 
303 /****************************/
304 /* Function partial_clear() */      /* no longer recursive... */
305 /****************************/
306 
307 static void partial_clear(__G__ lastcodeused)
308     __GDEF
309     int lastcodeused;
310 {
311     register shrint code;
312 
313     /* clear all nodes which have no children (i.e., leaf nodes only) */
314 
315     /* first loop:  mark each parent as such */
316     for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
317         register shrint cparent = (shrint)(parent[code] & CODE_MASK);
318 
319         if (cparent > BOGUSCODE)
320             FLAG_BITS[cparent] |= HAS_CHILD;   /* set parent's child-bit */
321     }
322 
323     /* second loop:  clear all nodes *not* marked as parents; reset flag bits */
324     for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
325         if (FLAG_BITS[code] & HAS_CHILD)    /* just clear child-bit */
326             FLAG_BITS[code] &= ~HAS_CHILD;
327         else {                              /* leaf:  lose it */
328             Trace((stderr, "%d\n", code));
329             parent[code] = FREE_CODE;
330         }
331     }
332 
333     return;
334 }
335 
336 #endif /* !LZW_CLEAN */
337