1 /*
2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, May 2003.
3 *
4 * Derived from unzip 5.40.
5 *
6 * Sccsid @(#)unshrink.c 1.4 (gritter) 6/18/04
7 */
8 /*---------------------------------------------------------------------------
9
10 unshrink.c version 1.21 23 Nov 95
11
12
13 NOTE: This code may or may not infringe on the so-called "Welch
14 patent" owned by Unisys. (From reading the patent, it appears
15 that a pure LZW decompressor is *not* covered, but this claim has
16 not been tested in court, and Unisys is reported to believe other-
17 wise.) It is therefore the responsibility of the user to acquire
18 whatever license(s) may be required for legal use of this code.
19
20 THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
21 IN VIOLATION OF APPLICABLE PATENT LAW.
22
23
24 Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
25 up to 13 bits; in addition, there is provision for partial clearing of
26 leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a
27 change in code size or a partial clear of the code tree: 256,1 for the
28 former and 256,2 for the latter. [Note that partial clearing can "orphan"
29 nodes: the parent-to-be can be cleared before its new child is added,
30 but the child is added anyway (as an orphan, as though the parent still
31 existed). When the tree fills up to the point where the parent node is
32 reused, the orphan is effectively "adopted." Versions prior to 1.05 were
33 affected more due to greater use of pointers (to children and siblings
34 as well as parents).]
35
36 This replacement version of unshrink.c was written from scratch. It is
37 based only on the algorithms described in Mark Nelson's _The Data Compres-
38 sion Book_ and in Terry Welch's original paper in the June 1984 issue of
39 IEEE _Computer_; no existing source code, including any in Nelson's book,
40 was used.
41
42 Memory requirements have been reduced in this version and are now no more
43 than the original Sam Smith code. This is still larger than any of the
44 other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming
45 16-bit short ints, and this does not even include the output buffer (the
46 other algorithms leave the uncompressed data in the work area, typically
47 called slide[]). For machines with a 64KB data space this is a problem,
48 particularly when text conversion is required and line endings have more
49 than one character. UnZip's solution is to use two roughly equal halves
50 of outbuf for the ASCII conversion in such a case; the "unshrink" argument
51 to flush() signals that this is the case.
52
53 For large-memory machines, a second outbuf is allocated for translations,
54 but only if unshrinking and only if translations are required.
55
56 | binary mode | text mode
57 ---------------------------------------------------
58 big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here
59 small mem | small outbuf | half + half small outbuf
60
61 Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING"
62 in UnZip 5.20 (or later) source or binary distributions.
63
64 From "COPYING":
65
66 The following copyright applies to the new version of unshrink.c,
67 distributed with UnZip version 5.2 and later:
68
69 * Copyright (c) 1994 Greg Roelofs.
70 * Permission is granted to any individual/institution/corporate
71 * entity to use, copy, redistribute or modify this software for
72 * any purpose whatsoever, subject to the conditions noted in the
73 * Frequently Asked Questions section below, plus one additional
74 * condition: namely, that my name not be removed from the source
75 * code. (Other names may, of course, be added as modifications
76 * are made.) Corporate legal staff (like at IBM :-) ) who have
77 * problems understanding this can contact me through Zip-Bugs...
78
79
80 Q. Can I use the source code of Zip and UnZip in my commercial
81 application?
82
83 A. Yes, so long as you include in your product an acknowledgment; a
84 pointer to the original, free compression sources; and a statement
85 making it clear that there are no extra or hidden charges resulting
86 from the use of our compression code in your product (see below for
87 an example). The acknowledgment should appear in at least one piece
88 of human-readable documentation (e.g., a README file or man page),
89 although additionally putting it in the executable(s) is OK, too.
90 In other words, you are allowed to sell only your own work, not ours,
91 and we'd like a little credit. (Note the additional restrictions
92 above on the code in unreduce.c, unshrink.c, vms.c, time_lib.c, and
93 everything in the wince and windll subdirectories.) Contact us at
94 Zip-Bugs@lists.wku.edu if you have special requirements. We also
95 like to hear when our code is being used, but we don't require that.
96
97 <Product> incorporates compression code from the Info-ZIP group.
98 There are no extra charges or costs due to the use of this code,
99 and the original compression sources are freely available from
100 http://www.cdrom.com/pub/infozip/ or ftp://ftp.cdrom.com/pub/infozip/
101 on the Internet.
102
103 If you only need compression capability, not full zipfile support,
104 you might want to look at zlib instead; it has fewer restrictions
105 on commercial use. See http://www.cdrom.com/pub/infozip/zlib/ .
106
107 ---------------------------------------------------------------------------*/
108
109 #include <string.h>
110 #include <stdio.h>
111
112 #include "cpio.h"
113 #include "unzip.h"
114
115 static void partial_clear(struct globals *);
116
117 #define trace()
118
119 /* HSIZE is defined as 2^13 (8192) in unzip.h */
120 #define BOGUSCODE 256
121 #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */
122 #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */
123 #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */
124 #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */
125
126 #define parent G.area.shrink.Parent
127 #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
128 #define stack G.area.shrink.Stack
129
130 /***********************/
131 /* Function unshrink() */
132 /***********************/
133
134 int
zipunshrink(struct file * f,const char * tgt,int tfd,int doswap,uint32_t * crc)135 zipunshrink(struct file *f, const char *tgt, int tfd, int doswap, uint32_t *crc)
136 {
137 struct globals G;
138 int offset = (HSIZE - 1);
139 uint8_t *stacktop = stack + offset;
140 register uint8_t *newstr;
141 int codesize=9, len, KwKwK;
142 int16_t code, oldcode, freecode, curcode;
143 int16_t lastfreecode;
144 unsigned int outbufsiz;
145
146 /*---------------------------------------------------------------------------
147 Initialize various variables.
148 ---------------------------------------------------------------------------*/
149
150 memset(&G, 0, sizeof G);
151 G.tgt = tgt;
152 G.tfd = tfd;
153 G.doswap = doswap;
154 G.crc = crc;
155 G.zsize = G.uzsize = f->f_csize;
156 lastfreecode = BOGUSCODE;
157
158 for (code = 0; code < BOGUSCODE; ++code) {
159 Value[code] = (uint8_t)code;
160 parent[code] = BOGUSCODE;
161 }
162 for (code = BOGUSCODE+1; code < HSIZE; ++code)
163 parent[code] = FREE_CODE;
164
165 outbufsiz = OUTBUFSIZ;
166 G.outptr = G.outbuf;
167 G.outcnt = 0L;
168
169 /*---------------------------------------------------------------------------
170 Get and output first code, then loop over remaining ones.
171 ---------------------------------------------------------------------------*/
172
173 READBITS(codesize, oldcode)
174 if (!G.zipeof) {
175 *G.outptr++ = (uint8_t)oldcode;
176 ++G.outcnt;
177 }
178
179 do {
180 READBITS(codesize, code)
181 if (G.zipeof)
182 break;
183 if (code == BOGUSCODE) { /* possible to have consecutive escapes? */
184 READBITS(codesize, code)
185 if (code == 1) {
186 ++codesize;
187 Trace((stderr, " (codesize now %d bits)\n", codesize));
188 } else if (code == 2) {
189 Trace((stderr, " (partial clear code)\n"));
190 partial_clear(&G); /* clear leafs (nodes with no children) */
191 Trace((stderr, " (done with partial clear)\n"));
192 lastfreecode = BOGUSCODE; /* reset start of free-node search */
193 }
194 continue;
195 }
196
197 /*-----------------------------------------------------------------------
198 Translate code: traverse tree from leaf back to root.
199 -----------------------------------------------------------------------*/
200
201 newstr = stacktop;
202 curcode = code;
203
204 if (parent[curcode] == FREE_CODE) {
205 /* or (FLAG_BITS[curcode] & FREE_CODE)? */
206 KwKwK = TRUE;
207 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
208 oldcode));
209 --newstr; /* last character will be same as first character */
210 curcode = oldcode;
211 } else
212 KwKwK = FALSE;
213
214 do {
215 *newstr-- = Value[curcode];
216 curcode = (int16_t)(parent[curcode] & CODE_MASK);
217 } while (curcode != BOGUSCODE);
218
219 len = (int)(stacktop - newstr++);
220 if (KwKwK)
221 *stacktop = *newstr;
222
223 /*-----------------------------------------------------------------------
224 Write expanded string in reverse order to output buffer.
225 -----------------------------------------------------------------------*/
226
227 Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code,
228 oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr));
229
230 {
231 register uint8_t *p;
232
233 for (p = newstr; p < newstr+len; ++p) {
234 *G.outptr++ = *p;
235 if (++G.outcnt == outbufsiz) {
236 flush(&G, G.outbuf, G.outcnt);
237 G.outptr = G.outbuf;
238 G.outcnt = 0L;
239 }
240 }
241 }
242
243 /*-----------------------------------------------------------------------
244 Add new leaf (first character of newstr) to tree as child of oldcode.
245 -----------------------------------------------------------------------*/
246
247 /* search for freecode */
248 freecode = (int16_t)(lastfreecode + 1);
249 /* add if-test before loop for speed? */
250 while (parent[freecode] != FREE_CODE)
251 ++freecode;
252 lastfreecode = freecode;
253 Trace((stderr, "]; newcode %d\n", freecode));
254
255 Value[freecode] = *newstr;
256 parent[freecode] = oldcode;
257 oldcode = code;
258
259 } while (!G.zipeof);
260
261 /*---------------------------------------------------------------------------
262 Flush any remaining data and return to sender...
263 ---------------------------------------------------------------------------*/
264
265 if (G.outcnt > 0L)
266 flush(&G, G.outbuf, G.outcnt);
267
268 return G.status;
269
270 } /* end function unshrink() */
271
272
273
274
275
276 /****************************/
277 /* Function partial_clear() */ /* no longer recursive... */
278 /****************************/
279
280 static void
partial_clear(struct globals * Gp)281 partial_clear(struct globals *Gp)
282 {
283 #define G (*Gp)
284 register int16_t code;
285
286 /* clear all nodes which have no children (i.e., leaf nodes only) */
287
288 /* first loop: mark each parent as such */
289 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
290 register int16_t cparent = (int16_t)(parent[code] & CODE_MASK);
291
292 if (cparent > BOGUSCODE && cparent != FREE_CODE)
293 FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */
294 }
295
296 /* second loop: clear all nodes *not* marked as parents; reset flag bits */
297 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
298 if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */
299 FLAG_BITS[code] &= ~HAS_CHILD;
300 else { /* leaf: lose it */
301 Trace((stderr, "%d\n", code));
302 parent[code] = FREE_CODE;
303 }
304 }
305
306 return;
307 }
308