1 /* Copyright (C) 2001-2006 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied, modified
8 or distributed except as expressly authorized under the terms of that
9 license. Refer to licensing information at http://www.artifex.com/
10 or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
11 San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
12 */
13
14 /* $Id: slzwd.c 8250 2007-09-25 13:31:24Z giles $ */
15 /* LZW decoding filter */
16 #include "stdio_.h" /* includes std.h */
17 #include "gdebug.h"
18 #include "strimpl.h"
19 #include "slzwx.h"
20
21 /* ------ LZWDecode ------ */
22
23 /********************************************************/
24 /* LZW routines are based on: */
25 /* Dr. Dobbs Journal --- Oct. 1989. */
26 /* Article on LZW Data Compression by Mark R. Nelson */
27 /********************************************************/
28
29 /* Define the special codes in terms of code_escape, which is */
30 /* 1 << InitialCodeLength. */
31 #define code_reset (code_escape + 0)
32 #define code_eod (code_escape + 1)
33 #define code_0 (code_escape + 2) /* first assignable code */
34
35 struct lzw_decode_s {
36 byte datum;
37 byte len; /* length of code */
38 ushort prefix; /* code to be prefixed */
39 };
40
41 gs_private_st_simple(st_lzw_decode, lzw_decode, "lzw_decode");
42 /* We can use a simple type as the element type, */
43 /* because there are no pointers to enumerate or relocate. */
44 #define st_lzw_decode_element st_lzw_decode
45 #define lzw_decode_max 4096 /* must be 4096 */
46
47 /* Initialize LZWDecode filter */
48 /* We separate out the reset function for some non-stream clients. */
49 static int
s_LZWD_reset(stream_state * st)50 s_LZWD_reset(stream_state * st)
51 {
52 stream_LZW_state *const ss = (stream_LZW_state *) st;
53 register lzw_decode *dc = ss->table.decode;
54 register int i;
55 uint code_escape = 1 << ss->InitialCodeLength;
56
57 ss->bits_left = 0;
58 ss->bytes_left = 0;
59 ss->next_code = code_0;
60 ss->code_size = ss->InitialCodeLength + 1;
61 ss->prev_code = -1;
62 ss->copy_code = -1;
63 dc[code_reset].len = 255;
64 dc[code_eod].len = 255;
65 for (i = 0; i < code_escape; i++, dc++)
66 dc->datum = i, dc->len = 1, dc->prefix = code_eod;
67 return 0;
68 }
69 static int
s_LZWD_init(stream_state * st)70 s_LZWD_init(stream_state * st)
71 {
72 stream_LZW_state *const ss = (stream_LZW_state *) st;
73 lzw_decode *dc =
74 gs_alloc_struct_array(st->memory, lzw_decode_max + 1,
75 lzw_decode, &st_lzw_decode_element,
76 "LZWDecode(init)");
77
78 if (dc == 0)
79 return ERRC;
80 /****** WRONG ******/
81 ss->table.decode = dc;
82 ss->min_left = 1;
83 return s_LZWD_reset(st);
84 }
85
86 /* Process a buffer */
87 static int
s_LZWD_process(stream_state * st,stream_cursor_read * pr,stream_cursor_write * pw,bool last)88 s_LZWD_process(stream_state * st, stream_cursor_read * pr,
89 stream_cursor_write * pw, bool last)
90 {
91 stream_LZW_state *const ss = (stream_LZW_state *) st;
92 register const byte *p = pr->ptr;
93 register byte *q = pw->ptr;
94
95 #ifdef DEBUG
96 byte *q0 = q;
97
98 #endif
99 const byte *rlimit = pr->limit; /* constant pointer */
100 byte *wlimit = pw->limit; /* constant pointer */
101 int status = 0;
102 int code = ss->copy_code;
103 int prev_code = ss->prev_code;
104 uint prev_len = ss->prev_len;
105 byte bits = ss->bits;
106 int bits_left = ss->bits_left;
107 int bytes_left = ss->bytes_left;
108 int code_size = ss->code_size;
109 int code_mask;
110 int switch_code;
111 int next_code = ss->next_code;
112 lzw_decode *table = ss->table.decode; /* constant pointer */
113 lzw_decode *dc_next = table + next_code; /* invariant */
114 lzw_decode *dc;
115 int code_escape = 1 << ss->InitialCodeLength;
116 int eod = code_eod;
117 bool low_order = ss->FirstBitLowOrder;
118 uint len;
119 int c;
120 byte b;
121 byte *q1;
122
123 if_debug2('w', "[w]process decode: code_size=%d next_code=%d\n",
124 code_size, next_code);
125 #define set_code_size()\
126 code_mask = (1 << code_size) - 1,\
127 switch_code = code_mask + 1 - ss->EarlyChange
128 set_code_size();
129 if (!ss->BlockData)
130 bytes_left = rlimit - p + 2; /* never stop for bytes_left */
131 /* If we are in the middle of copying a string, */
132 /* do some more now. */
133 if (code >= 0) {
134 int rlen = ss->copy_left;
135 int wlen = wlimit - q;
136 int n = len = min(rlen, wlen);
137
138 c = code;
139 ss->copy_left = rlen -= len;
140 if_debug3('W', "[W]copying 0x%x, %d byte(s) out of %d left\n",
141 code, len, rlen + len);
142 while (rlen)
143 c = table[c].prefix,
144 rlen--;
145 q1 = q += len;
146 n = len;
147 while (--n >= 0) {
148 *q1-- = (dc = &table[c])->datum;
149 c = dc->prefix;
150 }
151 if (ss->copy_left) { /* more to do */
152 pw->ptr = q;
153 return 1;
154 }
155 ss->copy_code = -1;
156 len = ss->copy_len;
157 /* Retrieve the first byte of the code just copied. */
158 if (c == eod) { /* We just copied the entire code, */
159 /* so the byte we want is immediately available. */
160 b = q1[1];
161 } else { /* We have to scan to the beginning of the code. */
162 for (; c != eod; c = table[c].prefix)
163 b = (byte) c;
164 }
165 goto add;
166 }
167 top:if (code_size > bits_left) {
168 if (bytes_left == 0) {
169 if (p == rlimit)
170 goto out;
171 bytes_left = *++p;
172 if_debug1('W', "[W]block count %d\n", bytes_left);
173 if (bytes_left == 0) {
174 status = EOFC;
175 goto out;
176 }
177 goto top;
178 }
179 if (low_order)
180 code = bits >> (8 - bits_left);
181 else
182 code = (uint) bits << (code_size - bits_left);
183 if (bits_left + 8 < code_size) { /* Need 2 more data bytes */
184 if (bytes_left == 1) {
185 if (rlimit - p < 3)
186 goto out;
187 bytes_left = p[2];
188 if_debug1('W', "[W]block count %d\n",
189 bytes_left);
190 if (bytes_left == 0) {
191 status = EOFC;
192 goto out;
193 }
194 bytes_left++;
195 bits = p[1];
196 p++;
197 } else {
198 if (rlimit - p < 2)
199 goto out;
200 bits = p[1];
201 }
202 if (low_order)
203 code += (uint) bits << bits_left;
204 else
205 code += (uint) bits << (code_size - 8 - bits_left);
206 bits_left += 8;
207 bits = p[2];
208 p += 2;
209 bytes_left -= 2;
210 } else {
211 if (p == rlimit)
212 goto out;
213 bits = *++p;
214 bytes_left--;
215 }
216 if (low_order)
217 code += (uint) bits << bits_left,
218 bits_left += 8 - code_size;
219 else
220 bits_left += 8 - code_size,
221 code += bits >> bits_left;
222 } else {
223 if (low_order)
224 code = bits >> (8 - bits_left),
225 bits_left -= code_size;
226 else
227 bits_left -= code_size,
228 code = bits >> bits_left;
229 }
230 code &= code_mask;
231 if_debug2('W', "[W]reading 0x%x,%d\n", code, code_size);
232 /*
233 * There is an anomalous case where a code S is followed
234 * immediately by another occurrence of the S string.
235 * In this case, the next available code will be defined as
236 * S followed by the first character of S, and will be
237 * emitted immediately after the code S. We have to
238 * recognize this case specially, by noting that the code is
239 * equal to next_code.
240 */
241 if (code >= next_code) {
242 if (code > next_code) {
243 #ifdef DEBUG
244 lprintf2("[W]code = %d > next_code = %d\n",
245 code, next_code);
246 #endif
247 status = ERRC;
248 goto out;
249 }
250 /* Fabricate the entry for the code. It will be */
251 /* overwritten immediately, of course. */
252 for (c = prev_code; c != eod; c = table[c].prefix)
253 dc_next->datum = c;
254 len = prev_len + 1;
255 dc_next->len = min(len, 255);
256 dc_next->prefix = prev_code;
257 if_debug3('w', "[w]decoding anomalous 0x%x=0x%x+%c\n",
258 next_code, prev_code, dc_next->datum);
259 }
260 /* See if there is enough room for the code. */
261 reset:
262 len = table[code].len;
263 if (len == 255) { /* Check for special code (reset or end). */
264 /* We set their lengths to 255 to avoid doing */
265 /* an extra check in the normal case. */
266 if (code == code_reset) {
267 if_debug1('w', "[w]reset: next_code was %d\n",
268 next_code);
269 next_code = code_0;
270 dc_next = table + code_0;
271 code_size = ss->InitialCodeLength + 1;
272 set_code_size();
273 prev_code = -1;
274 goto top;
275 } else if (code == eod) {
276 status = EOFC;
277 goto out;
278 }
279 /* The code length won't fit in a byte, */
280 /* compute it the hard way. */
281 for (c = code, len = 0; c != eod; len++)
282 c = table[c].prefix;
283 if_debug2('w', "[w]long code %d, length=%d\n", code, len);
284 }
285 if (wlimit - q < len) {
286 ss->copy_code = code;
287 ss->copy_left = ss->copy_len = len;
288 status = 1;
289 goto out;
290 }
291 /* Copy the string to the buffer (back to front). */
292 /* Optimize for short codes, which are the most frequent. */
293 dc = &table[code];
294 switch (len) {
295 default:
296 {
297 byte *q1 = q += len;
298
299 c = code;
300 do {
301 *q1-- = (dc = &table[c])->datum;
302 }
303 while ((c = dc->prefix) != eod);
304 b = q1[1];
305 }
306 break;
307 case 3:
308 q[3] = dc->datum;
309 dc = &table[dc->prefix];
310 case 2:
311 q[2] = dc->datum;
312 dc = &table[dc->prefix];
313 case 1:
314 q[1] = b = dc->datum;
315 q += len;
316 }
317 add: /* Add a new entry to the table */
318 if (prev_code >= 0) {
319 /*
320 * Unfortunately, we have to check for next_code ==
321 * lzw_decode_max every time: just checking at power
322 * of 2 boundaries stops us one code too soon.
323 */
324 if (next_code == lzw_decode_max) {
325 /*
326 * A few anomalous files have one data item too many before the
327 * reset code. We think this is a bug in the application that
328 * produced the files, but Acrobat accepts the files, so we do
329 * too.
330 */
331 if (!ss->BlockData) { /* don't do this for GIF */
332 if (bits_left < 8 && p >= rlimit && last) {
333 /* We're at EOD. */
334 goto out;
335 }
336 if (bits_left + ((rlimit - p) << 3) < code_size) {
337 /*
338 * We need more data to decide whether a reset is next.
339 * Return an error if we cannot get more.
340 */
341 if (last)
342 status = ERRC;
343 goto out;
344 }
345 if (low_order) {
346 code = bits >> (8 - bits_left);
347 code += (bits = *++p) << bits_left;
348 if (bits_left + 8 < code_size)
349 code += (bits = *++p) << (bits_left + 8);
350 } else {
351 code = bits & ((1 << bits_left) - 1);
352 code = (code << 8) + (bits = *++p);
353 if (bits_left + 8 < code_size)
354 code = (code << 8) + (bits = *++p);
355 code >>= (bits_left - code_size) & 7;
356 }
357 bits_left = (bits_left - code_size) & 7;
358 if (code == code_reset)
359 goto reset;
360 }
361 status = ERRC;
362 goto out;
363 }
364 dc_next->datum = b; /* added char of string */
365 dc_next->len = min(prev_len, 254) + 1;
366 dc_next->prefix = prev_code;
367 dc_next++;
368 if_debug4('W', "[W]adding 0x%x=0x%x+%c(%d)\n",
369 next_code, prev_code, b, min(len, 255));
370 if (++next_code == switch_code) { /* Crossed a power of 2. */
371 /* We have to make a strange special check for */
372 /* reaching the end of the code space. */
373 if (next_code < lzw_decode_max - 1) {
374 code_size++;
375 set_code_size();
376 if_debug2('w', "[w]crossed power of 2: new code_size=%d, next_code=%d\n",
377 code_size, next_code);
378 }
379 }
380 }
381 prev_code = code;
382 prev_len = len;
383 goto top;
384 out:pr->ptr = p;
385 pw->ptr = q;
386 ss->code_size = code_size;
387 ss->prev_code = prev_code;
388 ss->prev_len = prev_len;
389 ss->bits = bits;
390 ss->bits_left = bits_left;
391 ss->bytes_left = bytes_left;
392 ss->next_code = next_code;
393 if_debug3('w', "[w]decoded %d bytes, prev_code=%d, next_code=%d\n",
394 (int)(q - q0), prev_code, next_code);
395 return status;
396 }
397
398 /* Stream template */
399 const stream_template s_LZWD_template =
400 {&st_LZW_state, s_LZWD_init, s_LZWD_process, 3, 1, s_LZW_release,
401 s_LZW_set_defaults, s_LZWD_reset
402 };
403