1*2d60b848STomohiro Kusumi /* inffast.c -- fast decoding
2*2d60b848STomohiro Kusumi * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
3*2d60b848STomohiro Kusumi * For conditions of distribution and use, see copyright notice in zlib.h
4*2d60b848STomohiro Kusumi */
5*2d60b848STomohiro Kusumi
6*2d60b848STomohiro Kusumi #include "hammer2_zlib_zutil.h"
7*2d60b848STomohiro Kusumi #include "hammer2_zlib_inftrees.h"
8*2d60b848STomohiro Kusumi #include "hammer2_zlib_inflate.h"
9*2d60b848STomohiro Kusumi #include "hammer2_zlib_inffast.h"
10*2d60b848STomohiro Kusumi
11*2d60b848STomohiro Kusumi #ifndef ASMINF
12*2d60b848STomohiro Kusumi
13*2d60b848STomohiro Kusumi /* Allow machine dependent optimization for post-increment or pre-increment.
14*2d60b848STomohiro Kusumi Based on testing to date,
15*2d60b848STomohiro Kusumi Pre-increment preferred for:
16*2d60b848STomohiro Kusumi - PowerPC G3 (Adler)
17*2d60b848STomohiro Kusumi - MIPS R5000 (Randers-Pehrson)
18*2d60b848STomohiro Kusumi Post-increment preferred for:
19*2d60b848STomohiro Kusumi - none
20*2d60b848STomohiro Kusumi No measurable difference:
21*2d60b848STomohiro Kusumi - Pentium III (Anderson)
22*2d60b848STomohiro Kusumi - M68060 (Nikl)
23*2d60b848STomohiro Kusumi */
24*2d60b848STomohiro Kusumi #ifdef POSTINC
25*2d60b848STomohiro Kusumi # define OFF 0
26*2d60b848STomohiro Kusumi # define PUP(a) *(a)++
27*2d60b848STomohiro Kusumi #else
28*2d60b848STomohiro Kusumi # define OFF 1
29*2d60b848STomohiro Kusumi # define PUP(a) *++(a)
30*2d60b848STomohiro Kusumi #endif
31*2d60b848STomohiro Kusumi
32*2d60b848STomohiro Kusumi /*
33*2d60b848STomohiro Kusumi Decode literal, length, and distance codes and write out the resulting
34*2d60b848STomohiro Kusumi literal and match bytes until either not enough input or output is
35*2d60b848STomohiro Kusumi available, an end-of-block is encountered, or a data error is encountered.
36*2d60b848STomohiro Kusumi When large enough input and output buffers are supplied to inflate(), for
37*2d60b848STomohiro Kusumi example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*2d60b848STomohiro Kusumi inflate execution time is spent in this routine.
39*2d60b848STomohiro Kusumi
40*2d60b848STomohiro Kusumi Entry assumptions:
41*2d60b848STomohiro Kusumi
42*2d60b848STomohiro Kusumi state->mode == LEN
43*2d60b848STomohiro Kusumi strm->avail_in >= 6
44*2d60b848STomohiro Kusumi strm->avail_out >= 258
45*2d60b848STomohiro Kusumi start >= strm->avail_out
46*2d60b848STomohiro Kusumi state->bits < 8
47*2d60b848STomohiro Kusumi
48*2d60b848STomohiro Kusumi On return, state->mode is one of:
49*2d60b848STomohiro Kusumi
50*2d60b848STomohiro Kusumi LEN -- ran out of enough output space or enough available input
51*2d60b848STomohiro Kusumi TYPE -- reached end of block code, inflate() to interpret next block
52*2d60b848STomohiro Kusumi BAD -- error in block data
53*2d60b848STomohiro Kusumi
54*2d60b848STomohiro Kusumi Notes:
55*2d60b848STomohiro Kusumi
56*2d60b848STomohiro Kusumi - The maximum input bits used by a length/distance pair is 15 bits for the
57*2d60b848STomohiro Kusumi length code, 5 bits for the length extra, 15 bits for the distance code,
58*2d60b848STomohiro Kusumi and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59*2d60b848STomohiro Kusumi Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*2d60b848STomohiro Kusumi checking for available input while decoding.
61*2d60b848STomohiro Kusumi
62*2d60b848STomohiro Kusumi - The maximum bytes that a single length/distance pair can output is 258
63*2d60b848STomohiro Kusumi bytes, which is the maximum length that can be coded. inflate_fast()
64*2d60b848STomohiro Kusumi requires strm->avail_out >= 258 for each loop to avoid checking for
65*2d60b848STomohiro Kusumi output space.
66*2d60b848STomohiro Kusumi */
67*2d60b848STomohiro Kusumi void
68*2d60b848STomohiro Kusumi ZLIB_INTERNAL
inflate_fast(z_streamp strm,unsigned start)69*2d60b848STomohiro Kusumi inflate_fast(z_streamp strm, unsigned start) /* inflate()'s starting value for strm->avail_out */
70*2d60b848STomohiro Kusumi {
71*2d60b848STomohiro Kusumi struct inflate_state FAR *state;
72*2d60b848STomohiro Kusumi z_const unsigned char FAR *in; /* local strm->next_in */
73*2d60b848STomohiro Kusumi z_const unsigned char FAR *last; /* have enough input while in < last */
74*2d60b848STomohiro Kusumi unsigned char FAR *out; /* local strm->next_out */
75*2d60b848STomohiro Kusumi unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
76*2d60b848STomohiro Kusumi unsigned char FAR *end; /* while out < end, enough space available */
77*2d60b848STomohiro Kusumi #ifdef INFLATE_STRICT
78*2d60b848STomohiro Kusumi unsigned dmax; /* maximum distance from zlib header */
79*2d60b848STomohiro Kusumi #endif
80*2d60b848STomohiro Kusumi unsigned wsize; /* window size or zero if not using window */
81*2d60b848STomohiro Kusumi unsigned whave; /* valid bytes in the window */
82*2d60b848STomohiro Kusumi unsigned wnext; /* window write index */
83*2d60b848STomohiro Kusumi unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
84*2d60b848STomohiro Kusumi unsigned long hold; /* local strm->hold */
85*2d60b848STomohiro Kusumi unsigned bits; /* local strm->bits */
86*2d60b848STomohiro Kusumi code const FAR *lcode; /* local strm->lencode */
87*2d60b848STomohiro Kusumi code const FAR *dcode; /* local strm->distcode */
88*2d60b848STomohiro Kusumi unsigned lmask; /* mask for first level of length codes */
89*2d60b848STomohiro Kusumi unsigned dmask; /* mask for first level of distance codes */
90*2d60b848STomohiro Kusumi code here; /* retrieved table entry */
91*2d60b848STomohiro Kusumi unsigned op; /* code bits, operation, extra bits, or */
92*2d60b848STomohiro Kusumi /* window position, window bytes to copy */
93*2d60b848STomohiro Kusumi unsigned len; /* match length, unused bytes */
94*2d60b848STomohiro Kusumi unsigned dist; /* match distance */
95*2d60b848STomohiro Kusumi unsigned char FAR *from; /* where to copy match from */
96*2d60b848STomohiro Kusumi
97*2d60b848STomohiro Kusumi /* copy state to local variables */
98*2d60b848STomohiro Kusumi state = (struct inflate_state FAR *)strm->state;
99*2d60b848STomohiro Kusumi in = strm->next_in - OFF;
100*2d60b848STomohiro Kusumi last = in + (strm->avail_in - 5);
101*2d60b848STomohiro Kusumi out = strm->next_out - OFF;
102*2d60b848STomohiro Kusumi beg = out - (start - strm->avail_out);
103*2d60b848STomohiro Kusumi end = out + (strm->avail_out - 257);
104*2d60b848STomohiro Kusumi #ifdef INFLATE_STRICT
105*2d60b848STomohiro Kusumi dmax = state->dmax;
106*2d60b848STomohiro Kusumi #endif
107*2d60b848STomohiro Kusumi wsize = state->wsize;
108*2d60b848STomohiro Kusumi whave = state->whave;
109*2d60b848STomohiro Kusumi wnext = state->wnext;
110*2d60b848STomohiro Kusumi window = state->window;
111*2d60b848STomohiro Kusumi hold = state->hold;
112*2d60b848STomohiro Kusumi bits = state->bits;
113*2d60b848STomohiro Kusumi lcode = state->lencode;
114*2d60b848STomohiro Kusumi dcode = state->distcode;
115*2d60b848STomohiro Kusumi lmask = (1U << state->lenbits) - 1;
116*2d60b848STomohiro Kusumi dmask = (1U << state->distbits) - 1;
117*2d60b848STomohiro Kusumi
118*2d60b848STomohiro Kusumi /* decode literals and length/distances until end-of-block or not enough
119*2d60b848STomohiro Kusumi input data or output space */
120*2d60b848STomohiro Kusumi do {
121*2d60b848STomohiro Kusumi if (bits < 15) {
122*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
123*2d60b848STomohiro Kusumi bits += 8;
124*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
125*2d60b848STomohiro Kusumi bits += 8;
126*2d60b848STomohiro Kusumi }
127*2d60b848STomohiro Kusumi here = lcode[hold & lmask];
128*2d60b848STomohiro Kusumi dolen:
129*2d60b848STomohiro Kusumi op = (unsigned)(here.bits);
130*2d60b848STomohiro Kusumi hold >>= op;
131*2d60b848STomohiro Kusumi bits -= op;
132*2d60b848STomohiro Kusumi op = (unsigned)(here.op);
133*2d60b848STomohiro Kusumi if (op == 0) { /* literal */
134*2d60b848STomohiro Kusumi Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
135*2d60b848STomohiro Kusumi "inflate: literal '%c'\n" :
136*2d60b848STomohiro Kusumi "inflate: literal 0x%02x\n", here.val));
137*2d60b848STomohiro Kusumi PUP(out) = (unsigned char)(here.val);
138*2d60b848STomohiro Kusumi }
139*2d60b848STomohiro Kusumi else if (op & 16) { /* length base */
140*2d60b848STomohiro Kusumi len = (unsigned)(here.val);
141*2d60b848STomohiro Kusumi op &= 15; /* number of extra bits */
142*2d60b848STomohiro Kusumi if (op) {
143*2d60b848STomohiro Kusumi if (bits < op) {
144*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
145*2d60b848STomohiro Kusumi bits += 8;
146*2d60b848STomohiro Kusumi }
147*2d60b848STomohiro Kusumi len += (unsigned)hold & ((1U << op) - 1);
148*2d60b848STomohiro Kusumi hold >>= op;
149*2d60b848STomohiro Kusumi bits -= op;
150*2d60b848STomohiro Kusumi }
151*2d60b848STomohiro Kusumi Tracevv((stderr, "inflate: length %u\n", len));
152*2d60b848STomohiro Kusumi if (bits < 15) {
153*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
154*2d60b848STomohiro Kusumi bits += 8;
155*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
156*2d60b848STomohiro Kusumi bits += 8;
157*2d60b848STomohiro Kusumi }
158*2d60b848STomohiro Kusumi here = dcode[hold & dmask];
159*2d60b848STomohiro Kusumi dodist:
160*2d60b848STomohiro Kusumi op = (unsigned)(here.bits);
161*2d60b848STomohiro Kusumi hold >>= op;
162*2d60b848STomohiro Kusumi bits -= op;
163*2d60b848STomohiro Kusumi op = (unsigned)(here.op);
164*2d60b848STomohiro Kusumi if (op & 16) { /* distance base */
165*2d60b848STomohiro Kusumi dist = (unsigned)(here.val);
166*2d60b848STomohiro Kusumi op &= 15; /* number of extra bits */
167*2d60b848STomohiro Kusumi if (bits < op) {
168*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
169*2d60b848STomohiro Kusumi bits += 8;
170*2d60b848STomohiro Kusumi if (bits < op) {
171*2d60b848STomohiro Kusumi hold += (unsigned long)(PUP(in)) << bits;
172*2d60b848STomohiro Kusumi bits += 8;
173*2d60b848STomohiro Kusumi }
174*2d60b848STomohiro Kusumi }
175*2d60b848STomohiro Kusumi dist += (unsigned)hold & ((1U << op) - 1);
176*2d60b848STomohiro Kusumi #ifdef INFLATE_STRICT
177*2d60b848STomohiro Kusumi if (dist > dmax) {
178*2d60b848STomohiro Kusumi strm->msg = (char *)"invalid distance too far back";
179*2d60b848STomohiro Kusumi state->mode = BAD;
180*2d60b848STomohiro Kusumi break;
181*2d60b848STomohiro Kusumi }
182*2d60b848STomohiro Kusumi #endif
183*2d60b848STomohiro Kusumi hold >>= op;
184*2d60b848STomohiro Kusumi bits -= op;
185*2d60b848STomohiro Kusumi Tracevv((stderr, "inflate: distance %u\n", dist));
186*2d60b848STomohiro Kusumi op = (unsigned)(out - beg); /* max distance in output */
187*2d60b848STomohiro Kusumi if (dist > op) { /* see if copy from window */
188*2d60b848STomohiro Kusumi op = dist - op; /* distance back in window */
189*2d60b848STomohiro Kusumi if (op > whave) {
190*2d60b848STomohiro Kusumi if (state->sane) {
191*2d60b848STomohiro Kusumi strm->msg =
192*2d60b848STomohiro Kusumi (char *)"invalid distance too far back";
193*2d60b848STomohiro Kusumi state->mode = BAD;
194*2d60b848STomohiro Kusumi break;
195*2d60b848STomohiro Kusumi }
196*2d60b848STomohiro Kusumi #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
197*2d60b848STomohiro Kusumi if (len <= op - whave) {
198*2d60b848STomohiro Kusumi do {
199*2d60b848STomohiro Kusumi PUP(out) = 0;
200*2d60b848STomohiro Kusumi } while (--len);
201*2d60b848STomohiro Kusumi continue;
202*2d60b848STomohiro Kusumi }
203*2d60b848STomohiro Kusumi len -= op - whave;
204*2d60b848STomohiro Kusumi do {
205*2d60b848STomohiro Kusumi PUP(out) = 0;
206*2d60b848STomohiro Kusumi } while (--op > whave);
207*2d60b848STomohiro Kusumi if (op == 0) {
208*2d60b848STomohiro Kusumi from = out - dist;
209*2d60b848STomohiro Kusumi do {
210*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
211*2d60b848STomohiro Kusumi } while (--len);
212*2d60b848STomohiro Kusumi continue;
213*2d60b848STomohiro Kusumi }
214*2d60b848STomohiro Kusumi #endif
215*2d60b848STomohiro Kusumi }
216*2d60b848STomohiro Kusumi from = window - OFF;
217*2d60b848STomohiro Kusumi if (wnext == 0) { /* very common case */
218*2d60b848STomohiro Kusumi from += wsize - op;
219*2d60b848STomohiro Kusumi if (op < len) { /* some from window */
220*2d60b848STomohiro Kusumi len -= op;
221*2d60b848STomohiro Kusumi do {
222*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
223*2d60b848STomohiro Kusumi } while (--op);
224*2d60b848STomohiro Kusumi from = out - dist; /* rest from output */
225*2d60b848STomohiro Kusumi }
226*2d60b848STomohiro Kusumi }
227*2d60b848STomohiro Kusumi else if (wnext < op) { /* wrap around window */
228*2d60b848STomohiro Kusumi from += wsize + wnext - op;
229*2d60b848STomohiro Kusumi op -= wnext;
230*2d60b848STomohiro Kusumi if (op < len) { /* some from end of window */
231*2d60b848STomohiro Kusumi len -= op;
232*2d60b848STomohiro Kusumi do {
233*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
234*2d60b848STomohiro Kusumi } while (--op);
235*2d60b848STomohiro Kusumi from = window - OFF;
236*2d60b848STomohiro Kusumi if (wnext < len) { /* some from start of window */
237*2d60b848STomohiro Kusumi op = wnext;
238*2d60b848STomohiro Kusumi len -= op;
239*2d60b848STomohiro Kusumi do {
240*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
241*2d60b848STomohiro Kusumi } while (--op);
242*2d60b848STomohiro Kusumi from = out - dist; /* rest from output */
243*2d60b848STomohiro Kusumi }
244*2d60b848STomohiro Kusumi }
245*2d60b848STomohiro Kusumi }
246*2d60b848STomohiro Kusumi else { /* contiguous in window */
247*2d60b848STomohiro Kusumi from += wnext - op;
248*2d60b848STomohiro Kusumi if (op < len) { /* some from window */
249*2d60b848STomohiro Kusumi len -= op;
250*2d60b848STomohiro Kusumi do {
251*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
252*2d60b848STomohiro Kusumi } while (--op);
253*2d60b848STomohiro Kusumi from = out - dist; /* rest from output */
254*2d60b848STomohiro Kusumi }
255*2d60b848STomohiro Kusumi }
256*2d60b848STomohiro Kusumi while (len > 2) {
257*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
258*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
259*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
260*2d60b848STomohiro Kusumi len -= 3;
261*2d60b848STomohiro Kusumi }
262*2d60b848STomohiro Kusumi if (len) {
263*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
264*2d60b848STomohiro Kusumi if (len > 1)
265*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
266*2d60b848STomohiro Kusumi }
267*2d60b848STomohiro Kusumi }
268*2d60b848STomohiro Kusumi else {
269*2d60b848STomohiro Kusumi from = out - dist; /* copy direct from output */
270*2d60b848STomohiro Kusumi do { /* minimum length is three */
271*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
272*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
273*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
274*2d60b848STomohiro Kusumi len -= 3;
275*2d60b848STomohiro Kusumi } while (len > 2);
276*2d60b848STomohiro Kusumi if (len) {
277*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
278*2d60b848STomohiro Kusumi if (len > 1)
279*2d60b848STomohiro Kusumi PUP(out) = PUP(from);
280*2d60b848STomohiro Kusumi }
281*2d60b848STomohiro Kusumi }
282*2d60b848STomohiro Kusumi }
283*2d60b848STomohiro Kusumi else if ((op & 64) == 0) { /* 2nd level distance code */
284*2d60b848STomohiro Kusumi here = dcode[here.val + (hold & ((1U << op) - 1))];
285*2d60b848STomohiro Kusumi goto dodist;
286*2d60b848STomohiro Kusumi }
287*2d60b848STomohiro Kusumi else {
288*2d60b848STomohiro Kusumi strm->msg = (char *)"invalid distance code";
289*2d60b848STomohiro Kusumi state->mode = BAD;
290*2d60b848STomohiro Kusumi break;
291*2d60b848STomohiro Kusumi }
292*2d60b848STomohiro Kusumi }
293*2d60b848STomohiro Kusumi else if ((op & 64) == 0) { /* 2nd level length code */
294*2d60b848STomohiro Kusumi here = lcode[here.val + (hold & ((1U << op) - 1))];
295*2d60b848STomohiro Kusumi goto dolen;
296*2d60b848STomohiro Kusumi }
297*2d60b848STomohiro Kusumi else if (op & 32) { /* end-of-block */
298*2d60b848STomohiro Kusumi Tracevv((stderr, "inflate: end of block\n"));
299*2d60b848STomohiro Kusumi state->mode = TYPE;
300*2d60b848STomohiro Kusumi break;
301*2d60b848STomohiro Kusumi }
302*2d60b848STomohiro Kusumi else {
303*2d60b848STomohiro Kusumi strm->msg = (char *)"invalid literal/length code";
304*2d60b848STomohiro Kusumi state->mode = BAD;
305*2d60b848STomohiro Kusumi break;
306*2d60b848STomohiro Kusumi }
307*2d60b848STomohiro Kusumi } while (in < last && out < end);
308*2d60b848STomohiro Kusumi
309*2d60b848STomohiro Kusumi /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
310*2d60b848STomohiro Kusumi len = bits >> 3;
311*2d60b848STomohiro Kusumi in -= len;
312*2d60b848STomohiro Kusumi bits -= len << 3;
313*2d60b848STomohiro Kusumi hold &= (1U << bits) - 1;
314*2d60b848STomohiro Kusumi
315*2d60b848STomohiro Kusumi /* update state and return */
316*2d60b848STomohiro Kusumi strm->next_in = in + OFF;
317*2d60b848STomohiro Kusumi strm->next_out = out + OFF;
318*2d60b848STomohiro Kusumi strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
319*2d60b848STomohiro Kusumi strm->avail_out = (unsigned)(out < end ?
320*2d60b848STomohiro Kusumi 257 + (end - out) : 257 - (out - end));
321*2d60b848STomohiro Kusumi state->hold = hold;
322*2d60b848STomohiro Kusumi state->bits = bits;
323*2d60b848STomohiro Kusumi return;
324*2d60b848STomohiro Kusumi }
325*2d60b848STomohiro Kusumi
326*2d60b848STomohiro Kusumi /*
327*2d60b848STomohiro Kusumi inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
328*2d60b848STomohiro Kusumi - Using bit fields for code structure
329*2d60b848STomohiro Kusumi - Different op definition to avoid & for extra bits (do & for table bits)
330*2d60b848STomohiro Kusumi - Three separate decoding do-loops for direct, window, and wnext == 0
331*2d60b848STomohiro Kusumi - Special case for distance > 1 copies to do overlapped load and store copy
332*2d60b848STomohiro Kusumi - Explicit branch predictions (based on measured branch probabilities)
333*2d60b848STomohiro Kusumi - Deferring match copy and interspersed it with decoding subsequent codes
334*2d60b848STomohiro Kusumi - Swapping literal/length else
335*2d60b848STomohiro Kusumi - Swapping window/direct else
336*2d60b848STomohiro Kusumi - Larger unrolled copy loops (three is about right)
337*2d60b848STomohiro Kusumi - Moving len -= 3 statement into middle of loop
338*2d60b848STomohiro Kusumi */
339*2d60b848STomohiro Kusumi
340*2d60b848STomohiro Kusumi #endif /* !ASMINF */
341