1 /* gzread.c -- zlib functions for reading gzip files
2  * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #include "gzguts.h"
7 
8 /* Local functions */
9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10 local int gz_avail OF((gz_statep));
11 local int gz_look OF((gz_statep));
12 local int gz_decomp OF((gz_statep));
13 local int gz_fetch OF((gz_statep));
14 local int gz_skip OF((gz_statep, z_off64_t));
15 
16 int ZEXPORT gzgetc_(gzFile file);
17 
18 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
19    state->fd, and update state->eof, state->err, and state->msg as appropriate.
20    This function needs to loop on read(), since read() is not guaranteed to
21    read the number of bytes requested, depending on the type of descriptor. */
gz_load(gz_statep state,unsigned char * buf,unsigned len,unsigned * have)22 local int gz_load(gz_statep state, unsigned char *buf, unsigned len, unsigned *have)
23 {
24    int ret;
25 
26    *have = 0;
27    do {
28       ret = read(state->fd, buf + *have, len - *have);
29       if (ret <= 0)
30          break;
31       *have += ret;
32    } while (*have < len);
33    if (ret < 0) {
34       gz_error(state, Z_ERRNO, zstrerror());
35       return -1;
36    }
37    if (ret == 0)
38       state->eof = 1;
39    return 0;
40 }
41 
42 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
43    error, 0 otherwise.  Note that the eof flag is set when the end of the input
44    file is reached, even though there may be unused data in the buffer.  Once
45    that data has been used, no more attempts will be made to read the file.
46    If strm->avail_in != 0, then the current data is moved to the beginning of
47    the input buffer, and then the remainder of the buffer is loaded with the
48    available data from the input file. */
gz_avail(gz_statep state)49 local int gz_avail(gz_statep state)
50 {
51    unsigned got;
52    z_streamp strm = &(state->strm);
53 
54    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
55       return -1;
56    if (state->eof == 0) {
57       if (strm->avail_in) {       /* copy what's there to the start */
58          unsigned char *p = state->in;
59          unsigned const char *q = strm->next_in;
60          unsigned n = strm->avail_in;
61          do {
62             *p++ = *q++;
63          } while (--n);
64       }
65       if (gz_load(state, state->in + strm->avail_in,
66                state->size - strm->avail_in, &got) == -1)
67          return -1;
68       strm->avail_in += got;
69       strm->next_in = state->in;
70    }
71    return 0;
72 }
73 
74 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
75    If this is the first time in, allocate required memory.  state->how will be
76    left unchanged if there is no more input data available, will be set to COPY
77    if there is no gzip header and direct copying will be performed, or it will
78    be set to GZIP for decompression.  If direct copying, then leftover input
79    data from the input buffer will be copied to the output buffer.  In that
80    case, all further file reads will be directly to either the output buffer or
81    a user buffer.  If decompressing, the inflate state will be initialized.
82    gz_look() will return 0 on success or -1 on failure. */
gz_look(gz_statep state)83 local int gz_look(gz_statep state)
84 {
85    z_streamp strm = &(state->strm);
86 
87    /* allocate read buffers and inflate memory */
88    if (state->size == 0) {
89       /* allocate buffers */
90       state->in = (unsigned char *)malloc(state->want);
91       state->out = (unsigned char *)malloc(state->want << 1);
92       if (state->in == NULL || state->out == NULL) {
93          if (state->out != NULL)
94             free(state->out);
95          if (state->in != NULL)
96             free(state->in);
97          gz_error(state, Z_MEM_ERROR, "out of memory");
98          return -1;
99       }
100       state->size = state->want;
101 
102       /* allocate inflate memory */
103       state->strm.zalloc = Z_NULL;
104       state->strm.zfree = Z_NULL;
105       state->strm.opaque = Z_NULL;
106       state->strm.avail_in = 0;
107       state->strm.next_in = Z_NULL;
108       if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
109          free(state->out);
110          free(state->in);
111          state->size = 0;
112          gz_error(state, Z_MEM_ERROR, "out of memory");
113          return -1;
114       }
115    }
116 
117    /* get at least the magic bytes in the input buffer */
118    if (strm->avail_in < 2) {
119       if (gz_avail(state) == -1)
120          return -1;
121       if (strm->avail_in == 0)
122          return 0;
123    }
124 
125    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
126       a logical dilemma here when considering the case of a partially written
127       gzip file, to wit, if a single 31 byte is written, then we cannot tell
128       whether this is a single-byte file, or just a partially written gzip
129       file -- for here we assume that if a gzip file is being written, then
130       the header will be written in a single operation, so that reading a
131       single byte is sufficient indication that it is not a gzip file) */
132    if (strm->avail_in > 1 &&
133          strm->next_in[0] == 31 && strm->next_in[1] == 139) {
134       inflateReset(strm);
135       state->how = MODE_GZIP;
136       state->direct = 0;
137       return 0;
138    }
139 
140    /* no gzip header -- if we were decoding gzip before, then this is trailing
141       garbage.  Ignore the trailing garbage and finish. */
142    if (state->direct == 0) {
143       strm->avail_in = 0;
144       state->eof = 1;
145       state->x.have = 0;
146       return 0;
147    }
148 
149    /* doing raw i/o, copy any leftover input to output -- this assumes that
150       the output buffer is larger than the input buffer, which also assures
151       space for gzungetc() */
152    state->x.next = state->out;
153    if (strm->avail_in) {
154       memcpy(state->x.next, strm->next_in, strm->avail_in);
155       state->x.have = strm->avail_in;
156       strm->avail_in = 0;
157    }
158    state->how = MODE_COPY;
159    state->direct = 1;
160    return 0;
161 }
162 
163 /* Decompress from input to the provided next_out and avail_out in the state.
164    On return, state->x.have and state->x.next point to the just decompressed
165    data.  If the gzip stream completes, state->how is reset to LOOK to look for
166    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
167    on success, -1 on failure. */
gz_decomp(gz_statep state)168 local int gz_decomp(gz_statep state)
169 {
170    int ret = Z_OK;
171    unsigned had;
172    z_streamp strm = &(state->strm);
173 
174    /* fill output buffer up to end of deflate stream */
175    had = strm->avail_out;
176    do {
177       /* get more input for inflate() */
178       if (strm->avail_in == 0 && gz_avail(state) == -1)
179          return -1;
180       if (strm->avail_in == 0) {
181          gz_error(state, Z_BUF_ERROR, "unexpected end of file");
182          break;
183       }
184 
185       /* decompress and handle errors */
186       ret = inflate(strm, Z_NO_FLUSH);
187       if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
188          gz_error(state, Z_STREAM_ERROR,
189                "internal error: inflate stream corrupt");
190          return -1;
191       }
192       if (ret == Z_MEM_ERROR) {
193          gz_error(state, Z_MEM_ERROR, "out of memory");
194          return -1;
195       }
196       if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
197          gz_error(state, Z_DATA_ERROR,
198                strm->msg == NULL ? "compressed data error" : strm->msg);
199          return -1;
200       }
201    } while (strm->avail_out && ret != Z_STREAM_END);
202 
203    /* update available output */
204    state->x.have = had - strm->avail_out;
205    state->x.next = strm->next_out - state->x.have;
206 
207    /* if the gzip stream completed successfully, look for another */
208    if (ret == Z_STREAM_END)
209       state->how = LOOK;
210 
211    /* good decompression */
212    return 0;
213 }
214 
215 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
216    Data is either copied from the input file or decompressed from the input
217    file depending on state->how.  If state->how is LOOK, then a gzip header is
218    looked for to determine whether to copy or decompress.  Returns -1 on error,
219    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
220    end of the input file has been reached and all data has been processed.  */
gz_fetch(gz_statep state)221 local int gz_fetch(gz_statep state)
222 {
223    z_streamp strm = &(state->strm);
224 
225    do {
226       switch(state->how) {
227          case LOOK:      /* -> LOOK, MODE_COPY (only if never GZIP), or MODE_GZIP */
228             if (gz_look(state) == -1)
229                return -1;
230             if (state->how == LOOK)
231                return 0;
232             break;
233          case MODE_COPY:      /* -> MODE_COPY */
234             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
235                   == -1)
236                return -1;
237             state->x.next = state->out;
238             return 0;
239          case MODE_GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
240             strm->avail_out = state->size << 1;
241             strm->next_out = state->out;
242             if (gz_decomp(state) == -1)
243                return -1;
244       }
245    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
246    return 0;
247 }
248 
249 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
gz_skip(gz_statep state,z_off64_t len)250 local int gz_skip(gz_statep state, z_off64_t len)
251 {
252    unsigned n;
253 
254    /* skip over len bytes or reach end-of-file, whichever comes first */
255    while (len)
256       /* skip over whatever is in output buffer */
257       if (state->x.have) {
258          n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
259             (unsigned)len : state->x.have;
260          state->x.have -= n;
261          state->x.next += n;
262          state->x.pos += n;
263          len -= n;
264       }
265 
266    /* output buffer empty -- return if we're at the end of the input */
267       else if (state->eof && state->strm.avail_in == 0)
268          break;
269 
270    /* need more data to skip -- load up output buffer */
271       else {
272          /* get more output, looking for header if required */
273          if (gz_fetch(state) == -1)
274             return -1;
275       }
276    return 0;
277 }
278 
279 /* -- see zlib.h -- */
gzread(gzFile file,voidp buf,unsigned len)280 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len)
281 {
282    unsigned got, n;
283    gz_statep state;
284    z_streamp strm;
285 
286    /* get internal structure */
287    if (file == NULL)
288       return -1;
289    state = (gz_statep)file;
290    strm = &(state->strm);
291 
292    /* check that we're reading and that there's no (serious) error */
293    if (state->mode != GZ_READ ||
294          (state->err != Z_OK && state->err != Z_BUF_ERROR))
295       return -1;
296 
297    /* since an int is returned, make sure len fits in one, otherwise return
298       with an error (this avoids the flaw in the interface) */
299    if ((int)len < 0) {
300       gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
301       return -1;
302    }
303 
304    /* if len is zero, avoid unnecessary operations */
305    if (len == 0)
306       return 0;
307 
308    /* process a skip request */
309    if (state->seek) {
310       state->seek = 0;
311       if (gz_skip(state, state->skip) == -1)
312          return -1;
313    }
314 
315    /* get len bytes to buf, or less than len if at the end */
316    got = 0;
317    n = 0;
318    do {
319       /* first just try copying data from the output buffer */
320       if (state->x.have) {
321          n = state->x.have > len ? len : state->x.have;
322          memcpy(buf, state->x.next, n);
323          state->x.next += n;
324          state->x.have -= n;
325       }
326 
327       /* output buffer empty -- return if we're at the end of the input */
328       else if (state->eof && strm->avail_in == 0) {
329          state->past = 1;        /* tried to read past end */
330          break;
331       }
332 
333       /* need output data -- for small len or new stream load up our output
334          buffer */
335       else if (state->how == LOOK || len < (state->size << 1)) {
336          /* get more output, looking for header if required */
337          if (gz_fetch(state) == -1)
338             return -1;
339          continue;       /* no progress yet -- go back to copy above */
340          /* the copy above assures that we will leave with space in the
341             output buffer, allowing at least one gzungetc() to succeed */
342       }
343 
344       /* large len -- read directly into user buffer */
345       else if (state->how == MODE_COPY) {      /* read directly */
346          if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
347             return -1;
348       }
349 
350       /* large len -- decompress directly into user buffer */
351       else {  /* state->how == GZIP */
352          strm->avail_out = len;
353          strm->next_out = (unsigned char *)buf;
354          if (gz_decomp(state) == -1)
355             return -1;
356          n = state->x.have;
357          state->x.have = 0;
358       }
359 
360       /* update progress */
361       len -= n;
362       buf = (char *)buf + n;
363       got += n;
364       state->x.pos += n;
365    } while (len);
366 
367    /* return number of bytes read into user buffer (will fit in int) */
368    return (int)got;
369 }
370 
371 /* -- see zlib.h -- */
372 #ifdef Z_PREFIX_SET
373 #  undef z_gzgetc
374 #else
375 #  undef gzgetc
376 #endif
gzgetc(gzFile file)377 int ZEXPORT gzgetc(gzFile file)
378 {
379    int ret;
380    unsigned char buf[1];
381    gz_statep state;
382 
383    /* get internal structure */
384    if (file == NULL)
385       return -1;
386    state = (gz_statep)file;
387 
388    /* check that we're reading and that there's no (serious) error */
389    if (state->mode != GZ_READ ||
390          (state->err != Z_OK && state->err != Z_BUF_ERROR))
391       return -1;
392 
393    /* try output buffer (no need to check for skip request) */
394    if (state->x.have) {
395       state->x.have--;
396       state->x.pos++;
397       return *(state->x.next)++;
398    }
399 
400    /* nothing there -- try gzread() */
401    ret = gzread(file, buf, 1);
402    return ret < 1 ? -1 : buf[0];
403 }
404 
gzgetc_(gzFile file)405 int ZEXPORT gzgetc_(gzFile file)
406 {
407    return gzgetc(file);
408 }
409 
410 /* -- see zlib.h -- */
gzungetc(int c,gzFile file)411 int ZEXPORT gzungetc(int c, gzFile file)
412 {
413    gz_statep state;
414 
415    /* get internal structure */
416    if (file == NULL)
417       return -1;
418    state = (gz_statep)file;
419 
420    /* check that we're reading and that there's no (serious) error */
421    if (state->mode != GZ_READ ||
422          (state->err != Z_OK && state->err != Z_BUF_ERROR))
423       return -1;
424 
425    /* process a skip request */
426    if (state->seek) {
427       state->seek = 0;
428       if (gz_skip(state, state->skip) == -1)
429          return -1;
430    }
431 
432    /* can't push EOF */
433    if (c < 0)
434       return -1;
435 
436    /* if output buffer empty, put byte at end (allows more pushing) */
437    if (state->x.have == 0) {
438       state->x.have = 1;
439       state->x.next = state->out + (state->size << 1) - 1;
440       state->x.next[0] = c;
441       state->x.pos--;
442       state->past = 0;
443       return c;
444    }
445 
446    /* if no room, give up (must have already done a gzungetc()) */
447    if (state->x.have == (state->size << 1)) {
448       gz_error(state, Z_DATA_ERROR, "out of room to push characters");
449       return -1;
450    }
451 
452    /* slide output data if needed and insert byte before existing data */
453    if (state->x.next == state->out) {
454       unsigned char *src = state->out + state->x.have;
455       unsigned char *dest = state->out + (state->size << 1);
456       while (src > state->out)
457          *--dest = *--src;
458       state->x.next = dest;
459    }
460    state->x.have++;
461    state->x.next--;
462    state->x.next[0] = c;
463    state->x.pos--;
464    state->past = 0;
465    return c;
466 }
467 
468 /* -- see zlib.h -- */
gzgets(gzFile file,char * buf,int len)469 char * ZEXPORT gzgets(gzFile file, char *buf, int len)
470 {
471    unsigned left, n;
472    char *str;
473    unsigned char *eol;
474    gz_statep state;
475 
476    /* check parameters and get internal structure */
477    if (file == NULL || buf == NULL || len < 1)
478       return NULL;
479    state = (gz_statep)file;
480 
481    /* check that we're reading and that there's no (serious) error */
482    if (state->mode != GZ_READ ||
483          (state->err != Z_OK && state->err != Z_BUF_ERROR))
484       return NULL;
485 
486    /* process a skip request */
487    if (state->seek) {
488       state->seek = 0;
489       if (gz_skip(state, state->skip) == -1)
490          return NULL;
491    }
492 
493    /* copy output bytes up to new line or len - 1, whichever comes first --
494       append a terminating zero to the string (we don't check for a zero in
495       the contents, let the user worry about that) */
496    str = buf;
497    left = (unsigned)len - 1;
498    if (left) do {
499       /* assure that something is in the output buffer */
500       if (state->x.have == 0 && gz_fetch(state) == -1)
501          return NULL;                /* error */
502       if (state->x.have == 0) {       /* end of file */
503          state->past = 1;            /* read past end */
504          break;                      /* return what we have */
505       }
506 
507       /* look for end-of-line in current output buffer */
508       n = state->x.have > left ? left : state->x.have;
509       eol = (unsigned char *)memchr(state->x.next, '\n', n);
510       if (eol != NULL)
511          n = (unsigned)(eol - state->x.next) + 1;
512 
513       /* copy through end-of-line, or remainder if not found */
514       memcpy(buf, state->x.next, n);
515       state->x.have -= n;
516       state->x.next += n;
517       state->x.pos += n;
518       left -= n;
519       buf += n;
520    } while (left && eol == NULL);
521 
522    /* return terminated string, or if nothing, end of file */
523    if (buf == str)
524       return NULL;
525    buf[0] = 0;
526    return str;
527 }
528 
529 /* -- see zlib.h -- */
gzdirect(gzFile file)530 int ZEXPORT gzdirect(gzFile file)
531 {
532    gz_statep state;
533 
534    /* get internal structure */
535    if (file == NULL)
536       return 0;
537    state = (gz_statep)file;
538 
539    /* if the state is not known, but we can find out, then do so (this is
540       mainly for right after a gzopen() or gzdopen()) */
541    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
542       (void)gz_look(state);
543 
544    /* return 1 if transparent, 0 if processing a gzip stream */
545    return state->direct;
546 }
547 
548 /* -- see zlib.h -- */
gzclose_r(gzFile file)549 int gzclose_r(gzFile file)
550 {
551    int ret, err;
552    gz_statep state;
553 
554    /* get internal structure */
555    if (file == NULL)
556       return Z_STREAM_ERROR;
557    state = (gz_statep)file;
558 
559    /* check that we're reading */
560    if (state->mode != GZ_READ)
561       return Z_STREAM_ERROR;
562 
563    /* free memory and close file */
564    if (state->size) {
565       inflateEnd(&(state->strm));
566       free(state->out);
567       free(state->in);
568    }
569    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
570    gz_error(state, Z_OK, NULL);
571    free(state->path);
572    ret = close(state->fd);
573    free(state);
574    return ret ? Z_ERRNO : err;
575 }
576