xref: /openbsd/lib/libz/gzread.c (revision 73471bf0)
1 /*	$OpenBSD: gzread.c,v 1.1 2021/07/04 14:24:49 tb Exp $ */
2 /* gzread.c -- zlib functions for reading gzip files
3  * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
4  * For conditions of distribution and use, see copyright notice in zlib.h
5  */
6 
7 #include "gzguts.h"
8 
9 /* Local functions */
10 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
11 local int gz_avail OF((gz_statep));
12 local int gz_look OF((gz_statep));
13 local int gz_decomp OF((gz_statep));
14 local int gz_fetch OF((gz_statep));
15 local int gz_skip OF((gz_statep, z_off64_t));
16 local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
17 
18 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
19    state->fd, and update state->eof, state->err, and state->msg as appropriate.
20    This function needs to loop on read(), since read() is not guaranteed to
21    read the number of bytes requested, depending on the type of descriptor. */
22 local int gz_load(state, buf, len, have)
23     gz_statep state;
24     unsigned char *buf;
25     unsigned len;
26     unsigned *have;
27 {
28     int ret;
29     unsigned get, max = ((unsigned)-1 >> 2) + 1;
30 
31     *have = 0;
32     do {
33         get = len - *have;
34         if (get > max)
35             get = max;
36         ret = read(state->fd, buf + *have, get);
37         if (ret <= 0)
38             break;
39         *have += (unsigned)ret;
40     } while (*have < len);
41     if (ret < 0) {
42         gz_error(state, Z_ERRNO, zstrerror());
43         return -1;
44     }
45     if (ret == 0)
46         state->eof = 1;
47     return 0;
48 }
49 
50 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
51    error, 0 otherwise.  Note that the eof flag is set when the end of the input
52    file is reached, even though there may be unused data in the buffer.  Once
53    that data has been used, no more attempts will be made to read the file.
54    If strm->avail_in != 0, then the current data is moved to the beginning of
55    the input buffer, and then the remainder of the buffer is loaded with the
56    available data from the input file. */
57 local int gz_avail(state)
58     gz_statep state;
59 {
60     unsigned got;
61     z_streamp strm = &(state->strm);
62 
63     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
64         return -1;
65     if (state->eof == 0) {
66         if (strm->avail_in) {       /* copy what's there to the start */
67             unsigned char *p = state->in;
68             unsigned const char *q = strm->next_in;
69             unsigned n = strm->avail_in;
70             do {
71                 *p++ = *q++;
72             } while (--n);
73         }
74         if (gz_load(state, state->in + strm->avail_in,
75                     state->size - strm->avail_in, &got) == -1)
76             return -1;
77         strm->avail_in += got;
78         strm->next_in = state->in;
79     }
80     return 0;
81 }
82 
83 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
84    If this is the first time in, allocate required memory.  state->how will be
85    left unchanged if there is no more input data available, will be set to COPY
86    if there is no gzip header and direct copying will be performed, or it will
87    be set to GZIP for decompression.  If direct copying, then leftover input
88    data from the input buffer will be copied to the output buffer.  In that
89    case, all further file reads will be directly to either the output buffer or
90    a user buffer.  If decompressing, the inflate state will be initialized.
91    gz_look() will return 0 on success or -1 on failure. */
92 local int gz_look(state)
93     gz_statep state;
94 {
95     z_streamp strm = &(state->strm);
96 
97     /* allocate read buffers and inflate memory */
98     if (state->size == 0) {
99         /* allocate buffers */
100         state->in = (unsigned char *)malloc(state->want);
101         state->out = (unsigned char *)malloc(state->want << 1);
102         if (state->in == NULL || state->out == NULL) {
103             free(state->out);
104             free(state->in);
105             gz_error(state, Z_MEM_ERROR, "out of memory");
106             return -1;
107         }
108         state->size = state->want;
109 
110         /* allocate inflate memory */
111         state->strm.zalloc = Z_NULL;
112         state->strm.zfree = Z_NULL;
113         state->strm.opaque = Z_NULL;
114         state->strm.avail_in = 0;
115         state->strm.next_in = Z_NULL;
116         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
117             free(state->out);
118             free(state->in);
119             state->size = 0;
120             gz_error(state, Z_MEM_ERROR, "out of memory");
121             return -1;
122         }
123     }
124 
125     /* get at least the magic bytes in the input buffer */
126     if (strm->avail_in < 2) {
127         if (gz_avail(state) == -1)
128             return -1;
129         if (strm->avail_in == 0)
130             return 0;
131     }
132 
133     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
134        a logical dilemma here when considering the case of a partially written
135        gzip file, to wit, if a single 31 byte is written, then we cannot tell
136        whether this is a single-byte file, or just a partially written gzip
137        file -- for here we assume that if a gzip file is being written, then
138        the header will be written in a single operation, so that reading a
139        single byte is sufficient indication that it is not a gzip file) */
140     if (strm->avail_in > 1 &&
141             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
142         inflateReset(strm);
143         state->how = GZIP;
144         state->direct = 0;
145         return 0;
146     }
147 
148     /* no gzip header -- if we were decoding gzip before, then this is trailing
149        garbage.  Ignore the trailing garbage and finish. */
150     if (state->direct == 0) {
151         strm->avail_in = 0;
152         state->eof = 1;
153         state->x.have = 0;
154         return 0;
155     }
156 
157     /* doing raw i/o, copy any leftover input to output -- this assumes that
158        the output buffer is larger than the input buffer, which also assures
159        space for gzungetc() */
160     state->x.next = state->out;
161     if (strm->avail_in) {
162         memcpy(state->x.next, strm->next_in, strm->avail_in);
163         state->x.have = strm->avail_in;
164         strm->avail_in = 0;
165     }
166     state->how = COPY;
167     state->direct = 1;
168     return 0;
169 }
170 
171 /* Decompress from input to the provided next_out and avail_out in the state.
172    On return, state->x.have and state->x.next point to the just decompressed
173    data.  If the gzip stream completes, state->how is reset to LOOK to look for
174    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
175    on success, -1 on failure. */
176 local int gz_decomp(state)
177     gz_statep state;
178 {
179     int ret = Z_OK;
180     unsigned had;
181     z_streamp strm = &(state->strm);
182 
183     /* fill output buffer up to end of deflate stream */
184     had = strm->avail_out;
185     do {
186         /* get more input for inflate() */
187         if (strm->avail_in == 0 && gz_avail(state) == -1)
188             return -1;
189         if (strm->avail_in == 0) {
190             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
191             break;
192         }
193 
194         /* decompress and handle errors */
195         ret = inflate(strm, Z_NO_FLUSH);
196         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
197             gz_error(state, Z_STREAM_ERROR,
198                      "internal error: inflate stream corrupt");
199             return -1;
200         }
201         if (ret == Z_MEM_ERROR) {
202             gz_error(state, Z_MEM_ERROR, "out of memory");
203             return -1;
204         }
205         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
206             gz_error(state, Z_DATA_ERROR,
207                      strm->msg == NULL ? "compressed data error" : strm->msg);
208             return -1;
209         }
210     } while (strm->avail_out && ret != Z_STREAM_END);
211 
212     /* update available output */
213     state->x.have = had - strm->avail_out;
214     state->x.next = strm->next_out - state->x.have;
215 
216     /* if the gzip stream completed successfully, look for another */
217     if (ret == Z_STREAM_END)
218         state->how = LOOK;
219 
220     /* good decompression */
221     return 0;
222 }
223 
224 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
225    Data is either copied from the input file or decompressed from the input
226    file depending on state->how.  If state->how is LOOK, then a gzip header is
227    looked for to determine whether to copy or decompress.  Returns -1 on error,
228    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
229    end of the input file has been reached and all data has been processed.  */
230 local int gz_fetch(state)
231     gz_statep state;
232 {
233     z_streamp strm = &(state->strm);
234 
235     do {
236         switch(state->how) {
237         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
238             if (gz_look(state) == -1)
239                 return -1;
240             if (state->how == LOOK)
241                 return 0;
242             break;
243         case COPY:      /* -> COPY */
244             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
245                     == -1)
246                 return -1;
247             state->x.next = state->out;
248             return 0;
249         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
250             strm->avail_out = state->size << 1;
251             strm->next_out = state->out;
252             if (gz_decomp(state) == -1)
253                 return -1;
254         }
255     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
256     return 0;
257 }
258 
259 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
260 local int gz_skip(state, len)
261     gz_statep state;
262     z_off64_t len;
263 {
264     unsigned n;
265 
266     /* skip over len bytes or reach end-of-file, whichever comes first */
267     while (len)
268         /* skip over whatever is in output buffer */
269         if (state->x.have) {
270             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
271                 (unsigned)len : state->x.have;
272             state->x.have -= n;
273             state->x.next += n;
274             state->x.pos += n;
275             len -= n;
276         }
277 
278         /* output buffer empty -- return if we're at the end of the input */
279         else if (state->eof && state->strm.avail_in == 0)
280             break;
281 
282         /* need more data to skip -- load up output buffer */
283         else {
284             /* get more output, looking for header if required */
285             if (gz_fetch(state) == -1)
286                 return -1;
287         }
288     return 0;
289 }
290 
291 /* Read len bytes into buf from file, or less than len up to the end of the
292    input.  Return the number of bytes read.  If zero is returned, either the
293    end of file was reached, or there was an error.  state->err must be
294    consulted in that case to determine which. */
295 local z_size_t gz_read(state, buf, len)
296     gz_statep state;
297     voidp buf;
298     z_size_t len;
299 {
300     z_size_t got;
301     unsigned n;
302 
303     /* if len is zero, avoid unnecessary operations */
304     if (len == 0)
305         return 0;
306 
307     /* process a skip request */
308     if (state->seek) {
309         state->seek = 0;
310         if (gz_skip(state, state->skip) == -1)
311             return 0;
312     }
313 
314     /* get len bytes to buf, or less than len if at the end */
315     got = 0;
316     do {
317         /* set n to the maximum amount of len that fits in an unsigned int */
318         n = -1;
319         if (n > len)
320             n = len;
321 
322         /* first just try copying data from the output buffer */
323         if (state->x.have) {
324             if (state->x.have < n)
325                 n = state->x.have;
326             memcpy(buf, state->x.next, n);
327             state->x.next += n;
328             state->x.have -= n;
329         }
330 
331         /* output buffer empty -- return if we're at the end of the input */
332         else if (state->eof && state->strm.avail_in == 0) {
333             state->past = 1;        /* tried to read past end */
334             break;
335         }
336 
337         /* need output data -- for small len or new stream load up our output
338            buffer */
339         else if (state->how == LOOK || n < (state->size << 1)) {
340             /* get more output, looking for header if required */
341             if (gz_fetch(state) == -1)
342                 return 0;
343             continue;       /* no progress yet -- go back to copy above */
344             /* the copy above assures that we will leave with space in the
345                output buffer, allowing at least one gzungetc() to succeed */
346         }
347 
348         /* large len -- read directly into user buffer */
349         else if (state->how == COPY) {      /* read directly */
350             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
351                 return 0;
352         }
353 
354         /* large len -- decompress directly into user buffer */
355         else {  /* state->how == GZIP */
356             state->strm.avail_out = n;
357             state->strm.next_out = (unsigned char *)buf;
358             if (gz_decomp(state) == -1)
359                 return 0;
360             n = state->x.have;
361             state->x.have = 0;
362         }
363 
364         /* update progress */
365         len -= n;
366         buf = (char *)buf + n;
367         got += n;
368         state->x.pos += n;
369     } while (len);
370 
371     /* return number of bytes read into user buffer */
372     return got;
373 }
374 
375 /* -- see zlib.h -- */
376 int ZEXPORT gzread(file, buf, len)
377     gzFile file;
378     voidp buf;
379     unsigned len;
380 {
381     gz_statep state;
382 
383     /* get internal structure */
384     if (file == NULL)
385         return -1;
386     state = (gz_statep)file;
387 
388     /* check that we're reading and that there's no (serious) error */
389     if (state->mode != GZ_READ ||
390             (state->err != Z_OK && state->err != Z_BUF_ERROR))
391         return -1;
392 
393     /* since an int is returned, make sure len fits in one, otherwise return
394        with an error (this avoids a flaw in the interface) */
395     if ((int)len < 0) {
396         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
397         return -1;
398     }
399 
400     /* read len or fewer bytes to buf */
401     len = gz_read(state, buf, len);
402 
403     /* check for an error */
404     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
405         return -1;
406 
407     /* return the number of bytes read (this is assured to fit in an int) */
408     return (int)len;
409 }
410 
411 /* -- see zlib.h -- */
412 z_size_t ZEXPORT gzfread(buf, size, nitems, file)
413     voidp buf;
414     z_size_t size;
415     z_size_t nitems;
416     gzFile file;
417 {
418     z_size_t len;
419     gz_statep state;
420 
421     /* get internal structure */
422     if (file == NULL)
423         return 0;
424     state = (gz_statep)file;
425 
426     /* check that we're reading and that there's no (serious) error */
427     if (state->mode != GZ_READ ||
428             (state->err != Z_OK && state->err != Z_BUF_ERROR))
429         return 0;
430 
431     /* compute bytes to read -- error on overflow */
432     len = nitems * size;
433     if (size && len / size != nitems) {
434         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
435         return 0;
436     }
437 
438     /* read len or fewer bytes to buf, return the number of full items read */
439     return len ? gz_read(state, buf, len) / size : 0;
440 }
441 
442 /* -- see zlib.h -- */
443 #ifdef Z_PREFIX_SET
444 #  undef z_gzgetc
445 #else
446 #  undef gzgetc
447 #endif
448 int ZEXPORT gzgetc(file)
449     gzFile file;
450 {
451     int ret;
452     unsigned char buf[1];
453     gz_statep state;
454 
455     /* get internal structure */
456     if (file == NULL)
457         return -1;
458     state = (gz_statep)file;
459 
460     /* check that we're reading and that there's no (serious) error */
461     if (state->mode != GZ_READ ||
462         (state->err != Z_OK && state->err != Z_BUF_ERROR))
463         return -1;
464 
465     /* try output buffer (no need to check for skip request) */
466     if (state->x.have) {
467         state->x.have--;
468         state->x.pos++;
469         return *(state->x.next)++;
470     }
471 
472     /* nothing there -- try gz_read() */
473     ret = gz_read(state, buf, 1);
474     return ret < 1 ? -1 : buf[0];
475 }
476 
477 int ZEXPORT gzgetc_(file)
478 gzFile file;
479 {
480     return gzgetc(file);
481 }
482 
483 /* -- see zlib.h -- */
484 int ZEXPORT gzungetc(c, file)
485     int c;
486     gzFile file;
487 {
488     gz_statep state;
489 
490     /* get internal structure */
491     if (file == NULL)
492         return -1;
493     state = (gz_statep)file;
494 
495     /* check that we're reading and that there's no (serious) error */
496     if (state->mode != GZ_READ ||
497         (state->err != Z_OK && state->err != Z_BUF_ERROR))
498         return -1;
499 
500     /* process a skip request */
501     if (state->seek) {
502         state->seek = 0;
503         if (gz_skip(state, state->skip) == -1)
504             return -1;
505     }
506 
507     /* can't push EOF */
508     if (c < 0)
509         return -1;
510 
511     /* if output buffer empty, put byte at end (allows more pushing) */
512     if (state->x.have == 0) {
513         state->x.have = 1;
514         state->x.next = state->out + (state->size << 1) - 1;
515         state->x.next[0] = (unsigned char)c;
516         state->x.pos--;
517         state->past = 0;
518         return c;
519     }
520 
521     /* if no room, give up (must have already done a gzungetc()) */
522     if (state->x.have == (state->size << 1)) {
523         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
524         return -1;
525     }
526 
527     /* slide output data if needed and insert byte before existing data */
528     if (state->x.next == state->out) {
529         unsigned char *src = state->out + state->x.have;
530         unsigned char *dest = state->out + (state->size << 1);
531         while (src > state->out)
532             *--dest = *--src;
533         state->x.next = dest;
534     }
535     state->x.have++;
536     state->x.next--;
537     state->x.next[0] = (unsigned char)c;
538     state->x.pos--;
539     state->past = 0;
540     return c;
541 }
542 
543 /* -- see zlib.h -- */
544 char * ZEXPORT gzgets(file, buf, len)
545     gzFile file;
546     char *buf;
547     int len;
548 {
549     unsigned left, n;
550     char *str;
551     unsigned char *eol;
552     gz_statep state;
553 
554     /* check parameters and get internal structure */
555     if (file == NULL || buf == NULL || len < 1)
556         return NULL;
557     state = (gz_statep)file;
558 
559     /* check that we're reading and that there's no (serious) error */
560     if (state->mode != GZ_READ ||
561         (state->err != Z_OK && state->err != Z_BUF_ERROR))
562         return NULL;
563 
564     /* process a skip request */
565     if (state->seek) {
566         state->seek = 0;
567         if (gz_skip(state, state->skip) == -1)
568             return NULL;
569     }
570 
571     /* copy output bytes up to new line or len - 1, whichever comes first --
572        append a terminating zero to the string (we don't check for a zero in
573        the contents, let the user worry about that) */
574     str = buf;
575     left = (unsigned)len - 1;
576     if (left) do {
577         /* assure that something is in the output buffer */
578         if (state->x.have == 0 && gz_fetch(state) == -1)
579             return NULL;                /* error */
580         if (state->x.have == 0) {       /* end of file */
581             state->past = 1;            /* read past end */
582             break;                      /* return what we have */
583         }
584 
585         /* look for end-of-line in current output buffer */
586         n = state->x.have > left ? left : state->x.have;
587         eol = (unsigned char *)memchr(state->x.next, '\n', n);
588         if (eol != NULL)
589             n = (unsigned)(eol - state->x.next) + 1;
590 
591         /* copy through end-of-line, or remainder if not found */
592         memcpy(buf, state->x.next, n);
593         state->x.have -= n;
594         state->x.next += n;
595         state->x.pos += n;
596         left -= n;
597         buf += n;
598     } while (left && eol == NULL);
599 
600     /* return terminated string, or if nothing, end of file */
601     if (buf == str)
602         return NULL;
603     buf[0] = 0;
604     return str;
605 }
606 
607 /* -- see zlib.h -- */
608 int ZEXPORT gzdirect(file)
609     gzFile file;
610 {
611     gz_statep state;
612 
613     /* get internal structure */
614     if (file == NULL)
615         return 0;
616     state = (gz_statep)file;
617 
618     /* if the state is not known, but we can find out, then do so (this is
619        mainly for right after a gzopen() or gzdopen()) */
620     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
621         (void)gz_look(state);
622 
623     /* return 1 if transparent, 0 if processing a gzip stream */
624     return state->direct;
625 }
626 
627 /* -- see zlib.h -- */
628 int ZEXPORT gzclose_r(file)
629     gzFile file;
630 {
631     int ret, err;
632     gz_statep state;
633 
634     /* get internal structure */
635     if (file == NULL)
636         return Z_STREAM_ERROR;
637     state = (gz_statep)file;
638 
639     /* check that we're reading */
640     if (state->mode != GZ_READ)
641         return Z_STREAM_ERROR;
642 
643     /* free memory and close file */
644     if (state->size) {
645         inflateEnd(&(state->strm));
646         free(state->out);
647         free(state->in);
648     }
649     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
650     gz_error(state, Z_OK, NULL);
651     free(state->path);
652     ret = close(state->fd);
653     free(state);
654     return ret ? Z_ERRNO : err;
655 }
656