1 /* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004-2017 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #include "gzguts.h"
7
8 /* Local functions */
9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10 local int gz_avail OF((gz_statep));
11 local int gz_look OF((gz_statep));
12 local int gz_decomp OF((gz_statep));
13 local int gz_fetch OF((gz_statep));
14 local int gz_skip OF((gz_statep, z_off64_t));
15 local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
16
17 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
18 state->fd, and update state->eof, state->err, and state->msg as appropriate.
19 This function needs to loop on read(), since read() is not guaranteed to
20 read the number of bytes requested, depending on the type of descriptor. */
gz_load(state,buf,len,have)21 local int gz_load(state, buf, len, have)
22 gz_statep state;
23 unsigned char *buf;
24 unsigned len;
25 unsigned *have;
26 {
27 int ret;
28 unsigned get, max = ((unsigned)-1 >> 2) + 1;
29
30 *have = 0;
31 do {
32 get = len - *have;
33 if (get > max)
34 get = max;
35 ret = read(state->fd, buf + *have, get);
36 if (ret <= 0)
37 break;
38 *have += (unsigned)ret;
39 } while (*have < len);
40 if (ret < 0) {
41 gz_error(state, Z_ERRNO, zstrerror());
42 return -1;
43 }
44 if (ret == 0)
45 state->eof = 1;
46 return 0;
47 }
48
49 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
50 error, 0 otherwise. Note that the eof flag is set when the end of the input
51 file is reached, even though there may be unused data in the buffer. Once
52 that data has been used, no more attempts will be made to read the file.
53 If strm->avail_in != 0, then the current data is moved to the beginning of
54 the input buffer, and then the remainder of the buffer is loaded with the
55 available data from the input file. */
gz_avail(state)56 local int gz_avail(state)
57 gz_statep state;
58 {
59 unsigned got;
60 z_streamp strm = &(state->strm);
61
62 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
63 return -1;
64 if (state->eof == 0) {
65 if (strm->avail_in) { /* copy what's there to the start */
66 unsigned char *p = state->in;
67 unsigned const char *q = strm->next_in;
68 unsigned n = strm->avail_in;
69 do {
70 *p++ = *q++;
71 } while (--n);
72 }
73 if (gz_load(state, state->in + strm->avail_in,
74 state->size - strm->avail_in, &got) == -1)
75 return -1;
76 strm->avail_in += got;
77 strm->next_in = state->in;
78 }
79 return 0;
80 }
81
82 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
83 If this is the first time in, allocate required memory. state->how will be
84 left unchanged if there is no more input data available, will be set to COPY
85 if there is no gzip header and direct copying will be performed, or it will
86 be set to GZIP for decompression. If direct copying, then leftover input
87 data from the input buffer will be copied to the output buffer. In that
88 case, all further file reads will be directly to either the output buffer or
89 a user buffer. If decompressing, the inflate state will be initialized.
90 gz_look() will return 0 on success or -1 on failure. */
gz_look(state)91 local int gz_look(state)
92 gz_statep state;
93 {
94 z_streamp strm = &(state->strm);
95
96 /* allocate read buffers and inflate memory */
97 if (state->size == 0) {
98 /* allocate buffers */
99 state->in = (unsigned char *)malloc(state->want);
100 state->out = (unsigned char *)malloc(state->want << 1);
101 if (state->in == NULL || state->out == NULL) {
102 free(state->out);
103 free(state->in);
104 gz_error(state, Z_MEM_ERROR, "out of memory");
105 return -1;
106 }
107 state->size = state->want;
108
109 /* allocate inflate memory */
110 state->strm.zalloc = Z_NULL;
111 state->strm.zfree = Z_NULL;
112 state->strm.opaque = Z_NULL;
113 state->strm.avail_in = 0;
114 state->strm.next_in = Z_NULL;
115 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
116 free(state->out);
117 free(state->in);
118 state->size = 0;
119 gz_error(state, Z_MEM_ERROR, "out of memory");
120 return -1;
121 }
122 }
123
124 /* get at least the magic bytes in the input buffer */
125 if (strm->avail_in < 2) {
126 if (gz_avail(state) == -1)
127 return -1;
128 if (strm->avail_in == 0)
129 return 0;
130 }
131
132 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
133 a logical dilemma here when considering the case of a partially written
134 gzip file, to wit, if a single 31 byte is written, then we cannot tell
135 whether this is a single-byte file, or just a partially written gzip
136 file -- for here we assume that if a gzip file is being written, then
137 the header will be written in a single operation, so that reading a
138 single byte is sufficient indication that it is not a gzip file) */
139 if (strm->avail_in > 1 &&
140 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
141 inflateReset(strm);
142 state->how = GZIP;
143 state->direct = 0;
144 return 0;
145 }
146
147 /* no gzip header -- if we were decoding gzip before, then this is trailing
148 garbage. Ignore the trailing garbage and finish. */
149 if (state->direct == 0) {
150 strm->avail_in = 0;
151 state->eof = 1;
152 state->x.have = 0;
153 return 0;
154 }
155
156 /* doing raw i/o, copy any leftover input to output -- this assumes that
157 the output buffer is larger than the input buffer, which also assures
158 space for gzungetc() */
159 state->x.next = state->out;
160 memcpy(state->x.next, strm->next_in, strm->avail_in);
161 state->x.have = strm->avail_in;
162 strm->avail_in = 0;
163 state->how = COPY;
164 state->direct = 1;
165 return 0;
166 }
167
168 /* Decompress from input to the provided next_out and avail_out in the state.
169 On return, state->x.have and state->x.next point to the just decompressed
170 data. If the gzip stream completes, state->how is reset to LOOK to look for
171 the next gzip stream or raw data, once state->x.have is depleted. Returns 0
172 on success, -1 on failure. */
gz_decomp(state)173 local int gz_decomp(state)
174 gz_statep state;
175 {
176 int ret = Z_OK;
177 unsigned had;
178 z_streamp strm = &(state->strm);
179
180 /* fill output buffer up to end of deflate stream */
181 had = strm->avail_out;
182 do {
183 /* get more input for inflate() */
184 if (strm->avail_in == 0 && gz_avail(state) == -1)
185 return -1;
186 if (strm->avail_in == 0) {
187 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
188 break;
189 }
190
191 /* decompress and handle errors */
192 ret = inflate(strm, Z_NO_FLUSH);
193 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
194 gz_error(state, Z_STREAM_ERROR,
195 "internal error: inflate stream corrupt");
196 return -1;
197 }
198 if (ret == Z_MEM_ERROR) {
199 gz_error(state, Z_MEM_ERROR, "out of memory");
200 return -1;
201 }
202 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
203 gz_error(state, Z_DATA_ERROR,
204 strm->msg == NULL ? "compressed data error" : strm->msg);
205 return -1;
206 }
207 } while (strm->avail_out && ret != Z_STREAM_END);
208
209 /* update available output */
210 state->x.have = had - strm->avail_out;
211 state->x.next = strm->next_out - state->x.have;
212
213 /* if the gzip stream completed successfully, look for another */
214 if (ret == Z_STREAM_END)
215 state->how = LOOK;
216
217 /* good decompression */
218 return 0;
219 }
220
221 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
222 Data is either copied from the input file or decompressed from the input
223 file depending on state->how. If state->how is LOOK, then a gzip header is
224 looked for to determine whether to copy or decompress. Returns -1 on error,
225 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
226 end of the input file has been reached and all data has been processed. */
gz_fetch(state)227 local int gz_fetch(state)
228 gz_statep state;
229 {
230 z_streamp strm = &(state->strm);
231
232 do {
233 switch(state->how) {
234 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
235 if (gz_look(state) == -1)
236 return -1;
237 if (state->how == LOOK)
238 return 0;
239 break;
240 case COPY: /* -> COPY */
241 if (gz_load(state, state->out, state->size << 1, &(state->x.have))
242 == -1)
243 return -1;
244 state->x.next = state->out;
245 return 0;
246 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
247 strm->avail_out = state->size << 1;
248 strm->next_out = state->out;
249 if (gz_decomp(state) == -1)
250 return -1;
251 }
252 } while (state->x.have == 0 && (!state->eof || strm->avail_in));
253 return 0;
254 }
255
256 /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
gz_skip(state,len)257 local int gz_skip(state, len)
258 gz_statep state;
259 z_off64_t len;
260 {
261 unsigned n;
262
263 /* skip over len bytes or reach end-of-file, whichever comes first */
264 while (len)
265 /* skip over whatever is in output buffer */
266 if (state->x.have) {
267 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
268 (unsigned)len : state->x.have;
269 state->x.have -= n;
270 state->x.next += n;
271 state->x.pos += n;
272 len -= n;
273 }
274
275 /* output buffer empty -- return if we're at the end of the input */
276 else if (state->eof && state->strm.avail_in == 0)
277 break;
278
279 /* need more data to skip -- load up output buffer */
280 else {
281 /* get more output, looking for header if required */
282 if (gz_fetch(state) == -1)
283 return -1;
284 }
285 return 0;
286 }
287
288 /* Read len bytes into buf from file, or less than len up to the end of the
289 input. Return the number of bytes read. If zero is returned, either the
290 end of file was reached, or there was an error. state->err must be
291 consulted in that case to determine which. */
gz_read(state,buf,len)292 local z_size_t gz_read(state, buf, len)
293 gz_statep state;
294 voidp buf;
295 z_size_t len;
296 {
297 z_size_t got;
298 unsigned n;
299
300 /* if len is zero, avoid unnecessary operations */
301 if (len == 0)
302 return 0;
303
304 /* process a skip request */
305 if (state->seek) {
306 state->seek = 0;
307 if (gz_skip(state, state->skip) == -1)
308 return 0;
309 }
310
311 /* get len bytes to buf, or less than len if at the end */
312 got = 0;
313 do {
314 /* set n to the maximum amount of len that fits in an unsigned int */
315 n = (unsigned)-1;
316 if (n > len)
317 n = (unsigned)len;
318
319 /* first just try copying data from the output buffer */
320 if (state->x.have) {
321 if (state->x.have < n)
322 n = state->x.have;
323 memcpy(buf, state->x.next, n);
324 state->x.next += n;
325 state->x.have -= n;
326 }
327
328 /* output buffer empty -- return if we're at the end of the input */
329 else if (state->eof && state->strm.avail_in == 0) {
330 state->past = 1; /* tried to read past end */
331 break;
332 }
333
334 /* need output data -- for small len or new stream load up our output
335 buffer */
336 else if (state->how == LOOK || n < (state->size << 1)) {
337 /* get more output, looking for header if required */
338 if (gz_fetch(state) == -1)
339 return 0;
340 continue; /* no progress yet -- go back to copy above */
341 /* the copy above assures that we will leave with space in the
342 output buffer, allowing at least one gzungetc() to succeed */
343 }
344
345 /* large len -- read directly into user buffer */
346 else if (state->how == COPY) { /* read directly */
347 if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
348 return 0;
349 }
350
351 /* large len -- decompress directly into user buffer */
352 else { /* state->how == GZIP */
353 state->strm.avail_out = n;
354 state->strm.next_out = (unsigned char *)buf;
355 if (gz_decomp(state) == -1)
356 return 0;
357 n = state->x.have;
358 state->x.have = 0;
359 }
360
361 /* update progress */
362 len -= n;
363 buf = (char *)buf + n;
364 got += n;
365 state->x.pos += n;
366 } while (len);
367
368 /* return number of bytes read into user buffer */
369 return got;
370 }
371
372 /* -- see zlib.h -- */
gzread(file,buf,len)373 int ZEXPORT gzread(file, buf, len)
374 gzFile file;
375 voidp buf;
376 unsigned len;
377 {
378 gz_statep state;
379
380 /* get internal structure */
381 if (file == NULL)
382 return -1;
383 state = (gz_statep)file;
384
385 /* check that we're reading and that there's no (serious) error */
386 if (state->mode != GZ_READ ||
387 (state->err != Z_OK && state->err != Z_BUF_ERROR))
388 return -1;
389
390 /* since an int is returned, make sure len fits in one, otherwise return
391 with an error (this avoids a flaw in the interface) */
392 if ((int)len < 0) {
393 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
394 return -1;
395 }
396
397 /* read len or fewer bytes to buf */
398 len = (unsigned)gz_read(state, buf, len);
399
400 /* check for an error */
401 if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
402 return -1;
403
404 /* return the number of bytes read (this is assured to fit in an int) */
405 return (int)len;
406 }
407
408 /* -- see zlib.h -- */
gzfread(buf,size,nitems,file)409 z_size_t ZEXPORT gzfread(buf, size, nitems, file)
410 voidp buf;
411 z_size_t size;
412 z_size_t nitems;
413 gzFile file;
414 {
415 z_size_t len;
416 gz_statep state;
417
418 /* get internal structure */
419 if (file == NULL)
420 return 0;
421 state = (gz_statep)file;
422
423 /* check that we're reading and that there's no (serious) error */
424 if (state->mode != GZ_READ ||
425 (state->err != Z_OK && state->err != Z_BUF_ERROR))
426 return 0;
427
428 /* compute bytes to read -- error on overflow */
429 len = nitems * size;
430 if (size && len / size != nitems) {
431 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
432 return 0;
433 }
434
435 /* read len or fewer bytes to buf, return the number of full items read */
436 return len ? gz_read(state, buf, len) / size : 0;
437 }
438
439 /* -- see zlib.h -- */
440 #ifdef Z_PREFIX_SET
441 # undef z_gzgetc
442 #else
443 # undef gzgetc
444 #endif
gzgetc(file)445 int ZEXPORT gzgetc(file)
446 gzFile file;
447 {
448 unsigned char buf[1];
449 gz_statep state;
450
451 /* get internal structure */
452 if (file == NULL)
453 return -1;
454 state = (gz_statep)file;
455
456 /* check that we're reading and that there's no (serious) error */
457 if (state->mode != GZ_READ ||
458 (state->err != Z_OK && state->err != Z_BUF_ERROR))
459 return -1;
460
461 /* try output buffer (no need to check for skip request) */
462 if (state->x.have) {
463 state->x.have--;
464 state->x.pos++;
465 return *(state->x.next)++;
466 }
467
468 /* nothing there -- try gz_read() */
469 return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
470 }
471
gzgetc_(file)472 int ZEXPORT gzgetc_(file)
473 gzFile file;
474 {
475 return gzgetc(file);
476 }
477
478 /* -- see zlib.h -- */
gzungetc(c,file)479 int ZEXPORT gzungetc(c, file)
480 int c;
481 gzFile file;
482 {
483 gz_statep state;
484
485 /* get internal structure */
486 if (file == NULL)
487 return -1;
488 state = (gz_statep)file;
489
490 /* check that we're reading and that there's no (serious) error */
491 if (state->mode != GZ_READ ||
492 (state->err != Z_OK && state->err != Z_BUF_ERROR))
493 return -1;
494
495 /* process a skip request */
496 if (state->seek) {
497 state->seek = 0;
498 if (gz_skip(state, state->skip) == -1)
499 return -1;
500 }
501
502 /* can't push EOF */
503 if (c < 0)
504 return -1;
505
506 /* if output buffer empty, put byte at end (allows more pushing) */
507 if (state->x.have == 0) {
508 state->x.have = 1;
509 state->x.next = state->out + (state->size << 1) - 1;
510 state->x.next[0] = (unsigned char)c;
511 state->x.pos--;
512 state->past = 0;
513 return c;
514 }
515
516 /* if no room, give up (must have already done a gzungetc()) */
517 if (state->x.have == (state->size << 1)) {
518 gz_error(state, Z_DATA_ERROR, "out of room to push characters");
519 return -1;
520 }
521
522 /* slide output data if needed and insert byte before existing data */
523 if (state->x.next == state->out) {
524 unsigned char *src = state->out + state->x.have;
525 unsigned char *dest = state->out + (state->size << 1);
526 while (src > state->out)
527 *--dest = *--src;
528 state->x.next = dest;
529 }
530 state->x.have++;
531 state->x.next--;
532 state->x.next[0] = (unsigned char)c;
533 state->x.pos--;
534 state->past = 0;
535 return c;
536 }
537
538 /* -- see zlib.h -- */
gzgets(file,buf,len)539 char * ZEXPORT gzgets(file, buf, len)
540 gzFile file;
541 char *buf;
542 int len;
543 {
544 unsigned left, n;
545 char *str;
546 unsigned char *eol;
547 gz_statep state;
548
549 /* check parameters and get internal structure */
550 if (file == NULL || buf == NULL || len < 1)
551 return NULL;
552 state = (gz_statep)file;
553
554 /* check that we're reading and that there's no (serious) error */
555 if (state->mode != GZ_READ ||
556 (state->err != Z_OK && state->err != Z_BUF_ERROR))
557 return NULL;
558
559 /* process a skip request */
560 if (state->seek) {
561 state->seek = 0;
562 if (gz_skip(state, state->skip) == -1)
563 return NULL;
564 }
565
566 /* copy output bytes up to new line or len - 1, whichever comes first --
567 append a terminating zero to the string (we don't check for a zero in
568 the contents, let the user worry about that) */
569 str = buf;
570 left = (unsigned)len - 1;
571 if (left) do {
572 /* assure that something is in the output buffer */
573 if (state->x.have == 0 && gz_fetch(state) == -1)
574 return NULL; /* error */
575 if (state->x.have == 0) { /* end of file */
576 state->past = 1; /* read past end */
577 break; /* return what we have */
578 }
579
580 /* look for end-of-line in current output buffer */
581 n = state->x.have > left ? left : state->x.have;
582 eol = (unsigned char *)memchr(state->x.next, '\n', n);
583 if (eol != NULL)
584 n = (unsigned)(eol - state->x.next) + 1;
585
586 /* copy through end-of-line, or remainder if not found */
587 memcpy(buf, state->x.next, n);
588 state->x.have -= n;
589 state->x.next += n;
590 state->x.pos += n;
591 left -= n;
592 buf += n;
593 } while (left && eol == NULL);
594
595 /* return terminated string, or if nothing, end of file */
596 if (buf == str)
597 return NULL;
598 buf[0] = 0;
599 return str;
600 }
601
602 /* -- see zlib.h -- */
gzdirect(file)603 int ZEXPORT gzdirect(file)
604 gzFile file;
605 {
606 gz_statep state;
607
608 /* get internal structure */
609 if (file == NULL)
610 return 0;
611 state = (gz_statep)file;
612
613 /* if the state is not known, but we can find out, then do so (this is
614 mainly for right after a gzopen() or gzdopen()) */
615 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
616 (void)gz_look(state);
617
618 /* return 1 if transparent, 0 if processing a gzip stream */
619 return state->direct;
620 }
621
622 /* -- see zlib.h -- */
gzclose_r(file)623 int ZEXPORT gzclose_r(file)
624 gzFile file;
625 {
626 int ret, err;
627 gz_statep state;
628
629 /* get internal structure */
630 if (file == NULL)
631 return Z_STREAM_ERROR;
632 state = (gz_statep)file;
633
634 /* check that we're reading */
635 if (state->mode != GZ_READ)
636 return Z_STREAM_ERROR;
637
638 /* free memory and close file */
639 if (state->size) {
640 inflateEnd(&(state->strm));
641 free(state->out);
642 free(state->in);
643 }
644 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
645 gz_error(state, Z_OK, NULL);
646 free(state->path);
647 ret = close(state->fd);
648 free(state);
649 return ret ? Z_ERRNO : err;
650 }
651