xref: /openbsd/usr.bin/compress/gzopen.c (revision d89ec533)
1 /*	$OpenBSD: gzopen.c,v 1.34 2016/09/03 12:29:30 tedu Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
61 #include <sys/stat.h>
62 #include <sys/uio.h>
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <errno.h>
67 #include <unistd.h>
68 #include <limits.h>
69 #include <zlib.h>
70 #include "compress.h"
71 
/*
 * Bit masks for the flags byte of a gzip member header.
 */
#define ASCII_FLAG   0x01 /* bit 0: contents are probably ASCII text */
#define HEAD_CRC     0x02 /* bit 1: a header CRC follows the header */
#define EXTRA_FIELD  0x04 /* bit 2: an extra field is present */
#define ORIG_NAME    0x08 /* bit 3: the original file name is present */
#define COMMENT      0x10 /* bit 4: a file comment is present */
#define RESERVED     0xE0 /* bits 5..7: reserved, must be clear */

#define DEF_MEM_LEVEL 8   /* memLevel argument handed to deflateInit2() */
#define OS_CODE 0x03      /* OS byte written into the header: unix */
82 
83 typedef
84 struct gz_stream {
85 	int	z_fd;		/* .gz file */
86 	int	z_eof;		/* set if end of input file */
87 	z_stream z_stream;	/* libz stream */
88 	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer */
89 	char	z_mode;		/* 'w' or 'r' */
90 	u_int32_t z_time;	/* timestamp (mtime) */
91 	u_int32_t z_crc;	/* crc32 of uncompressed data */
92 	u_int32_t z_hlen;	/* length of the gz header */
93 	u_int64_t z_total_in;	/* # bytes in */
94 	u_int64_t z_total_out;	/* # bytes out */
95 } gz_stream;
96 
97 static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
98 
99 static u_int32_t get_int32(gz_stream *);
100 static int get_header(gz_stream *, char *, int);
101 static int get_byte(gz_stream *);
102 
103 void *
104 gz_ropen(int fd, char *name, int gotmagic)
105 {
106 	gz_stream *s;
107 
108 	if (fd < 0)
109 		return NULL;
110 
111 	if ((s = calloc(1, sizeof(gz_stream))) == NULL)
112 		return NULL;
113 
114 	s->z_stream.zalloc = (alloc_func)0;
115 	s->z_stream.zfree = (free_func)0;
116 	s->z_stream.opaque = (voidpf)0;
117 	s->z_stream.next_in = Z_NULL;
118 	s->z_stream.next_out = Z_NULL;
119 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
120 	s->z_fd = 0;
121 	s->z_eof = 0;
122 	s->z_time = 0;
123 	s->z_hlen = 0;
124 	s->z_total_in = 0;
125 	s->z_total_out = 0;
126 	s->z_crc = crc32(0L, Z_NULL, 0);
127 	s->z_mode = 'r';
128 
129 	if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
130 		free (s);
131 		return NULL;
132 	}
133 	s->z_stream.next_in = s->z_buf;
134 	s->z_stream.avail_out = Z_BUFSIZE;
135 
136 	errno = 0;
137 	s->z_fd = fd;
138 
139 	/* read the .gz header */
140 	if (get_header(s, name, gotmagic) != 0) {
141 		gz_close(s, NULL, NULL, NULL);
142 		s = NULL;
143 	}
144 
145 	return s;
146 }
147 
148 static int
149 get_byte(gz_stream *s)
150 {
151 	if (s->z_eof)
152 		return EOF;
153 
154 	if (s->z_stream.avail_in == 0) {
155 		errno = 0;
156 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
157 		if ((int)s->z_stream.avail_in <= 0) {
158 			s->z_eof = 1;
159 			return EOF;
160 		}
161 		s->z_stream.next_in = s->z_buf;
162 	}
163 	s->z_stream.avail_in--;
164 	return *s->z_stream.next_in++;
165 }
166 
167 static u_int32_t
168 get_int32(gz_stream *s)
169 {
170 	u_int32_t x;
171 
172 	x  = ((u_int32_t)(get_byte(s) & 0xff));
173 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
174 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
175 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
176 	return x;
177 }
178 
179 static int
180 get_header(gz_stream *s, char *name, int gotmagic)
181 {
182 	int method; /* method byte */
183 	int flags;  /* flags byte */
184 	char *ep;
185 	uInt len;
186 	int c;
187 
188 	/* Check the gzip magic header */
189 	if (!gotmagic) {
190 		for (len = 0; len < 2; len++) {
191 			c = get_byte(s);
192 			if (c != gz_magic[len]) {
193 				errno = EFTYPE;
194 				return -1;
195 			}
196 		}
197 	}
198 
199 	method = get_byte(s);
200 	flags = get_byte(s);
201 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
202 		errno = EFTYPE;
203 		return -1;
204 	}
205 
206 	/* Stash timestamp (mtime) */
207 	s->z_time = get_int32(s);
208 
209 	/* Discard xflags and OS code */
210 	(void)get_byte(s);
211 	(void)get_byte(s);
212 
213 	s->z_hlen += 10; /* magic, method, flags, time, xflags, OS code */
214 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
215 		len  =  (uInt)get_byte(s);
216 		len += ((uInt)get_byte(s))<<8;
217 		s->z_hlen += 2;
218 		/* len is garbage if EOF but the loop below will quit anyway */
219 		while (len-- != 0 && get_byte(s) != EOF)
220 			s->z_hlen++;
221 	}
222 
223 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
224 		if ((ep = name) != NULL)
225 			ep += PATH_MAX - 1;
226 		while ((c = get_byte(s)) != EOF) {
227 			s->z_hlen++;
228 			if (c == '\0')
229 				break;
230 			if (name < ep)
231 				*name++ = c;
232 		}
233 		if (name != NULL)
234 			*name = '\0';
235 	}
236 
237 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
238 		while ((c = get_byte(s)) != EOF) {
239 			s->z_hlen++;
240 			if (c == '\0')
241 				break;
242 		}
243 	}
244 
245 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
246 		(void)get_byte(s);
247 		(void)get_byte(s);
248 		s->z_hlen += 2;
249 	}
250 
251 	if (s->z_eof) {
252 		errno = EFTYPE;
253 		return -1;
254 	}
255 
256 	return 0;
257 }
258 
259 int
260 gz_read(void *cookie, char *buf, int len)
261 {
262 	gz_stream *s = (gz_stream*)cookie;
263 	u_char *start = buf; /* starting point for crc computation */
264 	int error = Z_OK;
265 
266 	s->z_stream.next_out = buf;
267 	s->z_stream.avail_out = len;
268 
269 	while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
270 
271 		if (s->z_stream.avail_in == 0) {
272 
273 			errno = 0;
274 			s->z_stream.avail_in = read(s->z_fd, s->z_buf,
275 			    Z_BUFSIZE);
276 			if ((int)s->z_stream.avail_in <= 0)
277 				s->z_eof = 1;
278 			s->z_stream.next_in = s->z_buf;
279 		}
280 
281 		error = inflate(&(s->z_stream), Z_NO_FLUSH);
282 
283 		if (error == Z_DATA_ERROR) {
284 			errno = EINVAL;
285 			goto bad;
286 		}
287 		if (error == Z_BUF_ERROR) {
288 			errno = EIO;
289 			goto bad;
290 		}
291 		if (error == Z_STREAM_END) {
292 			/* Check CRC and original size */
293 			s->z_crc = crc32(s->z_crc, start,
294 			    (uInt)(s->z_stream.next_out - start));
295 			start = s->z_stream.next_out;
296 
297 			if (get_int32(s) != s->z_crc) {
298 				errno = EINVAL;
299 				goto bad;
300 			}
301 			if (get_int32(s) != (u_int32_t)s->z_stream.total_out) {
302 				errno = EIO;
303 				return -1;
304 			}
305 			s->z_hlen += 2 * sizeof(int32_t);
306 
307 			/* Add byte counts from the finished stream. */
308 			s->z_total_in += s->z_stream.total_in;
309 			s->z_total_out += s->z_stream.total_out;
310 
311 			/* Check for the existence of an appended file. */
312 			if (get_header(s, NULL, 0) != 0) {
313 				s->z_eof = 1;
314 				break;
315 			}
316 			inflateReset(&(s->z_stream));
317 			s->z_crc = crc32(0L, Z_NULL, 0);
318 			error = Z_OK;
319 		}
320 	}
321 	s->z_crc = crc32(s->z_crc, start,
322 	    (uInt)(s->z_stream.next_out - start));
323 	len -= s->z_stream.avail_out;
324 
325 	return (len);
326 bad:
327 	/* Add byte counts from the finished stream. */
328 	s->z_total_in += s->z_stream.total_in;
329 	s->z_total_out += s->z_stream.total_out;
330 	return (-1);
331 }
332 
333 #ifndef SMALL
334 static int
335 put_int32(gz_stream *s, u_int32_t x)
336 {
337 	u_int32_t y = htole32(x);
338 
339 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
340 		return Z_ERRNO;
341 	return 0;
342 }
343 
344 static int
345 put_header(gz_stream *s, char *name, u_int32_t mtime, int bits)
346 {
347 	struct iovec iov[2];
348 	u_char buf[10];
349 
350 	buf[0] = gz_magic[0];
351 	buf[1] = gz_magic[1];
352 	buf[2] = Z_DEFLATED;
353 	buf[3] = name ? ORIG_NAME : 0;
354 	buf[4] = mtime & 0xff;
355 	buf[5] = (mtime >> 8) & 0xff;
356 	buf[6] = (mtime >> 16) & 0xff;
357 	buf[7] = (mtime >> 24) & 0xff;
358 	buf[8] = bits == 1 ? 4 : bits == 9 ? 2 : 0;	/* xflags */
359 	buf[9] = OS_CODE;
360 	iov[0].iov_base = buf;
361 	iov[0].iov_len = sizeof(buf);
362 	s->z_hlen = sizeof(buf);
363 
364 	if (name != NULL) {
365 		iov[1].iov_base = name;
366 		iov[1].iov_len = strlen(name) + 1;
367 		s->z_hlen += iov[1].iov_len;
368 	}
369 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
370 		return (-1);
371 	return (0);
372 }
373 
374 void *
375 gz_wopen(int fd, char *name, int bits, u_int32_t mtime)
376 {
377 	gz_stream *s;
378 
379 	if (fd < 0)
380 		return NULL;
381 
382 	if (bits < 0 || bits > Z_BEST_COMPRESSION) {
383 		errno = EINVAL;
384 		return NULL;
385 	}
386 	if ((s = calloc(1, sizeof(gz_stream))) == NULL)
387 		return NULL;
388 
389 	s->z_stream.zalloc = (alloc_func)0;
390 	s->z_stream.zfree = (free_func)0;
391 	s->z_stream.opaque = (voidpf)0;
392 	s->z_stream.next_in = Z_NULL;
393 	s->z_stream.next_out = Z_NULL;
394 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
395 	s->z_fd = 0;
396 	s->z_eof = 0;
397 	s->z_time = 0;
398 	s->z_hlen = 0;
399 	s->z_total_in = 0;
400 	s->z_total_out = 0;
401 	s->z_crc = crc32(0L, Z_NULL, 0);
402 	s->z_mode = 'w';
403 
404 	/* windowBits is passed < 0 to suppress zlib header */
405 	if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
406 			 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
407 		free (s);
408 		return NULL;
409 	}
410 	s->z_stream.next_out = s->z_buf;
411 	s->z_stream.avail_out = Z_BUFSIZE;
412 
413 	errno = 0;
414 	s->z_fd = fd;
415 
416 	/* write the .gz header */
417 	if (put_header(s, name, mtime, bits) != 0) {
418 		gz_close(s, NULL, NULL, NULL);
419 		s = NULL;
420 	}
421 
422 	return s;
423 }
424 int
425 gz_write(void *cookie, const char *buf, int len)
426 {
427 	gz_stream *s = (gz_stream*)cookie;
428 
429 	s->z_stream.next_in = (char *)buf;
430 	s->z_stream.avail_in = len;
431 
432 	while (s->z_stream.avail_in != 0) {
433 		if (s->z_stream.avail_out == 0) {
434 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
435 				break;
436 			s->z_stream.next_out = s->z_buf;
437 			s->z_stream.avail_out = Z_BUFSIZE;
438 		}
439 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
440 			break;
441 	}
442 	s->z_crc = crc32(s->z_crc, buf, len);
443 
444 	return (int)(len - s->z_stream.avail_in);
445 }
446 
447 int
448 gz_flush(void *cookie, int flush)
449 {
450 	gz_stream *s = (gz_stream*)cookie;
451 	size_t len;
452 	int done = 0;
453 	int err;
454 
455 	if (s == NULL || s->z_mode != 'w') {
456 		errno = EBADF;
457 		return Z_ERRNO;
458 	}
459 
460 	s->z_stream.avail_in = 0; /* should be zero already anyway */
461 
462 	for (;;) {
463 		len = Z_BUFSIZE - s->z_stream.avail_out;
464 
465 		if (len != 0) {
466 			if (write(s->z_fd, s->z_buf, len) != len)
467 				return Z_ERRNO;
468 			s->z_stream.next_out = s->z_buf;
469 			s->z_stream.avail_out = Z_BUFSIZE;
470 		}
471 		if (done)
472 			break;
473 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
474 		    err != Z_STREAM_END)
475 			return err;
476 
477 		/* deflate has finished flushing only when it hasn't
478 		 * used up all the available space in the output buffer
479 		 */
480 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
481 	}
482 	return 0;
483 }
484 #endif
485 
486 int
487 gz_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
488 {
489 	gz_stream *s = (gz_stream*)cookie;
490 	int err = 0;
491 
492 	if (s == NULL)
493 		return -1;
494 
495 #ifndef SMALL
496 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
497 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
498 			s->z_hlen += sizeof(int32_t);
499 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
500 				s->z_hlen += sizeof(int32_t);
501 		}
502 	}
503 #endif
504 	if (!err && s->z_stream.state != NULL) {
505 		if (s->z_mode == 'w')
506 #ifndef SMALL
507 			err = deflateEnd(&s->z_stream);
508 #else
509 			err = -1;
510 #endif
511 		else if (s->z_mode == 'r')
512 			err = inflateEnd(&s->z_stream);
513 	}
514 
515 	if (info != NULL) {
516 		info->mtime = s->z_time;
517 		info->crc = s->z_crc;
518 		info->hlen = s->z_hlen;
519 		if (s->z_mode == 'r') {
520 			info->total_in = s->z_total_in;
521 			info->total_out = s->z_total_out;
522 		} else {
523 			info->total_in = s->z_stream.total_in;
524 			info->total_out = s->z_stream.total_out;
525 		}
526 
527 	}
528 
529 	setfile(name, s->z_fd, sb);
530 	if (!err)
531 		err = close(s->z_fd);
532 	else
533 		(void)close(s->z_fd);
534 
535 	free(s);
536 
537 	return err;
538 }
539 
540