xref: /openbsd/usr.bin/compress/gzopen.c (revision af40fb16)
1 /*	$OpenBSD: gzopen.c,v 1.35 2022/06/18 03:23:19 gkoehler Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
61 #include <sys/stat.h>
62 #include <sys/uio.h>
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <errno.h>
67 #include <unistd.h>
68 #include <limits.h>
69 #include <zlib.h>
70 #include "compress.h"
71 
/*
 * gzip flag byte: bit masks for the fourth byte of a gzip member header.
 * get_header() uses them to decide which optional header fields follow
 * the fixed ten-byte part; put_header() only ever sets ORIG_NAME.
 */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved; get_header() rejects them */

/* memLevel argument handed to deflateInit2() by gz_wopen() */
#define DEF_MEM_LEVEL 8
/* value put_header() stores in the last byte of the fixed header */
#define OS_CODE 0x03 /* unix */
82 
/*
 * State of one open gzip stream.  gz_ropen() and gz_wopen() allocate it
 * and hand it back as an opaque cookie; gz_close() frees it.
 */
typedef
struct gz_stream {
	int	z_fd;		/* .gz file */
	int	z_eof;		/* set if end of input file (or read failed) */
	z_stream z_stream;	/* libz stream */
	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer between the file and libz */
	char	z_mode;		/* 'w' or 'r' */
	u_int32_t z_time;	/* timestamp (mtime) read by get_header() */
	u_int32_t z_crc;	/* crc32 of uncompressed data */
	u_int32_t z_hlen;	/* length of the gz header, plus trailer bytes */
	u_int64_t z_total_in;	/* # bytes in */
	u_int64_t z_total_out;	/* # bytes out */
} gz_stream;
96 
/* The two bytes every gzip member starts with. */
static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */

/* Header-parsing helpers used by the reader; defined further down. */
static u_int32_t get_int32(gz_stream *);
static int get_header(gz_stream *, char *, int);
static int get_byte(gz_stream *);
102 
103 void *
gz_ropen(int fd,char * name,int gotmagic)104 gz_ropen(int fd, char *name, int gotmagic)
105 {
106 	gz_stream *s;
107 
108 	if (fd < 0)
109 		return NULL;
110 
111 	if ((s = calloc(1, sizeof(gz_stream))) == NULL)
112 		return NULL;
113 
114 	s->z_stream.zalloc = (alloc_func)0;
115 	s->z_stream.zfree = (free_func)0;
116 	s->z_stream.opaque = (voidpf)0;
117 	s->z_stream.next_in = Z_NULL;
118 	s->z_stream.next_out = Z_NULL;
119 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
120 	s->z_fd = 0;
121 	s->z_eof = 0;
122 	s->z_time = 0;
123 	s->z_hlen = 0;
124 	s->z_total_in = 0;
125 	s->z_total_out = 0;
126 	s->z_crc = crc32(0L, Z_NULL, 0);
127 	s->z_mode = 'r';
128 
129 	if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
130 		free (s);
131 		return NULL;
132 	}
133 	s->z_stream.next_in = s->z_buf;
134 	s->z_stream.avail_out = Z_BUFSIZE;
135 
136 	errno = 0;
137 	s->z_fd = fd;
138 
139 	/* read the .gz header */
140 	if (get_header(s, name, gotmagic) != 0) {
141 		gz_close(s, NULL, NULL, NULL);
142 		s = NULL;
143 	}
144 
145 	return s;
146 }
147 
148 static int
get_byte(gz_stream * s)149 get_byte(gz_stream *s)
150 {
151 	if (s->z_eof)
152 		return EOF;
153 
154 	if (s->z_stream.avail_in == 0) {
155 		errno = 0;
156 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
157 		if ((int)s->z_stream.avail_in <= 0) {
158 			s->z_eof = 1;
159 			return EOF;
160 		}
161 		s->z_stream.next_in = s->z_buf;
162 	}
163 	s->z_stream.avail_in--;
164 	return *s->z_stream.next_in++;
165 }
166 
167 static u_int32_t
get_int32(gz_stream * s)168 get_int32(gz_stream *s)
169 {
170 	u_int32_t x;
171 
172 	x  = ((u_int32_t)(get_byte(s) & 0xff));
173 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
174 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
175 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
176 	return x;
177 }
178 
179 static int
get_header(gz_stream * s,char * name,int gotmagic)180 get_header(gz_stream *s, char *name, int gotmagic)
181 {
182 	int method; /* method byte */
183 	int flags;  /* flags byte */
184 	char *ep;
185 	uInt len;
186 	int c;
187 
188 	/* Check the gzip magic header */
189 	if (!gotmagic) {
190 		for (len = 0; len < 2; len++) {
191 			c = get_byte(s);
192 			if (c != gz_magic[len]) {
193 				errno = EFTYPE;
194 				return -1;
195 			}
196 		}
197 	}
198 
199 	method = get_byte(s);
200 	flags = get_byte(s);
201 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
202 		errno = EFTYPE;
203 		return -1;
204 	}
205 
206 	/* Stash timestamp (mtime) */
207 	s->z_time = get_int32(s);
208 
209 	/* Discard xflags and OS code */
210 	(void)get_byte(s);
211 	(void)get_byte(s);
212 
213 	s->z_hlen += 10; /* magic, method, flags, time, xflags, OS code */
214 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
215 		len  =  (uInt)get_byte(s);
216 		len += ((uInt)get_byte(s))<<8;
217 		s->z_hlen += 2;
218 		/* len is garbage if EOF but the loop below will quit anyway */
219 		while (len-- != 0 && get_byte(s) != EOF)
220 			s->z_hlen++;
221 	}
222 
223 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
224 		if ((ep = name) != NULL)
225 			ep += PATH_MAX - 1;
226 		while ((c = get_byte(s)) != EOF) {
227 			s->z_hlen++;
228 			if (c == '\0')
229 				break;
230 			if (name < ep)
231 				*name++ = c;
232 		}
233 		if (name != NULL)
234 			*name = '\0';
235 	}
236 
237 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
238 		while ((c = get_byte(s)) != EOF) {
239 			s->z_hlen++;
240 			if (c == '\0')
241 				break;
242 		}
243 	}
244 
245 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
246 		(void)get_byte(s);
247 		(void)get_byte(s);
248 		s->z_hlen += 2;
249 	}
250 
251 	if (s->z_eof) {
252 		errno = EFTYPE;
253 		return -1;
254 	}
255 
256 	return 0;
257 }
258 
259 int
gz_read(void * cookie,char * buf,int len)260 gz_read(void *cookie, char *buf, int len)
261 {
262 	gz_stream *s = (gz_stream*)cookie;
263 	uLong old_total_in;
264 	u_char *start = buf; /* starting point for crc computation */
265 	int error = Z_OK;
266 
267 	/* z_stream.total_in might overflow uLong. */
268 	old_total_in = s->z_stream.total_in;
269 
270 	s->z_stream.next_out = buf;
271 	s->z_stream.avail_out = len;
272 
273 	while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
274 
275 		if (s->z_stream.avail_in == 0) {
276 
277 			errno = 0;
278 			s->z_stream.avail_in = read(s->z_fd, s->z_buf,
279 			    Z_BUFSIZE);
280 			if ((int)s->z_stream.avail_in <= 0)
281 				s->z_eof = 1;
282 			s->z_stream.next_in = s->z_buf;
283 		}
284 
285 		error = inflate(&(s->z_stream), Z_NO_FLUSH);
286 
287 		if (error == Z_DATA_ERROR) {
288 			errno = EINVAL;
289 			goto bad;
290 		}
291 		if (error == Z_BUF_ERROR) {
292 			errno = EIO;
293 			goto bad;
294 		}
295 		if (error == Z_STREAM_END) {
296 			/* Check CRC and original size */
297 			s->z_crc = crc32(s->z_crc, start,
298 			    (uInt)(s->z_stream.next_out - start));
299 			start = s->z_stream.next_out;
300 
301 			if (get_int32(s) != s->z_crc) {
302 				errno = EINVAL;
303 				goto bad;
304 			}
305 			if (get_int32(s) != (u_int32_t)s->z_stream.total_out) {
306 				errno = EIO;
307 				return -1;
308 			}
309 			s->z_hlen += 2 * sizeof(int32_t);
310 
311 			/* Check for the existence of an appended file. */
312 			if (get_header(s, NULL, 0) != 0) {
313 				s->z_eof = 1;
314 				break;
315 			}
316 			s->z_total_in += (uLong)(s->z_stream.total_in -
317 			    old_total_in);
318 			inflateReset(&(s->z_stream));
319 			s->z_crc = crc32(0L, Z_NULL, 0);
320 			old_total_in = 0;
321 			error = Z_OK;
322 		}
323 	}
324 	s->z_crc = crc32(s->z_crc, start,
325 	    (uInt)(s->z_stream.next_out - start));
326 	len -= s->z_stream.avail_out;
327 	s->z_total_in += (uLong)(s->z_stream.total_in - old_total_in);
328 	s->z_total_out += len;
329 	return (len);
330 bad:
331 	s->z_total_in += (uLong)(s->z_stream.total_in - old_total_in);
332 	s->z_total_out += (len - s->z_stream.avail_out);
333 	return (-1);
334 }
335 
336 #ifndef SMALL
337 static int
put_int32(gz_stream * s,u_int32_t x)338 put_int32(gz_stream *s, u_int32_t x)
339 {
340 	u_int32_t y = htole32(x);
341 
342 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
343 		return Z_ERRNO;
344 	return 0;
345 }
346 
347 static int
put_header(gz_stream * s,char * name,u_int32_t mtime,int bits)348 put_header(gz_stream *s, char *name, u_int32_t mtime, int bits)
349 {
350 	struct iovec iov[2];
351 	u_char buf[10];
352 
353 	buf[0] = gz_magic[0];
354 	buf[1] = gz_magic[1];
355 	buf[2] = Z_DEFLATED;
356 	buf[3] = name ? ORIG_NAME : 0;
357 	buf[4] = mtime & 0xff;
358 	buf[5] = (mtime >> 8) & 0xff;
359 	buf[6] = (mtime >> 16) & 0xff;
360 	buf[7] = (mtime >> 24) & 0xff;
361 	buf[8] = bits == 1 ? 4 : bits == 9 ? 2 : 0;	/* xflags */
362 	buf[9] = OS_CODE;
363 	iov[0].iov_base = buf;
364 	iov[0].iov_len = sizeof(buf);
365 	s->z_hlen = sizeof(buf);
366 
367 	if (name != NULL) {
368 		iov[1].iov_base = name;
369 		iov[1].iov_len = strlen(name) + 1;
370 		s->z_hlen += iov[1].iov_len;
371 	}
372 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
373 		return (-1);
374 	return (0);
375 }
376 
377 void *
gz_wopen(int fd,char * name,int bits,u_int32_t mtime)378 gz_wopen(int fd, char *name, int bits, u_int32_t mtime)
379 {
380 	gz_stream *s;
381 
382 	if (fd < 0)
383 		return NULL;
384 
385 	if (bits < 0 || bits > Z_BEST_COMPRESSION) {
386 		errno = EINVAL;
387 		return NULL;
388 	}
389 	if ((s = calloc(1, sizeof(gz_stream))) == NULL)
390 		return NULL;
391 
392 	s->z_stream.zalloc = (alloc_func)0;
393 	s->z_stream.zfree = (free_func)0;
394 	s->z_stream.opaque = (voidpf)0;
395 	s->z_stream.next_in = Z_NULL;
396 	s->z_stream.next_out = Z_NULL;
397 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
398 	s->z_fd = 0;
399 	s->z_eof = 0;
400 	s->z_time = 0;
401 	s->z_hlen = 0;
402 	s->z_total_in = 0;
403 	s->z_total_out = 0;
404 	s->z_crc = crc32(0L, Z_NULL, 0);
405 	s->z_mode = 'w';
406 
407 	/* windowBits is passed < 0 to suppress zlib header */
408 	if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
409 			 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
410 		free (s);
411 		return NULL;
412 	}
413 	s->z_stream.next_out = s->z_buf;
414 	s->z_stream.avail_out = Z_BUFSIZE;
415 
416 	errno = 0;
417 	s->z_fd = fd;
418 
419 	/* write the .gz header */
420 	if (put_header(s, name, mtime, bits) != 0) {
421 		gz_close(s, NULL, NULL, NULL);
422 		s = NULL;
423 	}
424 
425 	return s;
426 }
427 int
gz_write(void * cookie,const char * buf,int len)428 gz_write(void *cookie, const char *buf, int len)
429 {
430 	gz_stream *s = (gz_stream*)cookie;
431 
432 	s->z_stream.next_in = (char *)buf;
433 	s->z_stream.avail_in = len;
434 
435 	while (s->z_stream.avail_in != 0) {
436 		if (s->z_stream.avail_out == 0) {
437 			s->z_total_out += Z_BUFSIZE;
438 
439 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
440 				break;
441 			s->z_stream.next_out = s->z_buf;
442 			s->z_stream.avail_out = Z_BUFSIZE;
443 		}
444 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
445 			break;
446 	}
447 	s->z_crc = crc32(s->z_crc, buf, len);
448 
449 	len -= s->z_stream.avail_in;
450 	s->z_total_in += len;
451 	return len;
452 }
453 
454 int
gz_flush(void * cookie,int flush)455 gz_flush(void *cookie, int flush)
456 {
457 	gz_stream *s = (gz_stream*)cookie;
458 	size_t len;
459 	int done = 0;
460 	int err;
461 
462 	if (s == NULL || s->z_mode != 'w') {
463 		errno = EBADF;
464 		return Z_ERRNO;
465 	}
466 
467 	s->z_stream.avail_in = 0; /* should be zero already anyway */
468 
469 	for (;;) {
470 		len = Z_BUFSIZE - s->z_stream.avail_out;
471 
472 		if (len != 0) {
473 			s->z_total_out += len;
474 
475 			if (write(s->z_fd, s->z_buf, len) != len)
476 				return Z_ERRNO;
477 			s->z_stream.next_out = s->z_buf;
478 			s->z_stream.avail_out = Z_BUFSIZE;
479 		}
480 		if (done)
481 			break;
482 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
483 		    err != Z_STREAM_END)
484 			return err;
485 
486 		/* deflate has finished flushing only when it hasn't
487 		 * used up all the available space in the output buffer
488 		 */
489 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
490 	}
491 	return 0;
492 }
493 #endif
494 
495 int
gz_close(void * cookie,struct z_info * info,const char * name,struct stat * sb)496 gz_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
497 {
498 	gz_stream *s = (gz_stream*)cookie;
499 	int err = 0;
500 
501 	if (s == NULL)
502 		return -1;
503 
504 #ifndef SMALL
505 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
506 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
507 			s->z_hlen += sizeof(int32_t);
508 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
509 				s->z_hlen += sizeof(int32_t);
510 		}
511 	}
512 #endif
513 	if (!err && s->z_stream.state != NULL) {
514 		if (s->z_mode == 'w')
515 #ifndef SMALL
516 			err = deflateEnd(&s->z_stream);
517 #else
518 			err = -1;
519 #endif
520 		else if (s->z_mode == 'r')
521 			err = inflateEnd(&s->z_stream);
522 	}
523 
524 	if (info != NULL) {
525 		info->mtime = s->z_time;
526 		info->crc = s->z_crc;
527 		info->hlen = s->z_hlen;
528 		info->total_in = s->z_total_in;
529 		info->total_out = s->z_total_out;
530 	}
531 
532 	setfile(name, s->z_fd, sb);
533 	if (!err)
534 		err = close(s->z_fd);
535 	else
536 		(void)close(s->z_fd);
537 
538 	free(s);
539 
540 	return err;
541 }
542 
543