xref: /minix/external/bsd/file/dist/src/compress.c (revision 0a6a1f1d)
1 /*	$NetBSD: compress.c,v 1.10 2015/01/02 21:15:32 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * compress routines:
32  *	zmagic() - returns 0 if not recognized, uncompresses and prints
33  *		   information if recognized
34  *	uncompress(method, old, n, newch) - uncompress old into new,
35  *					    using method, return sizeof new
36  */
37 #include "file.h"
38 
39 #ifndef lint
40 #if 0
41 FILE_RCSID("@(#)$File: compress.c,v 1.77 2014/12/12 16:33:01 christos Exp $")
42 #else
43 __RCSID("$NetBSD: compress.c,v 1.10 2015/01/02 21:15:32 christos Exp $");
44 #endif
45 #endif
46 
47 #include "magic.h"
48 #include <stdlib.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #include <string.h>
53 #include <errno.h>
54 #include <signal.h>
55 #if !defined(__MINGW32__) && !defined(WIN32)
56 #include <sys/ioctl.h>
57 #endif
58 #ifdef HAVE_SYS_WAIT_H
59 #include <sys/wait.h>
60 #endif
61 #if defined(HAVE_SYS_TIME_H)
62 #include <sys/time.h>
63 #endif
64 #if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
65 #define BUILTIN_DECOMPRESS
66 #include <zlib.h>
67 #endif
68 
69 private const struct {
70 	const char magic[8];
71 	size_t maglen;
72 	const char *argv[3];
73 	int silent;
74 } compr[] = {
75 	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },		/* compressed */
76 	/* Uncompress can get stuck; so use gzip first if we have it
77 	 * Idea from Damien Clark, thanks! */
78 	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },	/* compressed */
79 	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },		/* gzipped */
80 	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },		/* frozen */
81 	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },		/* SCO LZH */
82 	/* the standard pack utilities do not accept standard input */
83 	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },		/* packed */
84 	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },		/* pkzipped, */
85 					    /* ...only first file examined */
86 	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
87 	{ "LZIP",     4, { "lzip", "-cdq", NULL }, 1 },
88  	{ "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 },		/* XZ Utils */
89  	{ "LRZI",     4, { "lrzip", "-dqo-", NULL }, 1 },	/* LRZIP */
90  	{ "\004\"M\030", 4, { "lz4", "-cd", NULL }, 1 },	/* LZ4 */
91 };
92 
93 #define NODATA ((size_t)~0)
94 
95 private ssize_t swrite(int, const void *, size_t);
96 #if HAVE_FORK
97 private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
98 private size_t uncompressbuf(struct magic_set *, int, size_t,
99     const unsigned char *, unsigned char **, size_t);
100 #ifdef BUILTIN_DECOMPRESS
101 private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
102     unsigned char **, size_t);
103 #endif
104 
105 protected int
file_zmagic(struct magic_set * ms,int fd,const char * name,const unsigned char * buf,size_t nbytes)106 file_zmagic(struct magic_set *ms, int fd, const char *name,
107     const unsigned char *buf, size_t nbytes)
108 {
109 	unsigned char *newbuf = NULL;
110 	size_t i, nsz;
111 	int rv = 0;
112 	int mime = ms->flags & MAGIC_MIME;
113 	sig_t osigpipe;
114 
115 	if ((ms->flags & MAGIC_COMPRESS) == 0)
116 		return 0;
117 
118 	osigpipe = signal(SIGPIPE, SIG_IGN);
119 	for (i = 0; i < ncompr; i++) {
120 		if (nbytes < compr[i].maglen)
121 			continue;
122 		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
123 		    (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
124 		    nbytes)) != NODATA) {
125 			ms->flags &= ~MAGIC_COMPRESS;
126 			rv = -1;
127 			if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
128 				goto error;
129 
130 			if (mime == MAGIC_MIME || mime == 0) {
131 				if (file_printf(ms, mime ?
132 				    " compressed-encoding=" : " (") == -1)
133 					goto error;
134 				if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
135 					goto error;
136 				if (!mime && file_printf(ms, ")") == -1)
137 					goto error;
138 			}
139 
140 			rv = 1;
141 			break;
142 		}
143 	}
144 error:
145 	(void)signal(SIGPIPE, osigpipe);
146 	free(newbuf);
147 	ms->flags |= MAGIC_COMPRESS;
148 	return rv;
149 }
150 #endif
151 /*
152  * `safe' write for sockets and pipes.
153  */
154 private ssize_t
swrite(int fd,const void * buf,size_t n)155 swrite(int fd, const void *buf, size_t n)
156 {
157 	ssize_t rv;
158 	size_t rn = n;
159 
160 	do
161 		switch (rv = write(fd, buf, n)) {
162 		case -1:
163 			if (errno == EINTR)
164 				continue;
165 			return -1;
166 		default:
167 			n -= rv;
168 			buf = CAST(const char *, buf) + rv;
169 			break;
170 		}
171 	while (n > 0);
172 	return rn;
173 }
174 
175 
176 /*
177  * `safe' read for sockets and pipes.
178  */
179 protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)180 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
181 {
182 	ssize_t rv;
183 #ifdef FIONREAD
184 	int t = 0;
185 #endif
186 	size_t rn = n;
187 
188 	if (fd == STDIN_FILENO)
189 		goto nocheck;
190 
191 #ifdef FIONREAD
192 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
193 #ifdef FD_ZERO
194 		ssize_t cnt;
195 		for (cnt = 0;; cnt++) {
196 			fd_set check;
197 			struct timeval tout = {0, 100 * 1000};
198 			int selrv;
199 
200 			FD_ZERO(&check);
201 			FD_SET(fd, &check);
202 
203 			/*
204 			 * Avoid soft deadlock: do not read if there
205 			 * is nothing to read from sockets and pipes.
206 			 */
207 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
208 			if (selrv == -1) {
209 				if (errno == EINTR || errno == EAGAIN)
210 					continue;
211 			} else if (selrv == 0 && cnt >= 5) {
212 				return 0;
213 			} else
214 				break;
215 		}
216 #endif
217 		(void)ioctl(fd, FIONREAD, &t);
218 	}
219 
220 	if (t > 0 && (size_t)t < n) {
221 		n = t;
222 		rn = n;
223 	}
224 #endif
225 
226 nocheck:
227 	do
228 		switch ((rv = read(fd, buf, n))) {
229 		case -1:
230 			if (errno == EINTR)
231 				continue;
232 			return -1;
233 		case 0:
234 			return rn - n;
235 		default:
236 			n -= rv;
237 			buf = ((char *)buf) + rv;
238 			break;
239 		}
240 	while (n > 0);
241 	return rn;
242 }
243 
244 protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)245 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
246     size_t nbytes)
247 {
248 	char buf[4096];
249 	ssize_t r;
250 	int tfd;
251 
252 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
253 #ifndef HAVE_MKSTEMP
254 	{
255 		char *ptr = mktemp(buf);
256 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
257 		r = errno;
258 		(void)unlink(ptr);
259 		errno = r;
260 	}
261 #else
262 	{
263 		int te;
264 		tfd = mkstemp(buf);
265 		te = errno;
266 		(void)unlink(buf);
267 		errno = te;
268 	}
269 #endif
270 	if (tfd == -1) {
271 		file_error(ms, errno,
272 		    "cannot create temporary file for pipe copy");
273 		return -1;
274 	}
275 
276 	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
277 		r = 1;
278 	else {
279 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
280 			if (swrite(tfd, buf, (size_t)r) != r)
281 				break;
282 	}
283 
284 	switch (r) {
285 	case -1:
286 		file_error(ms, errno, "error copying from pipe to temp file");
287 		return -1;
288 	case 0:
289 		break;
290 	default:
291 		file_error(ms, errno, "error while writing to temp file");
292 		return -1;
293 	}
294 
295 	/*
296 	 * We duplicate the file descriptor, because fclose on a
297 	 * tmpfile will delete the file, but any open descriptors
298 	 * can still access the phantom inode.
299 	 */
300 	if ((fd = dup2(tfd, fd)) == -1) {
301 		file_error(ms, errno, "could not dup descriptor for temp file");
302 		return -1;
303 	}
304 	(void)close(tfd);
305 	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
306 		file_badseek(ms);
307 		return -1;
308 	}
309 	return fd;
310 }
311 #if HAVE_FORK
312 #ifdef BUILTIN_DECOMPRESS
313 
314 #define FHCRC		(1 << 1)
315 #define FEXTRA		(1 << 2)
316 #define FNAME		(1 << 3)
317 #define FCOMMENT	(1 << 4)
318 
319 private size_t
uncompressgzipped(struct magic_set * ms,const unsigned char * old,unsigned char ** newch,size_t n)320 uncompressgzipped(struct magic_set *ms, const unsigned char *old,
321     unsigned char **newch, size_t n)
322 {
323 	unsigned char flg = old[3];
324 	size_t data_start = 10;
325 	z_stream z;
326 	int rc;
327 
328 	if (flg & FEXTRA) {
329 		if (data_start+1 >= n)
330 			return 0;
331 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
332 	}
333 	if (flg & FNAME) {
334 		while(data_start < n && old[data_start])
335 			data_start++;
336 		data_start++;
337 	}
338 	if(flg & FCOMMENT) {
339 		while(data_start < n && old[data_start])
340 			data_start++;
341 		data_start++;
342 	}
343 	if(flg & FHCRC)
344 		data_start += 2;
345 
346 	if (data_start >= n)
347 		return 0;
348 	if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) {
349 		return 0;
350 	}
351 
352 	/* XXX: const castaway, via strchr */
353 	z.next_in = (Bytef *)strchr((const char *)old + data_start,
354 	    old[data_start]);
355 	z.avail_in = CAST(uint32_t, (n - data_start));
356 	z.next_out = *newch;
357 	z.avail_out = HOWMANY;
358 	z.zalloc = Z_NULL;
359 	z.zfree = Z_NULL;
360 	z.opaque = Z_NULL;
361 
362 	/* LINTED bug in header macro */
363 	rc = inflateInit2(&z, -15);
364 	if (rc != Z_OK) {
365 		file_error(ms, 0, "zlib: %s", z.msg);
366 		return 0;
367 	}
368 
369 	rc = inflate(&z, Z_SYNC_FLUSH);
370 	if (rc != Z_OK && rc != Z_STREAM_END) {
371 		file_error(ms, 0, "zlib: %s", z.msg);
372 		return 0;
373 	}
374 
375 	n = (size_t)z.total_out;
376 	(void)inflateEnd(&z);
377 
378 	/* let's keep the nul-terminate tradition */
379 	(*newch)[n] = '\0';
380 
381 	return n;
382 }
383 #endif
384 
385 private size_t
uncompressbuf(struct magic_set * ms,int fd,size_t method,const unsigned char * old,unsigned char ** newch,size_t n)386 uncompressbuf(struct magic_set *ms, int fd, size_t method,
387     const unsigned char *old, unsigned char **newch, size_t n)
388 {
389 	int fdin[2], fdout[2];
390 	int status;
391 	ssize_t r;
392 
393 #ifdef BUILTIN_DECOMPRESS
394         /* FIXME: This doesn't cope with bzip2 */
395 	if (method == 2)
396 		return uncompressgzipped(ms, old, newch, n);
397 #endif
398 	(void)fflush(stdout);
399 	(void)fflush(stderr);
400 
401 	if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
402 		file_error(ms, errno, "cannot create pipe");
403 		return NODATA;
404 	}
405 	switch (fork()) {
406 	case 0:	/* child */
407 		(void) close(0);
408 		if (fd != -1) {
409 		    if (dup(fd) == -1)
410 			_exit(1);
411 		    (void) lseek(0, (off_t)0, SEEK_SET);
412 		} else {
413 		    if (dup(fdin[0]) == -1)
414 			_exit(1);
415 		    (void) close(fdin[0]);
416 		    (void) close(fdin[1]);
417 		}
418 
419 		(void) close(1);
420 		if (dup(fdout[1]) == -1)
421 			_exit(1);
422 		(void) close(fdout[0]);
423 		(void) close(fdout[1]);
424 #ifndef DEBUG
425 		if (compr[method].silent)
426 			(void)close(2);
427 #endif
428 
429 		(void)execvp(compr[method].argv[0],
430 		    (char *const *)(intptr_t)compr[method].argv);
431 #ifdef DEBUG
432 		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
433 		    compr[method].argv[0], strerror(errno));
434 #endif
435 		exit(1);
436 		/*NOTREACHED*/
437 	case -1:
438 		file_error(ms, errno, "could not fork");
439 		return NODATA;
440 
441 	default: /* parent */
442 		(void) close(fdout[1]);
443 		if (fd == -1) {
444 			(void) close(fdin[0]);
445 			/*
446 			 * fork again, to avoid blocking because both
447 			 * pipes filled
448 			 */
449 			switch (fork()) {
450 			case 0: /* child */
451 				(void)close(fdout[0]);
452 				if (swrite(fdin[1], old, n) != (ssize_t)n) {
453 #ifdef DEBUG
454 					(void)fprintf(stderr,
455 					    "Write failed (%s)\n",
456 					    strerror(errno));
457 #endif
458 					exit(1);
459 				}
460 				exit(0);
461 				/*NOTREACHED*/
462 
463 			case -1:
464 #ifdef DEBUG
465 				(void)fprintf(stderr, "Fork failed (%s)\n",
466 				    strerror(errno));
467 #endif
468 				exit(1);
469 				/*NOTREACHED*/
470 
471 			default:  /* parent */
472 				if (wait(&status) == -1) {
473 #ifdef DEBUG
474 					(void)fprintf(stderr,
475 					    "Wait failed (%s)\n",
476 					    strerror(errno));
477 #endif
478 					exit(1);
479 				}
480 				exit(WIFEXITED(status) ?
481 				    WEXITSTATUS(status) : 1);
482 				/*NOTREACHED*/
483 			}
484 			(void) close(fdin[1]);
485 			fdin[1] = -1;
486 		}
487 
488 		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
489 #ifdef DEBUG
490 			(void)fprintf(stderr, "Malloc failed (%s)\n",
491 			    strerror(errno));
492 #endif
493 			n = NODATA;
494 			goto err;
495 		}
496 		if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
497 #ifdef DEBUG
498 			(void)fprintf(stderr, "Read failed (%s)\n",
499 			    strerror(errno));
500 #endif
501 			free(*newch);
502 			n = NODATA;
503 			*newch = NULL;
504 			goto err;
505 		} else {
506 			n = r;
507 		}
508  		/* NUL terminate, as every buffer is handled here. */
509  		(*newch)[n] = '\0';
510 err:
511 		if (fdin[1] != -1)
512 			(void) close(fdin[1]);
513 		(void) close(fdout[0]);
514 		if (wait(&status) == -1) {
515 #ifdef DEBUG
516 			(void)fprintf(stderr, "Wait failed (%s)\n",
517 			    strerror(errno));
518 #endif
519 			n = NODATA;
520 		} else if (!WIFEXITED(status)) {
521 #ifdef DEBUG
522 			(void)fprintf(stderr, "Child not exited (0x%x)\n",
523 			    status);
524 #endif
525 		} else if (WEXITSTATUS(status) != 0) {
526 #ifdef DEBUG
527 			(void)fprintf(stderr, "Child exited (0x%d)\n",
528 			    WEXITSTATUS(status));
529 #endif
530 		}
531 
532 		(void) close(fdin[0]);
533 
534 		return n;
535 	}
536 }
537 #endif
538