xref: /illumos-gate/usr/src/cmd/pack/pack.c (revision e8031f0a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  *	Huffman encoding program
34  *	Usage:	pack [[ -f ] [ - ] filename ... ] filename ...
35  *		- option: enable/disable listing of statistics
36  */
37 
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <unistd.h>
42 #include <locale.h>
43 #include <stdarg.h>
44 #include <errno.h>
45 #include <sys/isa_defs.h>
46 #include <stdlib.h>
47 #include <limits.h>
48 #include <sys/param.h>
49 #include <fcntl.h>
50 #include <utime.h>
51 #include <string.h>
52 #include <dirent.h>
53 #include <unistd.h>
54 #include <sys/acl.h>
55 #include <aclutils.h>
56 
57 #undef lint
58 
59 #define	END	256
60 #define	PACKED 017436 /* <US><RS> - Unlikely value */
61 #define	SUF0	'.'
62 #define	SUF1	'z'
63 
64 struct stat status, ostatus;
65 static struct utimbuf u_times;
66 
67 /* union for overlaying a long int with a set of four characters */
68 union FOUR {
69 	struct { long int lng; } lint;
70 	struct { char c0, c1, c2, c3; } chars;
71 };
72 
73 /* character counters */
74 long	count [END+1];
75 union	FOUR insize;
76 long	outsize;
77 long	dictsize;
78 int	diffbytes;
79 
80 /* i/o stuff */
81 char	vflag = 0;
82 int	force = 0;	/* allow forced packing for consistency in directory */
83 
84 static	char filename [MAXPATHLEN];
85 static int max_name;
86 static int max_path = MAXPATHLEN;
87 
88 int	infile;		/* unpacked file */
89 int	outfile;	/* packed file */
90 char	inbuff [BUFSIZ];
91 char	outbuff [BUFSIZ+4];
92 
93 /* variables associated with the tree */
94 int	maxlev;
95 int	levcount [25];
96 int	lastnode;
97 int	parent [2*END+1];
98 
99 /* variables associated with the encoding process */
100 char	length [END+1];
101 long	bits [END+1];
102 union	FOUR mask;
103 long	inc;
104 #if defined(_LITTLE_ENDIAN)
105 char	*maskshuff[4]  = {&(mask.chars.c3),
106 			    &(mask.chars.c2),
107 			    &(mask.chars.c1),
108 			    &(mask.chars.c0)};
109 #elif defined(_BIG_ENDIAN)
110 char	*maskshuff[4]  = {&(mask.chars.c0),
111 			    &(mask.chars.c1),
112 			    &(mask.chars.c2),
113 			    &(mask.chars.c3)};
114 #else
115 #error Unknown byte ordering!
116 #endif
117 
118 /* the heap */
119 int	n;
120 struct	heap {
121 	long int count;
122 	int node;
123 } heap [END+2];
124 #define	hmove(a, b) {(b).count = (a).count; (b).node = (a).node; }
125 
126 static void heapify(int i);
127 static int mv_xattrs(int, int, char *, int);
128 
129 /* gather character frequency statistics */
130 /* return 1 if successful, 0 otherwise */
131 int
132 input(char *source)
133 {
134 	register int i;
135 	for (i = 0; i < END; i++)
136 		count[i] = 0;
137 	while ((i = read(infile, inbuff, BUFSIZ)) > 0)
138 		while (i > 0)
139 			count[inbuff[--i]&0377] += 2;
140 	if (i == 0)
141 		return (1);
142 	fprintf(stderr, gettext(
143 		"pack: %s: read error - file unchanged: "), source);
144 	perror("");
145 	return (0);
146 }
147 
148 /* encode the current file */
149 /* return 1 if successful, 0 otherwise */
150 int
151 output(char *source)
152 {
153 	int c, i, inleft;
154 	char *inp;
155 	register char **q, *outp;
156 	register int bitsleft;
157 	long temp;
158 
159 	/* output ``PACKED'' header */
160 	outbuff[0] = 037; 	/* ascii US */
161 	outbuff[1] = 036; 	/* ascii RS */
162 	/* output the length and the dictionary */
163 	temp = insize.lint.lng;
164 	for (i = 5; i >= 2; i--) {
165 		outbuff[i] =  (char)(temp & 0377);
166 		temp >>= 8;
167 	}
168 	outp = &outbuff[6];
169 	*outp++ = maxlev;
170 	for (i = 1; i < maxlev; i++)
171 		*outp++ = levcount[i];
172 	*outp++ = levcount[maxlev]-2;
173 	for (i = 1; i <= maxlev; i++)
174 		for (c = 0; c < END; c++)
175 			if (length[c] == i)
176 				*outp++ = c;
177 	dictsize = outp-&outbuff[0];
178 
179 	/* output the text */
180 	lseek(infile, 0L, 0);
181 	outsize = 0;
182 	bitsleft = 8;
183 	inleft = 0;
184 	do {
185 		if (inleft <= 0) {
186 			inleft = read(infile, inp = &inbuff[0], BUFSIZ);
187 			if (inleft < 0) {
188 				fprintf(stderr, gettext(
189 				    "pack: %s: read error - file unchanged: "),
190 					    source);
191 				perror("");
192 				return (0);
193 			}
194 		}
195 		c = (--inleft < 0) ? END : (*inp++ & 0377);
196 		mask.lint.lng = bits[c]<<bitsleft;
197 		q = &maskshuff[0];
198 		if (bitsleft == 8)
199 			*outp = **q++;
200 		else
201 			*outp |= **q++;
202 		bitsleft -= length[c];
203 		while (bitsleft < 0) {
204 			*++outp = **q++;
205 			bitsleft += 8;
206 		}
207 		if (outp >= &outbuff[BUFSIZ]) {
208 			if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) {
209 wrerr:				fprintf(stderr, gettext(
210 				"pack: %s.z: write error - file unchanged: "),
211 					source);
212 				perror("");
213 				return (0);
214 			}
215 			((union FOUR *)outbuff)->lint.lng =
216 				    ((union FOUR *)&outbuff[BUFSIZ])->lint.lng;
217 			outp -= BUFSIZ;
218 			outsize += BUFSIZ;
219 		}
220 	} while (c != END);
221 	if (bitsleft < 8)
222 		outp++;
223 	c = outp-outbuff;
224 	if (write(outfile, outbuff, c) != c)
225 		goto wrerr;
226 	outsize += c;
227 	return (1);
228 }
229 
230 /* makes a heap out of heap[i],...,heap[n] */
231 void
232 heapify(int i)
233 {
234 	register int k;
235 	int lastparent;
236 	struct heap heapsubi;
237 	hmove(heap[i], heapsubi);
238 	lastparent = n/2;
239 	while (i <= lastparent) {
240 		k = 2*i;
241 		if (heap[k].count > heap[k+1].count && k < n)
242 			k++;
243 		if (heapsubi.count < heap[k].count)
244 			break;
245 		hmove(heap[k], heap[i]);
246 		i = k;
247 	}
248 	hmove(heapsubi, heap[i]);
249 }
250 
251 /* return 1 after successful packing, 0 otherwise */
252 int
253 packfile(char *source)
254 {
255 	register int c, i, p;
256 	long bitsout;
257 
258 	/* gather frequency statistics */
259 	if (input(source) == 0)
260 		return (0);
261 
262 	/* put occurring chars in heap with their counts */
263 	diffbytes = -1;
264 	count[END] = 1;
265 	insize.lint.lng = n = 0;
266 	for (i = END; i >= 0; i--) {
267 		parent[i] = 0;
268 		if (count[i] > 0) {
269 			diffbytes++;
270 			insize.lint.lng += count[i];
271 			heap[++n].count = count[i];
272 			heap[n].node = i;
273 		}
274 	}
275 	if (diffbytes == 1) {
276 		fprintf(stderr, gettext(
277 			"pack: %s: trivial file - file unchanged\n"), source);
278 		return (0);
279 	}
280 	insize.lint.lng >>= 1;
281 	for (i = n/2; i >= 1; i--)
282 		heapify(i);
283 
284 	/* build Huffman tree */
285 	lastnode = END;
286 	while (n > 1) {
287 		parent[heap[1].node] = ++lastnode;
288 		inc = heap[1].count;
289 		hmove(heap[n], heap[1]);
290 		n--;
291 		heapify(1);
292 		parent[heap[1].node] = lastnode;
293 		heap[1].node = lastnode;
294 		heap[1].count += inc;
295 		heapify(1);
296 	}
297 	parent[lastnode] = 0;
298 
299 	/* assign lengths to encoding for each character */
300 	bitsout = maxlev = 0;
301 	for (i = 1; i <= 24; i++)
302 		levcount[i] = 0;
303 	for (i = 0; i <= END; i++) {
304 		c = 0;
305 		for (p = parent[i]; p != 0; p = parent[p])
306 			c++;
307 		levcount[c]++;
308 		length[i] = c;
309 		if (c > maxlev)
310 			maxlev = c;
311 		bitsout += c*(count[i]>>1);
312 	}
313 	if (maxlev > 24) {
314 		/* can't occur unless insize.lint.lng >= 2**24 */
315 		fprintf(stderr, gettext(
316 	"pack: %s: Huffman tree has too many levels - file unchanged\n"),
317 			source);
318 		return (0);
319 	}
320 
321 	/* don't bother if no compression results */
322 	outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
323 	if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <=
324 				    (outsize+BUFSIZ-1)/BUFSIZ && !force) {
325 		printf(gettext(
326 			"pack: %s: no saving - file unchanged\n"), source);
327 		return (0);
328 	}
329 
330 	/* compute bit patterns for each character */
331 	inc = 1L << 24;
332 	inc >>= maxlev;
333 	mask.lint.lng = 0;
334 	for (i = maxlev; i > 0; i--) {
335 		for (c = 0; c <= END; c++)
336 			if (length[c] == i) {
337 				bits[c] = mask.lint.lng;
338 				mask.lint.lng += inc;
339 			}
340 		mask.lint.lng &= ~inc;
341 		inc <<= 1;
342 	}
343 
344 	return (output(source));
345 }
346 
347 int
348 main(int argc, char *argv[])
349 {
350 	extern  int optind;
351 	register int i;
352 	register char *cp;
353 	int k, sep, errflg = 0;
354 	int c;
355 	int error;
356 	int fcount = 0; /* count failures */
357 	acl_t *aclp = NULL;
358 
359 	(void) setlocale(LC_ALL, "");
360 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
361 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
362 #endif
363 	(void) textdomain(TEXT_DOMAIN);
364 
365 	while ((c = getopt(argc, argv, "f-")) != EOF) {
366 		if (c == 'f')
367 			force++;
368 		else
369 			++errflg;
370 	}
371 	/*
372 	 * Check for invalid option.  Also check for missing
373 	 * file operand, ie: "pack" or "pack -".
374 	 */
375 	argc -= optind;
376 	argv = &argv[optind];
377 	if (errflg || argc < 1 ||
378 		(argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) {
379 		fprintf(stderr, gettext(
380 			"usage: pack [-f] [-] file...\n"));
381 		if (argc < 1 ||
382 			(argc == 1 && argv[0][0] == '-' &&
383 				argv[0][1] == '\0')) {
384 			/*
385 			 * return 1 for usage error when no file was specified
386 			 */
387 			return (1);
388 		}
389 	}
390 	/* loop through the file names */
391 	for (k = 0; k < argc; k++) {
392 		if (argv[k][0] == '-' && argv[k][1] == '\0') {
393 			vflag = 1 - vflag;
394 			continue;
395 		}
396 		fcount++; /* increase failure count - expect the worst */
397 		if (errflg) {
398 			/*
399 			 * invalid option; just count the number of files not
400 			 * packed
401 			 */
402 			continue;
403 		}
404 		/* remove any .z suffix the user may have added */
405 		for (cp = argv[k]; *cp != '\0'; ++cp)
406 			;
407 		if (cp[-1] == SUF1 && cp[-2] == SUF0) {
408 			*cp-- = '\0'; *cp-- = '\0'; *cp = '\0';
409 		}
410 		sep = -1;  cp = filename;
411 		max_name = pathconf(argv[k], _PC_NAME_MAX);
412 		if (max_name == -1) {
413 			/* pathname invalid or no limit on length of filename */
414 			max_name = _POSIX_NAME_MAX;
415 		}
416 		/* copy argv[k] to filename and count chars in base name */
417 		for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++)
418 			if (*cp++ == '/') sep = i;
419 		if ((infile = open(filename, 0)) < 0) {
420 			fprintf(stderr, gettext(
421 				"pack: %s: cannot open: "), filename);
422 			perror("");
423 			continue;
424 		}
425 		if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) {
426 			fprintf(stderr, gettext(
427 				"pack: %s: file name too long\n"), argv[k]);
428 			continue;
429 		}
430 		fstat(infile, &status);
431 		if (S_ISDIR(status.st_mode)) {
432 			fprintf(stderr, gettext(
433 				"pack: %s: cannot pack a directory\n"),
434 				    argv[k]);
435 			goto closein;
436 		}
437 		if (status.st_size == 0) {
438 			fprintf(stderr, gettext(
439 				"pack: %s: cannot pack a zero length file\n"),
440 				argv[k]);
441 			goto closein;
442 		}
443 		if (status.st_nlink != 1) {
444 			fprintf(stderr, gettext(
445 				"pack: %s: has links\n"),
446 				argv[k]);
447 			goto closein;
448 		}
449 		*cp++ = SUF0;  *cp++ = SUF1;  *cp = '\0';
450 		if (stat(filename, &ostatus) != -1) {
451 			fprintf(stderr, gettext(
452 				"pack: %s: already exists\n"), filename);
453 			goto closein;
454 		}
455 
456 		if ((outfile = creat(filename, status.st_mode)) < 0) {
457 			fprintf(stderr, gettext(
458 				"pack: %s: cannot create: "), filename);
459 			perror("");
460 			goto closein;
461 		}
462 
463 		error = facl_get(infile, ACL_NO_TRIVIAL, &aclp);
464 
465 		if (error != 0) {
466 			fprintf(stderr, gettext(
467 			    "pack: %s: cannot retrieve ACL: %s\n"), argv[k],
468 			    acl_strerror(error));
469 		}
470 		if (packfile(argv[k]) &&
471 		    ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) ||
472 				(mv_xattrs(infile, outfile,
473 					argv[k], 0) == 0))) {
474 			if (unlink(argv[k]) != 0) {
475 				fprintf(stderr, gettext(
476 					"pack: %s: cannot unlink: "),
477 					argv[k]);
478 				perror("");
479 			}
480 			printf(gettext(
481 				"pack: %s: %.1f%% Compression\n"),
482 				argv[k],
483 	    ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100);
484 			/* output statistics */
485 			if (vflag) {
486 				printf(gettext("\tfrom %ld to %ld bytes\n"),
487 					insize.lint.lng, outsize);
488 				printf(gettext(
489 				"\tHuffman tree has %d levels below root\n"),
490 				    maxlev);
491 				printf(gettext(
492 					"\t%d distinct bytes in input\n"),
493 					diffbytes);
494 				printf(gettext(
495 					"\tdictionary overhead = %ld bytes\n"),
496 					dictsize);
497 				printf(gettext(
498 				    "\teffective  entropy  = %.2f bits/byte\n"),
499 			    ((double)outsize / (double)insize.lint.lng) * 8);
500 				printf(gettext(
501 				    "\tasymptotic entropy  = %.2f bits/byte\n"),
502 					((double)(outsize-dictsize) /
503 					(double)insize.lint.lng) * 8);
504 			}
505 			u_times.actime = status.st_atime;
506 			u_times.modtime = status.st_mtime;
507 			if (utime(filename, &u_times) != 0) {
508 				errflg++;
509 				fprintf(stderr,
510 					gettext(
511 					"pack: cannot change times on %s: "),
512 					filename);
513 				perror("");
514 			}
515 			if (chmod(filename, status.st_mode) != 0) {
516 				errflg++;
517 				fprintf(stderr,
518 					gettext(
519 				"pack: can't change mode to %o on %s: "),
520 					status.st_mode, filename);
521 				perror("");
522 			}
523 			chown(filename, status.st_uid, status.st_gid);
524 			if (aclp && (facl_set(outfile, aclp) < 0)) {
525 				fprintf(stderr, gettext(
526 				    "pack: %s: failed to set acl entries\n"),
527 				    filename);
528 				perror("");
529 			}
530 			if (!errflg)
531 				fcount--;  /* success after all */
532 		} else {
533 			if (pathconf(filename, _PC_XATTR_EXISTS) == 1) {
534 				(void) mv_xattrs(outfile, infile, filename, 1);
535 			}
536 			unlink(filename);
537 		}
538 
539 		if (aclp)
540 			acl_free(aclp);
541 
542 closein:	close(outfile);
543 		close(infile);
544 	}
545 	return (fcount);
546 }
547 
548 /*
549  * mv_xattrs - move (via renameat) all of the extended attributes
550  *	associated with the file referenced by infd to the file
551  *	referenced by outfd.  The infile and silent arguments are
552  *	provided for error message processing.  This function
553  *	returns 0 on success and -1 on error.
554  */
555 static int
556 mv_xattrs(int infd, int outfd, char *infile, int silent)
557 {
558 	int indfd, outdfd, tmpfd;
559 	DIR *dirp = NULL;
560 	struct dirent *dp = NULL;
561 	int error = 0;
562 	char *etext;
563 
564 	indfd = outdfd = tmpfd = -1;
565 
566 	if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) {
567 		etext = gettext("cannot open source");
568 		error = -1;
569 		goto out;
570 	}
571 
572 	if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) {
573 		etext = gettext("cannot open target");
574 		error = -1;
575 		goto out;
576 	}
577 
578 	if ((tmpfd = dup(indfd)) == -1) {
579 		etext = gettext("cannot dup descriptor");
580 		error = -1;
581 		goto out;
582 
583 	}
584 	if ((dirp = fdopendir(tmpfd)) == NULL) {
585 		etext = gettext("cannot access source");
586 		error = -1;
587 		goto out;
588 	}
589 
590 	while (dp = readdir(dirp)) {
591 		if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') ||
592 		    (dp->d_name[0] == '.' && dp->d_name[1] == '.' &&
593 		    dp->d_name[2] == '\0'))
594 			continue;
595 		if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) {
596 			etext = dp->d_name;
597 			error = -1;
598 			goto out;
599 		}
600 	}
601 out:
602 	if (error == -1 && silent == 0) {
603 		fprintf(stderr, gettext(
604 			"pack: %s: cannot move extended attributes, "),
605 			infile);
606 		perror(etext);
607 	}
608 	if (dirp)
609 		closedir(dirp);
610 	if (indfd != -1)
611 		close(indfd);
612 	if (outdfd != -1)
613 		close(outdfd);
614 	return (error);
615 }
616