xref: /openbsd/bin/pax/tar.c (revision db3296cf)
1 /*	$OpenBSD: tar.c,v 1.31 2003/06/26 00:10:18 deraadt Exp $	*/
2 /*	$NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1992 Keith Muller.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Keith Muller of the University of California, San Diego.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 #if 0
39 static const char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
40 #else
41 static const char rcsid[] = "$OpenBSD: tar.c,v 1.31 2003/06/26 00:10:18 deraadt Exp $";
42 #endif
43 #endif /* not lint */
44 
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <string.h>
50 #include <stdio.h>
51 #include <unistd.h>
52 #include <stdlib.h>
53 #include "pax.h"
54 #include "extern.h"
55 #include "tar.h"
56 
57 /*
 * Routines for reading and writing the various versions of tar, and for
 * identifying their headers
59  */
60 
61 static u_long tar_chksm(char *, int);
62 static char *name_split(char *, int);
63 static int ul_oct(u_long, char *, int, int);
64 #ifndef LONG_OFF_T
65 static int uqd_oct(u_quad_t, char *, int, int);
66 #endif
67 
68 /*
69  * Routines common to all versions of tar
70  */
71 
72 static int tar_nodir;			/* do not write dirs under old tar */
73 char *gnu_hack_string;			/* GNU ././@LongLink hackery */
74 
75 /*
76  * tar_endwr()
77  *	add the tar trailer of two null blocks
78  * Return:
79  *	0 if ok, -1 otherwise (what wr_skip returns)
80  */
81 
82 int
83 tar_endwr(void)
84 {
85 	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
86 }
87 
/*
 * tar_endrd()
 *	nothing has to be cleaned up after reading; just report how many
 *	bytes the archive trailer occupies (needed for append).
 * Return:
 *	size of trailer (NULLCNT * BLKMULT)
 */

off_t
tar_endrd(void)
{
	const off_t trailer = (off_t)(NULLCNT * BLKMULT);

	return(trailer);
}
100 
101 /*
102  * tar_trail()
103  *	Called to determine if a header block is a valid trailer. We are passed
104  *	the block, the in_sync flag (which tells us we are in resync mode;
105  *	looking for a valid header), and cnt (which starts at zero) which is
106  *	used to count the number of empty blocks we have seen so far.
107  * Return:
108  *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
109  *	could never contain a header.
110  */
111 
112 int
113 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
114 {
115 	int i;
116 
117 	/*
118 	 * look for all zero, trailer is two consecutive blocks of zero
119 	 */
120 	for (i = 0; i < BLKMULT; ++i) {
121 		if (buf[i] != '\0')
122 			break;
123 	}
124 
125 	/*
126 	 * if not all zero it is not a trailer, but MIGHT be a header.
127 	 */
128 	if (i != BLKMULT)
129 		return(-1);
130 
131 	/*
132 	 * When given a zero block, we must be careful!
133 	 * If we are not in resync mode, check for the trailer. Have to watch
134 	 * out that we do not mis-identify file data as the trailer, so we do
135 	 * NOT try to id a trailer during resync mode. During resync mode we
136 	 * might as well throw this block out since a valid header can NEVER be
137 	 * a block of all 0 (we must have a valid file name).
138 	 */
139 	if (!in_resync && (++*cnt >= NULLCNT))
140 		return(0);
141 	return(1);
142 }
143 
/*
 * ul_oct()
 *	format an unsigned long as octal digits, right justified in the len
 *	byte field at str and '0' filled on the left. The versions of tar
 *	disagree about how a numeric field ends, so term picks the ending:
 *	    3: digits followed by NUL
 *	    2: digits followed by NUL then blank
 *	    1: digits followed by blank
 *	    0 (or anything else): digits followed by blank then NUL
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	char *cur = str + len;		/* one past the next byte to fill */

	/*
	 * lay down the terminator(s), working back from the end of the field
	 */
	if (term == 3) {
		*--cur = '\0';
	} else if (term == 2) {
		*--cur = ' ';
		*--cur = '\0';
	} else if (term == 1) {
		*--cur = ' ';
	} else {
		*--cur = '\0';
		*--cur = ' ';
	}

	/*
	 * least significant digit first, until the value or the room runs out
	 */
	while (cur > str) {
		*--cur = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * whatever room is left in front of the digits is filled with '0'
	 */
	while (cur > str)
		*--cur = '0';

	return((val != (u_long)0) ? -1 : 0);
}
197 
#ifndef LONG_OFF_T
/*
 * uqd_oct()
 *	u_quad_t flavor of ul_oct(): format val as octal digits, right
 *	justified in the len byte field at str and '0' filled on the left.
 *	term picks the field ending the target tar version expects:
 *	    3: digits followed by NUL
 *	    2: digits followed by NUL then blank
 *	    1: digits followed by blank
 *	    0 (or anything else): digits followed by blank then NUL
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
uqd_oct(u_quad_t val, char *str, int len, int term)
{
	char *cur = str + len;		/* one past the next byte to fill */

	/*
	 * terminator(s) go in first, from the end of the field backwards
	 */
	switch (term) {
	case 1:
		*--cur = ' ';
		break;
	case 2:
		*--cur = ' ';
		*--cur = '\0';
		break;
	case 3:
		*--cur = '\0';
		break;
	default:
		*--cur = '\0';
		*--cur = ' ';
		break;
	}

	/*
	 * digits are produced low order first; stop when the value is used
	 * up or the field is full
	 */
	while (cur > str) {
		*--cur = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * '0' fill the front of the field
	 */
	while (cur > str)
		*--cur = '0';

	return((val != (u_quad_t)0) ? -1 : 0);
}
#endif
253 
/*
 * tar_chksm()
 *	compute the checksum of a tar header block. Every byte is summed as
 *	an unsigned value, except that the CHK_LEN bytes of the checksum field
 *	itself (at CHK_OFFSET) are counted as blanks; BLNKSUM is that
 *	contribution pre-calculated. len is where the summing stops: on write
 *	the rest of the block is our own zero padding and adds nothing.
 *	ASSUMED: len is greater than CHK_OFFSET.
 * Return:
 *	unsigned long checksum
 */

static u_long
tar_chksm(char *blk, int len)
{
	u_long sum = BLNKSUM;	/* the checksum field, taken as all blanks */
	int idx;

	/*
	 * everything in front of the checksum field
	 */
	for (idx = 0; idx < CHK_OFFSET; ++idx)
		sum += (u_long)(blk[idx] & 0xff);

	/*
	 * step over the checksum field, then sum up to len
	 */
	for (idx = CHK_OFFSET + CHK_LEN; idx < len; ++idx)
		sum += (u_long)(blk[idx] & 0xff);
	return(sum);
}
291 
292 /*
293  * Routines for old BSD style tar (also made portable to sysV tar)
294  */
295 
296 /*
297  * tar_id()
298  *	determine if a block given to us is a valid tar header (and not a USTAR
299  *	header). We have to be on the lookout for those pesky blocks of	all
300  *	zero's.
301  * Return:
302  *	0 if a tar header, -1 otherwise
303  */
304 
305 int
306 tar_id(char *blk, int size)
307 {
308 	HD_TAR *hd;
309 	HD_USTAR *uhd;
310 
311 	if (size < BLKMULT)
312 		return(-1);
313 	hd = (HD_TAR *)blk;
314 	uhd = (HD_USTAR *)blk;
315 
316 	/*
317 	 * check for block of zero's first, a simple and fast test, then make
318 	 * sure this is not a ustar header by looking for the ustar magic
319 	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
320 	 * wrong and create archives missing the \0. Last we check the
321 	 * checksum. If this is ok we have to assume it is a valid header.
322 	 */
323 	if (hd->name[0] == '\0')
324 		return(-1);
325 	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
326 		return(-1);
327 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
328 		return(-1);
329 	force_one_volume = 1;
330 	return(0);
331 }
332 
333 /*
334  * tar_opt()
335  *	handle tar format specific -o options
336  * Return:
337  *	0 if ok -1 otherwise
338  */
339 
340 int
341 tar_opt(void)
342 {
343 	OPLIST *opt;
344 
345 	while ((opt = opt_next()) != NULL) {
346 		if (strcmp(opt->name, TAR_OPTION) ||
347 		    strcmp(opt->value, TAR_NODIR)) {
348 			paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
349 			    opt->name, opt->value);
350 			paxwarn(1,"%s=%s is the only supported tar format option",
351 			    TAR_OPTION, TAR_NODIR);
352 			return(-1);
353 		}
354 
355 		/*
356 		 * we only support one option, and only when writing
357 		 */
358 		if ((act != APPND) && (act != ARCHIVE)) {
359 			paxwarn(1, "%s=%s is only supported when writing.",
360 			    opt->name, opt->value);
361 			return(-1);
362 		}
363 		tar_nodir = 1;
364 	}
365 	return(0);
366 }
367 
368 
369 /*
370  * tar_rd()
371  *	extract the values out of block already determined to be a tar header.
372  *	store the values in the ARCHD parameter.
373  * Return:
374  *	0
375  */
376 
377 int
378 tar_rd(ARCHD *arcn, char *buf)
379 {
380 	HD_TAR *hd;
381 	char *pt;
382 
383 	/*
384 	 * we only get proper sized buffers passed to us
385 	 */
386 	if (tar_id(buf, BLKMULT) < 0)
387 		return(-1);
388 	arcn->org_name = arcn->name;
389 	arcn->sb.st_nlink = 1;
390 	arcn->pat = NULL;
391 
392 	/*
393 	 * copy out the name and values in the stat buffer
394 	 */
395 	hd = (HD_TAR *)buf;
396 	if (gnu_hack_string) {
397 		arcn->nlen = strlcpy(arcn->name, gnu_hack_string,
398 		    sizeof(arcn->name));
399 		free(gnu_hack_string);
400 		gnu_hack_string = NULL;
401 	} else {
402 		arcn->nlen = strlcpy(arcn->name, hd->name, sizeof(arcn->name));
403 	}
404 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
405 	    0xfff);
406 	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
407 	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
408 #ifdef LONG_OFF_T
409 	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
410 #else
411 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
412 #endif
413 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
414 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
415 
416 	/*
417 	 * have to look at the last character, it may be a '/' and that is used
418 	 * to encode this as a directory
419 	 */
420 	pt = &(arcn->name[arcn->nlen - 1]);
421 	arcn->pad = 0;
422 	arcn->skip = 0;
423 	switch(hd->linkflag) {
424 	case SYMTYPE:
425 		/*
426 		 * symbolic link, need to get the link name and set the type in
427 		 * the st_mode so -v printing will look correct.
428 		 */
429 		arcn->type = PAX_SLK;
430 		arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname,
431 			sizeof(arcn->ln_name));
432 		arcn->sb.st_mode |= S_IFLNK;
433 		break;
434 	case LNKTYPE:
435 		/*
436 		 * hard link, need to get the link name, set the type in the
437 		 * st_mode and st_nlink so -v printing will look better.
438 		 */
439 		arcn->type = PAX_HLK;
440 		arcn->sb.st_nlink = 2;
441 		arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname,
442 			sizeof(arcn->ln_name));
443 
444 		/*
445 		 * no idea of what type this thing really points at, but
446 		 * we set something for printing only.
447 		 */
448 		arcn->sb.st_mode |= S_IFREG;
449 		break;
450 	case LONGLINKTYPE:
451 		arcn->type = PAX_GLL;
452 		/* FALLTHROUGH */
453 	case LONGNAMETYPE:
454 		/*
455 		 * GNU long link/file; we tag these here and let the
456 		 * pax internals deal with it -- too ugly otherwise.
457 		 */
458 		if (hd->linkflag != LONGLINKTYPE)
459 			arcn->type = PAX_GLF;
460 		arcn->pad = TAR_PAD(arcn->sb.st_size);
461 		arcn->skip = arcn->sb.st_size;
462 		arcn->ln_name[0] = '\0';
463 		arcn->ln_nlen = 0;
464 		break;
465 	case DIRTYPE:
466 		/*
467 		 * It is a directory, set the mode for -v printing
468 		 */
469 		arcn->type = PAX_DIR;
470 		arcn->sb.st_mode |= S_IFDIR;
471 		arcn->sb.st_nlink = 2;
472 		arcn->ln_name[0] = '\0';
473 		arcn->ln_nlen = 0;
474 		break;
475 	case AREGTYPE:
476 	case REGTYPE:
477 	default:
478 		/*
479 		 * If we have a trailing / this is a directory and NOT a file.
480 		 */
481 		arcn->ln_name[0] = '\0';
482 		arcn->ln_nlen = 0;
483 		if (*pt == '/') {
484 			/*
485 			 * it is a directory, set the mode for -v printing
486 			 */
487 			arcn->type = PAX_DIR;
488 			arcn->sb.st_mode |= S_IFDIR;
489 			arcn->sb.st_nlink = 2;
490 		} else {
491 			/*
492 			 * have a file that will be followed by data. Set the
493 			 * skip value to the size field and calculate the size
494 			 * of the padding.
495 			 */
496 			arcn->type = PAX_REG;
497 			arcn->sb.st_mode |= S_IFREG;
498 			arcn->pad = TAR_PAD(arcn->sb.st_size);
499 			arcn->skip = arcn->sb.st_size;
500 		}
501 		break;
502 	}
503 
504 	/*
505 	 * strip off any trailing slash.
506 	 */
507 	if (*pt == '/') {
508 		*pt = '\0';
509 		--arcn->nlen;
510 	}
511 	return(0);
512 }
513 
514 /*
515  * tar_wr()
516  *	write a tar header for the file specified in the ARCHD to the archive.
517  *	Have to check for file types that cannot be stored and file names that
518  *	are too long. Be careful of the term (last arg) to ul_oct, each field
519  *	of tar has it own spec for the termination character(s).
520  *	ASSUMED: space after header in header block is zero filled
521  * Return:
522  *	0 if file has data to be written after the header, 1 if file has NO
523  *	data to write after the header, -1 if archive write failed
524  */
525 
526 int
527 tar_wr(ARCHD *arcn)
528 {
529 	HD_TAR *hd;
530 	int len;
531 	char hdblk[sizeof(HD_TAR)];
532 
533 	/*
534 	 * check for those file system types which tar cannot store
535 	 */
536 	switch(arcn->type) {
537 	case PAX_DIR:
538 		/*
539 		 * user asked that dirs not be written to the archive
540 		 */
541 		if (tar_nodir)
542 			return(1);
543 		break;
544 	case PAX_CHR:
545 		paxwarn(1, "Tar cannot archive a character device %s",
546 		    arcn->org_name);
547 		return(1);
548 	case PAX_BLK:
549 		paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
550 		return(1);
551 	case PAX_SCK:
552 		paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
553 		return(1);
554 	case PAX_FIF:
555 		paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
556 		return(1);
557 	case PAX_SLK:
558 	case PAX_HLK:
559 	case PAX_HRG:
560 		if (arcn->ln_nlen > sizeof(hd->linkname)) {
561 			paxwarn(1,"Link name too long for tar %s", arcn->ln_name);
562 			return(1);
563 		}
564 		break;
565 	case PAX_REG:
566 	case PAX_CTG:
567 	default:
568 		break;
569 	}
570 
571 	/*
572 	 * check file name len, remember extra char for dirs (the / at the end)
573 	 */
574 	len = arcn->nlen;
575 	if (arcn->type == PAX_DIR)
576 		++len;
577 	if (len >= sizeof(hd->name)) {
578 		paxwarn(1, "File name too long for tar %s", arcn->name);
579 		return(1);
580 	}
581 
582 	/*
583 	 * Copy the data out of the ARCHD into the tar header based on the type
584 	 * of the file. Remember, many tar readers want all fields to be
585 	 * padded with zero so we zero the header first.  We then set the
586 	 * linkflag field (type), the linkname, the size, and set the padding
587 	 * (if any) to be added after the file data (0 for all other types,
588 	 * as they only have a header).
589 	 */
590 	memset(hdblk, 0, sizeof(hdblk));
591 	hd = (HD_TAR *)hdblk;
592 	strlcpy(hd->name, arcn->name, sizeof(hd->name));
593 	arcn->pad = 0;
594 
595 	if (arcn->type == PAX_DIR) {
596 		/*
597 		 * directories are the same as files, except have a filename
598 		 * that ends with a /, we add the slash here. No data follows
599 		 * dirs, so no pad.
600 		 */
601 		hd->linkflag = AREGTYPE;
602 		hd->name[len-1] = '/';
603 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
604 			goto out;
605 	} else if (arcn->type == PAX_SLK) {
606 		/*
607 		 * no data follows this file, so no pad
608 		 */
609 		hd->linkflag = SYMTYPE;
610 		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
611 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
612 			goto out;
613 	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
614 		/*
615 		 * no data follows this file, so no pad
616 		 */
617 		hd->linkflag = LNKTYPE;
618 		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
619 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
620 			goto out;
621 	} else {
622 		/*
623 		 * data follows this file, so set the pad
624 		 */
625 		hd->linkflag = AREGTYPE;
626 #		ifdef LONG_OFF_T
627 		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
628 		    sizeof(hd->size), 1)) {
629 #		else
630 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
631 		    sizeof(hd->size), 1)) {
632 #		endif
633 			paxwarn(1,"File is too large for tar %s", arcn->org_name);
634 			return(1);
635 		}
636 		arcn->pad = TAR_PAD(arcn->sb.st_size);
637 	}
638 
639 	/*
640 	 * copy those fields that are independent of the type
641 	 */
642 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
643 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
644 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
645 	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
646 		goto out;
647 
648 	/*
649 	 * calculate and add the checksum, then write the header. A return of
650 	 * 0 tells the caller to now write the file data, 1 says no data needs
651 	 * to be written
652 	 */
653 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
654 	    sizeof(hd->chksum), 3))
655 		goto out;
656 	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
657 		return(-1);
658 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
659 		return(-1);
660 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
661 		return(0);
662 	return(1);
663 
664     out:
665 	/*
666 	 * header field is out of range
667 	 */
668 	paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
669 	return(1);
670 }
671 
672 /*
673  * Routines for POSIX ustar
674  */
675 
/*
 * ustar_strd()
 *	set up for reading ustar: start the tables behind usrtb_start() and
 *	grptb_start(). The second is not attempted when the first fails.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return(-1);
	if (grptb_start() < 0)
		return(-1);
	return(0);
}
690 
/*
 * ustar_stwr()
 *	set up for writing ustar: start the tables behind uidtb_start() and
 *	gidtb_start(). The second is not attempted when the first fails.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return(-1);
	if (gidtb_start() < 0)
		return(-1);
	return(0);
}
705 
706 /*
707  * ustar_id()
708  *	determine if a block given to us is a valid ustar header. We have to
709  *	be on the lookout for those pesky blocks of all zero's
710  * Return:
711  *	0 if a ustar header, -1 otherwise
712  */
713 
714 int
715 ustar_id(char *blk, int size)
716 {
717 	HD_USTAR *hd;
718 
719 	if (size < BLKMULT)
720 		return(-1);
721 	hd = (HD_USTAR *)blk;
722 
723 	/*
724 	 * check for block of zero's first, a simple and fast test then check
725 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
726 	 * programs are fouled up and create archives missing the \0. Last we
727 	 * check the checksum. If ok we have to assume it is a valid header.
728 	 */
729 	if (hd->name[0] == '\0')
730 		return(-1);
731 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
732 		return(-1);
733 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
734 		return(-1);
735 	return(0);
736 }
737 
738 /*
739  * ustar_rd()
740  *	extract the values out of block already determined to be a ustar header.
741  *	store the values in the ARCHD parameter.
742  * Return:
743  *	0
744  */
745 
746 int
747 ustar_rd(ARCHD *arcn, char *buf)
748 {
749 	HD_USTAR *hd;
750 	char *dest;
751 	int cnt = 0;
752 	dev_t devmajor;
753 	dev_t devminor;
754 
755 	/*
756 	 * we only get proper sized buffers
757 	 */
758 	if (ustar_id(buf, BLKMULT) < 0)
759 		return(-1);
760 	arcn->org_name = arcn->name;
761 	arcn->sb.st_nlink = 1;
762 	arcn->pat = NULL;
763 	arcn->nlen = 0;
764 	hd = (HD_USTAR *)buf;
765 
766 	/*
767 	 * see if the filename is split into two parts. if, so joint the parts.
768 	 * we copy the prefix first and add a / between the prefix and name.
769 	 */
770 	dest = arcn->name;
771 	if (*(hd->prefix) != '\0') {
772 		cnt = strlcpy(dest, hd->prefix, sizeof(arcn->name) - 1);
773 		dest += cnt;
774 		*dest++ = '/';
775 		cnt++;
776 	}
777 	if (gnu_hack_string) {
778 		arcn->nlen = strlcpy(dest, gnu_hack_string,
779 		    sizeof(arcn->name) - cnt);
780 		free(gnu_hack_string);
781 		gnu_hack_string = NULL;
782 	} else {
783 		arcn->nlen = strlcpy(dest, hd->name, sizeof(arcn->name) - cnt);
784 	}
785 
786 	/*
787 	 * follow the spec to the letter. we should only have mode bits, strip
788 	 * off all other crud we may be passed.
789 	 */
790 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
791 	    0xfff);
792 #ifdef LONG_OFF_T
793 	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
794 #else
795 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
796 #endif
797 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
798 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
799 
800 	/*
801 	 * If we can find the ascii names for gname and uname in the password
802 	 * and group files we will use the uid's and gid they bind. Otherwise
803 	 * we use the uid and gid values stored in the header. (This is what
804 	 * the posix spec wants).
805 	 */
806 	hd->gname[sizeof(hd->gname) - 1] = '\0';
807 	if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
808 		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
809 	hd->uname[sizeof(hd->uname) - 1] = '\0';
810 	if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
811 		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
812 
813 	/*
814 	 * set the defaults, these may be changed depending on the file type
815 	 */
816 	arcn->ln_name[0] = '\0';
817 	arcn->ln_nlen = 0;
818 	arcn->pad = 0;
819 	arcn->skip = 0;
820 	arcn->sb.st_rdev = (dev_t)0;
821 
822 	/*
823 	 * set the mode and PAX type according to the typeflag in the header
824 	 */
825 	switch(hd->typeflag) {
826 	case FIFOTYPE:
827 		arcn->type = PAX_FIF;
828 		arcn->sb.st_mode |= S_IFIFO;
829 		break;
830 	case DIRTYPE:
831 		arcn->type = PAX_DIR;
832 		arcn->sb.st_mode |= S_IFDIR;
833 		arcn->sb.st_nlink = 2;
834 
835 		/*
836 		 * Some programs that create ustar archives append a '/'
837 		 * to the pathname for directories. This clearly violates
838 		 * ustar specs, but we will silently strip it off anyway.
839 		 */
840 		if (arcn->name[arcn->nlen - 1] == '/')
841 			arcn->name[--arcn->nlen] = '\0';
842 		break;
843 	case BLKTYPE:
844 	case CHRTYPE:
845 		/*
846 		 * this type requires the rdev field to be set.
847 		 */
848 		if (hd->typeflag == BLKTYPE) {
849 			arcn->type = PAX_BLK;
850 			arcn->sb.st_mode |= S_IFBLK;
851 		} else {
852 			arcn->type = PAX_CHR;
853 			arcn->sb.st_mode |= S_IFCHR;
854 		}
855 		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
856 		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
857 		arcn->sb.st_rdev = TODEV(devmajor, devminor);
858 		break;
859 	case SYMTYPE:
860 	case LNKTYPE:
861 		if (hd->typeflag == SYMTYPE) {
862 			arcn->type = PAX_SLK;
863 			arcn->sb.st_mode |= S_IFLNK;
864 		} else {
865 			arcn->type = PAX_HLK;
866 			/*
867 			 * so printing looks better
868 			 */
869 			arcn->sb.st_mode |= S_IFREG;
870 			arcn->sb.st_nlink = 2;
871 		}
872 		/*
873 		 * copy the link name
874 		 */
875 		arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname,
876 			sizeof(arcn->ln_name));
877 		break;
878 	case LONGLINKTYPE:
879 	case LONGNAMETYPE:
880 		/*
881 		 * GNU long link/file; we tag these here and let the
882 		 * pax internals deal with it -- too ugly otherwise.
883 		 */
884 		arcn->type =
885 		    hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
886 		arcn->pad = TAR_PAD(arcn->sb.st_size);
887 		arcn->skip = arcn->sb.st_size;
888 		arcn->ln_name[0] = '\0';
889 		arcn->ln_nlen = 0;
890 		break;
891 	case CONTTYPE:
892 	case AREGTYPE:
893 	case REGTYPE:
894 	default:
895 		/*
896 		 * these types have file data that follows. Set the skip and
897 		 * pad fields.
898 		 */
899 		arcn->type = PAX_REG;
900 		arcn->pad = TAR_PAD(arcn->sb.st_size);
901 		arcn->skip = arcn->sb.st_size;
902 		arcn->sb.st_mode |= S_IFREG;
903 		break;
904 	}
905 	return(0);
906 }
907 
908 /*
909  * ustar_wr()
910  *	write a ustar header for the file specified in the ARCHD to the archive
911  *	Have to check for file types that cannot be stored and file names that
912  *	are too long. Be careful of the term (last arg) to ul_oct, we only use
913  *	'\0' for the termination character (this is different than picky tar)
914  *	ASSUMED: space after header in header block is zero filled
915  * Return:
916  *	0 if file has data to be written after the header, 1 if file has NO
917  *	data to write after the header, -1 if archive write failed
918  */
919 
920 int
921 ustar_wr(ARCHD *arcn)
922 {
923 	HD_USTAR *hd;
924 	char *pt;
925 	char hdblk[sizeof(HD_USTAR)];
926 
927 	/*
928 	 * check for those file system types ustar cannot store
929 	 */
930 	if (arcn->type == PAX_SCK) {
931 		paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
932 		return(1);
933 	}
934 
935 	/*
936 	 * check the length of the linkname
937 	 */
938 	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
939 	    (arcn->type == PAX_HRG)) && (arcn->ln_nlen >= sizeof(hd->linkname))){
940 		paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
941 		return(1);
942 	}
943 
944 	/*
945 	 * split the path name into prefix and name fields (if needed). if
946 	 * pt != arcn->name, the name has to be split
947 	 */
948 	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
949 		paxwarn(1, "File name too long for ustar %s", arcn->name);
950 		return(1);
951 	}
952 
953 	/*
954 	 * zero out the header so we don't have to worry about zero fill below
955 	 */
956 	memset(hdblk, 0, sizeof(hdblk));
957 	hd = (HD_USTAR *)hdblk;
958 	arcn->pad = 0L;
959 
960 	/*
961 	 * split the name, or zero out the prefix
962 	 */
963 	if (pt != arcn->name) {
964 		/*
965 		 * name was split, pt points at the / where the split is to
966 		 * occur, we remove the / and copy the first part to the prefix
967 		 */
968 		*pt = '\0';
969 		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
970 		*pt++ = '/';
971 	}
972 
973 	/*
974 	 * copy the name part. this may be the whole path or the part after
975 	 * the prefix
976 	 */
977 	strlcpy(hd->name, pt, sizeof(hd->name));
978 
979 	/*
980 	 * set the fields in the header that are type dependent
981 	 */
982 	switch(arcn->type) {
983 	case PAX_DIR:
984 		hd->typeflag = DIRTYPE;
985 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
986 			goto out;
987 		break;
988 	case PAX_CHR:
989 	case PAX_BLK:
990 		if (arcn->type == PAX_CHR)
991 			hd->typeflag = CHRTYPE;
992 		else
993 			hd->typeflag = BLKTYPE;
994 		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
995 		   sizeof(hd->devmajor), 3) ||
996 		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
997 		   sizeof(hd->devminor), 3) ||
998 		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
999 			goto out;
1000 		break;
1001 	case PAX_FIF:
1002 		hd->typeflag = FIFOTYPE;
1003 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1004 			goto out;
1005 		break;
1006 	case PAX_SLK:
1007 	case PAX_HLK:
1008 	case PAX_HRG:
1009 		if (arcn->type == PAX_SLK)
1010 			hd->typeflag = SYMTYPE;
1011 		else
1012 			hd->typeflag = LNKTYPE;
1013 		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1014 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1015 			goto out;
1016 		break;
1017 	case PAX_REG:
1018 	case PAX_CTG:
1019 	default:
1020 		/*
1021 		 * file data with this type, set the padding
1022 		 */
1023 		if (arcn->type == PAX_CTG)
1024 			hd->typeflag = CONTTYPE;
1025 		else
1026 			hd->typeflag = REGTYPE;
1027 		arcn->pad = TAR_PAD(arcn->sb.st_size);
1028 #		ifdef LONG_OFF_T
1029 		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1030 		    sizeof(hd->size), 3)) {
1031 #		else
1032 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1033 		    sizeof(hd->size), 3)) {
1034 #		endif
1035 			paxwarn(1,"File is too long for ustar %s",arcn->org_name);
1036 			return(1);
1037 		}
1038 		break;
1039 	}
1040 
1041 	strncpy(hd->magic, TMAGIC, TMAGLEN);
1042 	strncpy(hd->version, TVERSION, TVERSLEN);
1043 
1044 	/*
1045 	 * set the remaining fields. Some versions want all 16 bits of mode
1046 	 * we better humor them (they really do not meet spec though)....
1047 	 */
1048 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1049 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1050 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1051 	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1052 		goto out;
1053 	strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname));
1054 	strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname));
1055 
1056 	/*
1057 	 * calculate and store the checksum write the header to the archive
1058 	 * return 0 tells the caller to now write the file data, 1 says no data
1059 	 * needs to be written
1060 	 */
1061 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1062 	   sizeof(hd->chksum), 3))
1063 		goto out;
1064 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1065 		return(-1);
1066 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1067 		return(-1);
1068 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1069 		return(0);
1070 	return(1);
1071 
1072     out:
1073 	/*
1074 	 * header field is out of range
1075 	 */
1076 	paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1077 	return(1);
1078 }
1079 
/*
 * name_split()
 *	see if the name has to be split for storage in a ustar header. We try
 *	to fit the entire name in the name field without splitting if we can.
 *	The split point is always at a /, and the prefix in front of it is
 *	never empty.
 * Return:
 *	character pointer to the split point (always the / that is to be
 *	removed). If the split is not needed, the pointer is set to the start
 *	of the file name (it would violate the spec to split there). A NULL
 *	is returned if the file name is too long.
 */

static char *
name_split(char *name, int len)
{
	char *start;

	/*
	 * check to see if the file name is small enough to fit in the name
	 * field. if so just return a pointer to the name.
	 */
	if (len < TNMSZ)
		return(name);
	if (len > (TPFSZ + TNMSZ))
		return(NULL);

	/*
	 * we start looking at the biggest sized piece that fits in the name
	 * field. We walk forward looking for a slash to split at. The idea is
	 * to find the biggest piece to fit in the name field (or the smallest
	 * prefix we can find)
	 */
	start = name + len - TNMSZ;

	/*
	 * When len == TNMSZ the search would begin on the first character of
	 * the name. A / found there would mean an empty prefix, which cannot
	 * be stored, so step over that character and look for a later slash
	 * before giving up on the name.
	 */
	if (start == name)
		++start;
	while ((*start != '\0') && (*start != '/'))
		++start;

	/*
	 * if we hit the end of the string, this name cannot be split, so we
	 * cannot store this file.
	 */
	if (*start == '\0')
		return(NULL);
	len = start - name;

	/*
	 * NOTE: /str where the length of str == TNMSZ can not be stored under
	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
	 * the file would then expand on extract to //str. The len == 0 below
	 * makes this special case follow the spec to the letter (it is kept
	 * as a safety net; the search above never starts at the first
	 * character). The prefix also has to leave room for its NUL.
	 */
	if ((len >= TPFSZ) || (len == 0))
		return(NULL);

	/*
	 * ok have a split point, return it to the caller
	 */
	return(start);
}
1138