xref: /netbsd/bin/pax/tar.c (revision bf9ec67e)
1 /*	$NetBSD: tar.c,v 1.21 2002/01/31 19:27:54 tv Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include <sys/cdefs.h>
41 #if defined(__RCSID) && !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: tar.c,v 1.21 2002/01/31 19:27:54 tv Exp $");
46 #endif
47 #endif /* not lint */
48 
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 
54 #include <ctype.h>
55 #include <errno.h>
56 #include <grp.h>
57 #include <pwd.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 
63 #include "pax.h"
64 #include "extern.h"
65 #include "tar.h"
66 
/*
 * Routines for reading, writing, and identifying the headers of various
 * versions of tar
 */
70 
71 static u_long tar_chksm(char *, int);
72 static char *name_split(char *, int);
73 static int ul_oct(u_long, char *, int, int);
74 #ifndef NET2_STAT
75 static int ull_oct(unsigned long long, char *, int, int);
76 #endif
77 
78 /*
79  * Routines common to all versions of tar
80  */
81 
82 static int tar_nodir;			/* do not write dirs under old tar; set by tar_opt(), read by tar_wr() */
83 int is_oldgnutar;			/* skip end-of-volume checks; set by ustar_id() when the magic is "ustar  " */
84 char *gnu_hack_string;			/* ././@LongLink hackery; when non-NULL, tar_rd() uses it as the name and frees it */
85 
86 /*
87  * tar_endwr()
88  *	add the tar trailer of two null blocks
89  * Return:
90  *	0 if ok, -1 otherwise (what wr_skip returns)
91  */
92 
93 int
94 tar_endwr(void)
95 {
96 	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
97 }
98 
99 /*
100  * tar_endrd()
101  *	no cleanup needed here, just return size of trailer (for append)
102  * Return:
103  *	size of trailer (2 * BLKMULT)
104  */
105 
106 off_t
107 tar_endrd(void)
108 {
109 	return((off_t)(NULLCNT*BLKMULT));
110 }
111 
112 /*
113  * tar_trail()
114  *	Called to determine if a header block is a valid trailer. We are passed
115  *	the block, the in_sync flag (which tells us we are in resync mode;
116  *	looking for a valid header), and cnt (which starts at zero) which is
117  *	used to count the number of empty blocks we have seen so far.
118  * Return:
119  *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
120  *	could never contain a header.
121  */
122 
123 int
124 tar_trail(char *buf, int in_resync, int *cnt)
125 {
126 	int i;
127 
128 	/*
129 	 * look for all zero, trailer is two consecutive blocks of zero
130 	 */
131 	for (i = 0; i < BLKMULT; ++i) {
132 		if (buf[i] != '\0')
133 			break;
134 	}
135 
136 	/*
137 	 * if not all zero it is not a trailer, but MIGHT be a header.
138 	 */
139 	if (i != BLKMULT)
140 		return(-1);
141 
142 	/*
143 	 * When given a zero block, we must be careful!
144 	 * If we are not in resync mode, check for the trailer. Have to watch
145 	 * out that we do not mis-identify file data as the trailer, so we do
146 	 * NOT try to id a trailer during resync mode. During resync mode we
147 	 * might as well throw this block out since a valid header can NEVER be
148 	 * a block of all 0 (we must have a valid file name).
149 	 */
150 	if (!in_resync && (++*cnt >= NULLCNT))
151 		return(0);
152 	return(1);
153 }
154 
/*
 * ul_oct()
 *	render an unsigned long as octal digits into the fixed width field
 *	str of len bytes. The various tar flavors disagree on how each field
 *	must be terminated, so term picks the trailing byte(s):
 *		3: "\0"   2: "\0 " (NUL then blank)   1: " "
 *		0 or anything else: " \0" (blank then NUL)
 *	The digits are right justified in front of the terminator and the
 *	field is padded on the left with '0'.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * lay down the terminator, working backwards from the last byte
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * emit octal digits, least significant first, until the value is
	 * used up or the field is full
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (char)(val & 0x7));
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * zero pad whatever room is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * leftover bits mean the value did not fit
	 */
	return (val == (u_long)0) ? 0 : -1;
}
208 
209 #ifndef NET2_STAT
/*
 * ull_oct()
 *	render an unsigned long long as octal digits into the fixed width
 *	field str of len bytes. The various tar flavors disagree on how each
 *	field must be terminated, so term picks the trailing byte(s):
 *		3: "\0"   2: "\0 " (NUL then blank)   1: " "
 *		0 or anything else: " \0" (blank then NUL)
 *	The digits are right justified in front of the terminator and the
 *	field is padded on the left with '0'.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * lay down the terminator, working backwards from the last byte
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * emit octal digits, least significant first, until the value is
	 * used up or the field is full
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (char)(val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero pad whatever room is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * leftover bits mean the value did not fit
	 */
	return (val == (unsigned long long)0) ? 0 : -1;
}
263 #endif
264 
265 /*
266  * tar_chksm()
267  *	calculate the checksum for a tar block counting the checksum field as
268  *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
269  *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
270  *	pad headers with 0.
271  * Return:
272  *	unsigned long checksum
273  */
274 
275 static u_long
276 tar_chksm(char *blk, int len)
277 {
278 	char *stop;
279 	char *pt;
280 	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
281 
282 	/*
283 	 * add the part of the block before the checksum field
284 	 */
285 	pt = blk;
286 	stop = blk + CHK_OFFSET;
287 	while (pt < stop)
288 		chksm += (u_long)(*pt++ & 0xff);
289 	/*
290 	 * move past the checksum field and keep going, spec counts the
291 	 * checksum field as the sum of 8 blanks (which is pre-computed as
292 	 * BLNKSUM).
293 	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
294 	 * starts, no point in summing zero's)
295 	 */
296 	pt += CHK_LEN;
297 	stop = blk + len;
298 	while (pt < stop)
299 		chksm += (u_long)(*pt++ & 0xff);
300 	return(chksm);
301 }
302 
303 /*
304  * Routines for old BSD style tar (also made portable to sysV tar)
305  */
306 
307 /*
308  * tar_id()
309  *	determine if a block given to us is a valid tar header (and not a USTAR
310  *	header). We have to be on the lookout for those pesky blocks of	all
311  *	zero's.
312  * Return:
313  *	0 if a tar header, -1 otherwise
314  */
315 
316 int
317 tar_id(char *blk, int size)
318 {
319 	HD_TAR *hd;
320 	HD_USTAR *uhd;
321 
322 	if (size < BLKMULT)
323 		return(-1);
324 	hd = (HD_TAR *)blk;
325 	uhd = (HD_USTAR *)blk;
326 
327 	/*
328 	 * check for block of zero's first, a simple and fast test, then make
329 	 * sure this is not a ustar header by looking for the ustar magic
330 	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
331 	 * wrong and create archives missing the \0. Last we check the
332 	 * checksum. If this is ok we have to assume it is a valid header.
333 	 */
334 	if (hd->name[0] == '\0')
335 		return(-1);
336 	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
337 		return(-1);
338 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
339 		return(-1);
340 	return(0);
341 }
342 
343 /*
344  * tar_opt()
345  *	handle tar format specific -o options
346  * Return:
347  *	0 if ok -1 otherwise
348  */
349 
350 int
351 tar_opt(void)
352 {
353 	OPLIST *opt;
354 
355 	while ((opt = opt_next()) != NULL) {
356 		if (strcmp(opt->name, TAR_OPTION) ||
357 		    strcmp(opt->value, TAR_NODIR)) {
358 			tty_warn(1,
359 			    "Unknown tar format -o option/value pair %s=%s",
360 			    opt->name, opt->value);
361 			tty_warn(1,
362 			    "%s=%s is the only supported tar format option",
363 			    TAR_OPTION, TAR_NODIR);
364 			return(-1);
365 		}
366 
367 		/*
368 		 * we only support one option, and only when writing
369 		 */
370 		if ((act != APPND) && (act != ARCHIVE)) {
371 			tty_warn(1, "%s=%s is only supported when writing.",
372 			    opt->name, opt->value);
373 			return(-1);
374 		}
375 		tar_nodir = 1;
376 	}
377 	return(0);
378 }
379 
380 
381 /*
382  * tar_rd()
383  *	extract the values out of block already determined to be a tar header.
384  *	store the values in the ARCHD parameter.
385  * Return:
386  *	0
387  */
388 
389 int
390 tar_rd(ARCHD *arcn, char *buf)
391 {
392 	HD_TAR *hd;
393 	char *pt;
394 
395 	/*
396 	 * we only get proper sized buffers passed to us
397 	 */
398 	if (tar_id(buf, BLKMULT) < 0)
399 		return(-1);
400 	arcn->org_name = arcn->name;
401 	arcn->sb.st_nlink = 1;
402 	arcn->pat = NULL;
403 
404 	/*
405 	 * copy out the name and values in the stat buffer
406 	 */
407 	hd = (HD_TAR *)buf;
408 	if (gnu_hack_string) {
409 		int len = MAX(strlen(gnu_hack_string), PAXPATHLEN);
410 		arcn->nlen = l_strncpy(arcn->name, gnu_hack_string, len);
411 		arcn->name[len] = '\0';
412 		free(gnu_hack_string);
413 		gnu_hack_string = NULL;
414 	} else {
415 		arcn->nlen = l_strncpy(arcn->name, hd->name, sizeof(hd->name));
416 		arcn->name[arcn->nlen] = '\0';
417 	}
418 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
419 	    0xfff);
420 	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
421 	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
422 	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
423 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
424 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
425 
426 	/*
427 	 * have to look at the last character, it may be a '/' and that is used
428 	 * to encode this as a directory
429 	 */
430 	pt = &(arcn->name[arcn->nlen - 1]);
431 	arcn->pad = 0;
432 	arcn->skip = 0;
433 	switch(hd->linkflag) {
434 	case SYMTYPE:
435 		/*
436 		 * symbolic link, need to get the link name and set the type in
437 		 * the st_mode so -v printing will look correct.
438 		 */
439 		arcn->type = PAX_SLK;
440 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
441 			sizeof(hd->linkname));
442 		arcn->ln_name[arcn->ln_nlen] = '\0';
443 		arcn->sb.st_mode |= S_IFLNK;
444 		break;
445 	case LNKTYPE:
446 		/*
447 		 * hard link, need to get the link name, set the type in the
448 		 * st_mode and st_nlink so -v printing will look better.
449 		 */
450 		arcn->type = PAX_HLK;
451 		arcn->sb.st_nlink = 2;
452 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
453 			sizeof(hd->linkname));
454 		arcn->ln_name[arcn->ln_nlen] = '\0';
455 
456 		/*
457 		 * no idea of what type this thing really points at, but
458 		 * we set something for printing only.
459 		 */
460 		arcn->sb.st_mode |= S_IFREG;
461 		break;
462 	case LONGLINKTYPE:
463 		arcn->type = PAX_GLL;
464 		/* FALLTHROUGH */
465 	case LONGNAMETYPE:
466 		/*
467 		 * GNU long link/file; we tag these here and let the
468 		 * pax internals deal with it -- too ugly otherwise.
469 		 */
470 		if (hd->linkflag != LONGLINKTYPE)
471 			arcn->type = PAX_GLF;
472 		arcn->pad = TAR_PAD(arcn->sb.st_size);
473 		arcn->skip = arcn->sb.st_size;
474 		arcn->ln_name[0] = '\0';
475 		arcn->ln_nlen = 0;
476 		break;
477 	case AREGTYPE:
478 	case REGTYPE:
479 	case DIRTYPE:	/* see below */
480 	default:
481 		/*
482 		 * If we have a trailing / this is a directory and NOT a file.
483 		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
484 		 * reported that V7 archives using USTAR directories do exist.
485 		 */
486 		arcn->ln_name[0] = '\0';
487 		arcn->ln_nlen = 0;
488 		if (*pt == '/' || hd->linkflag == DIRTYPE) {
489 			/*
490 			 * it is a directory, set the mode for -v printing
491 			 */
492 			arcn->type = PAX_DIR;
493 			arcn->sb.st_mode |= S_IFDIR;
494 			arcn->sb.st_nlink = 2;
495 		} else {
496 			/*
497 			 * have a file that will be followed by data. Set the
498 			 * skip value to the size field and calculate the size
499 			 * of the padding.
500 			 */
501 			arcn->type = PAX_REG;
502 			arcn->sb.st_mode |= S_IFREG;
503 			arcn->pad = TAR_PAD(arcn->sb.st_size);
504 			arcn->skip = arcn->sb.st_size;
505 		}
506 		break;
507 	}
508 
509 	/*
510 	 * strip off any trailing slash.
511 	 */
512 	if (*pt == '/') {
513 		*pt = '\0';
514 		--arcn->nlen;
515 	}
516 	return(0);
517 }
518 
519 /*
520  * tar_wr()
521  *	write a tar header for the file specified in the ARCHD to the archive.
522  *	Have to check for file types that cannot be stored and file names that
523  *	are too long. Be careful of the term (last arg) to ul_oct, each field
524  *	of tar has it own spec for the termination character(s).
525  *	ASSUMED: space after header in header block is zero filled
526  * Return:
527  *	0 if file has data to be written after the header, 1 if file has NO
528  *	data to write after the header, -1 if archive write failed
529  */
530 
531 int
532 tar_wr(ARCHD *arcn)
533 {
534 	HD_TAR *hd;
535 	int len;
536 	char hdblk[sizeof(HD_TAR)];
537 
538 	/*
539 	 * check for those file system types which tar cannot store
540 	 */
541 	switch(arcn->type) {
542 	case PAX_DIR:
543 		/*
544 		 * user asked that dirs not be written to the archive
545 		 */
546 		if (tar_nodir)
547 			return(1);
548 		break;
549 	case PAX_CHR:
550 		tty_warn(1, "Tar cannot archive a character device %s",
551 		    arcn->org_name);
552 		return(1);
553 	case PAX_BLK:
554 		tty_warn(1,
555 		    "Tar cannot archive a block device %s", arcn->org_name);
556 		return(1);
557 	case PAX_SCK:
558 		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
559 		return(1);
560 	case PAX_FIF:
561 		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
562 		return(1);
563 	case PAX_SLK:
564 	case PAX_HLK:
565 	case PAX_HRG:
566 		if (arcn->ln_nlen > sizeof(hd->linkname)) {
567 			tty_warn(1,"Link name too long for tar %s",
568 			    arcn->ln_name);
569 			return(1);
570 		}
571 		break;
572 	case PAX_REG:
573 	case PAX_CTG:
574 	default:
575 		break;
576 	}
577 
578 	/*
579 	 * check file name len, remember extra char for dirs (the / at the end)
580 	 */
581 	len = arcn->nlen;
582 	if (arcn->type == PAX_DIR)
583 		++len;
584 	if (len > sizeof(hd->name)) {
585 		tty_warn(1, "File name too long for tar %s", arcn->name);
586 		return(1);
587 	}
588 
589 	/*
590 	 * copy the data out of the ARCHD into the tar header based on the type
591 	 * of the file. Remember many tar readers want the unused fields to be
592 	 * padded with zero. We set the linkflag field (type), the linkname
593 	 * (or zero if not used),the size, and set the padding (if any) to be
594 	 * added after the file data (0 for all other types, as they only have
595 	 * a header)
596 	 */
597 	hd = (HD_TAR *)hdblk;
598 	zf_strncpy(hd->name, arcn->name, sizeof(hd->name));
599 	arcn->pad = 0;
600 
601 	if (arcn->type == PAX_DIR) {
602 		/*
603 		 * directories are the same as files, except have a filename
604 		 * that ends with a /, we add the slash here. No data follows,
605 		 * dirs, so no pad.
606 		 */
607 		hd->linkflag = AREGTYPE;
608 		memset(hd->linkname, 0, sizeof(hd->linkname));
609 		hd->name[len-1] = '/';
610 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
611 			goto out;
612 	} else if (arcn->type == PAX_SLK) {
613 		/*
614 		 * no data follows this file, so no pad
615 		 */
616 		hd->linkflag = SYMTYPE;
617 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
618 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
619 			goto out;
620 	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
621 		/*
622 		 * no data follows this file, so no pad
623 		 */
624 		hd->linkflag = LNKTYPE;
625 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
626 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
627 			goto out;
628 	} else {
629 		/*
630 		 * data follows this file, so set the pad
631 		 */
632 		hd->linkflag = AREGTYPE;
633 		memset(hd->linkname, 0, sizeof(hd->linkname));
634 		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
635 			tty_warn(1,"File is too large for tar %s",
636 			    arcn->org_name);
637 			return(1);
638 		}
639 		arcn->pad = TAR_PAD(arcn->sb.st_size);
640 	}
641 
642 	/*
643 	 * copy those fields that are independent of the type
644 	 */
645 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
646 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
647 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
648 	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
649 		goto out;
650 
651 	/*
652 	 * calculate and add the checksum, then write the header. A return of
653 	 * 0 tells the caller to now write the file data, 1 says no data needs
654 	 * to be written
655 	 */
656 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
657 	    sizeof(hd->chksum), 2))
658 		goto out;
659 	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
660 		return(-1);
661 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
662 		return(-1);
663 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
664 		return(0);
665 	return(1);
666 
667     out:
668 	/*
669 	 * header field is out of range
670 	 */
671 	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
672 	return(1);
673 }
674 
675 /*
676  * Routines for POSIX ustar
677  */
678 
/*
 * ustar_strd()
 *	set up for reading ustar; there is nothing to prepare.
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_strd(void)
{
	return 0;
}
691 
/*
 * ustar_stwr()
 *	set up for writing ustar; there is nothing to prepare.
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_stwr(void)
{
	return 0;
}
704 
705 /*
706  * ustar_id()
707  *	determine if a block given to us is a valid ustar header. We have to
708  *	be on the lookout for those pesky blocks of all zero's
709  * Return:
710  *	0 if a ustar header, -1 otherwise
711  */
712 
713 int
714 ustar_id(char *blk, int size)
715 {
716 	HD_USTAR *hd;
717 
718 	if (size < BLKMULT)
719 		return(-1);
720 	hd = (HD_USTAR *)blk;
721 
722 	/*
723 	 * check for block of zero's first, a simple and fast test then check
724 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
725 	 * programs are fouled up and create archives missing the \0. Last we
726 	 * check the checksum. If ok we have to assume it is a valid header.
727 	 */
728 	if (hd->name[0] == '\0')
729 		return(-1);
730 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
731 		return(-1);
732 	if (!strncmp(hd->magic, "ustar  ", 8))
733 		is_oldgnutar = 1;
734 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
735 		return(-1);
736 	return(0);
737 }
738 
739 /*
740  * ustar_rd()
741  *	extract the values out of block already determined to be a ustar header.
742  *	store the values in the ARCHD parameter.
743  * Return:
744  *	0
745  */
746 
747 int
748 ustar_rd(ARCHD *arcn, char *buf)
749 {
750 	HD_USTAR *hd;
751 	char *dest;
752 	int cnt;
753 	dev_t devmajor;
754 	dev_t devminor;
755 
756 	/*
757 	 * we only get proper sized buffers
758 	 */
759 	if (ustar_id(buf, BLKMULT) < 0)
760 		return(-1);
761 	arcn->org_name = arcn->name;
762 	arcn->sb.st_nlink = 1;
763 	arcn->pat = NULL;
764 	hd = (HD_USTAR *)buf;
765 
766 	/*
767 	 * see if the filename is split into two parts. if, so joint the parts.
768 	 * we copy the prefix first and add a / between the prefix and name.
769 	 */
770 	dest = arcn->name;
771 	if (*(hd->prefix) != '\0') {
772 		cnt = l_strncpy(arcn->name, hd->prefix, sizeof(hd->prefix));
773 		dest += cnt;
774 		*dest++ = '/';
775 	}
776 	cnt = l_strncpy(dest, hd->name, sizeof(hd->name));
777 	dest += cnt;
778 	*dest = '\0';
779 	arcn->nlen = dest - arcn->name;
780 
781 	/*
782 	 * follow the spec to the letter. we should only have mode bits, strip
783 	 * off all other crud we may be passed.
784 	 */
785 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
786 	    0xfff);
787 	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
788 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
789 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
790 
791 	/*
792 	 * If we can find the ascii names for gname and uname in the password
793 	 * and group files we will use the uid's and gid they bind. Otherwise
794 	 * we use the uid and gid values stored in the header. (This is what
795 	 * the posix spec wants).
796 	 */
797 	hd->gname[sizeof(hd->gname) - 1] = '\0';
798 	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
799 		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
800 	hd->uname[sizeof(hd->uname) - 1] = '\0';
801 	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
802 		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
803 
804 	/*
805 	 * set the defaults, these may be changed depending on the file type
806 	 */
807 	arcn->ln_name[0] = '\0';
808 	arcn->ln_nlen = 0;
809 	arcn->pad = 0;
810 	arcn->skip = 0;
811 	arcn->sb.st_rdev = (dev_t)0;
812 
813 	/*
814 	 * set the mode and PAX type according to the typeflag in the header
815 	 */
816 	switch(hd->typeflag) {
817 	case FIFOTYPE:
818 		arcn->type = PAX_FIF;
819 		arcn->sb.st_mode |= S_IFIFO;
820 		break;
821 	case DIRTYPE:
822 		arcn->type = PAX_DIR;
823 		arcn->sb.st_mode |= S_IFDIR;
824 		arcn->sb.st_nlink = 2;
825 
826 		/*
827 		 * Some programs that create ustar archives append a '/'
828 		 * to the pathname for directories. This clearly violates
829 		 * ustar specs, but we will silently strip it off anyway.
830 		 */
831 		if (arcn->name[arcn->nlen - 1] == '/')
832 			arcn->name[--arcn->nlen] = '\0';
833 		break;
834 	case BLKTYPE:
835 	case CHRTYPE:
836 		/*
837 		 * this type requires the rdev field to be set.
838 		 */
839 		if (hd->typeflag == BLKTYPE) {
840 			arcn->type = PAX_BLK;
841 			arcn->sb.st_mode |= S_IFBLK;
842 		} else {
843 			arcn->type = PAX_CHR;
844 			arcn->sb.st_mode |= S_IFCHR;
845 		}
846 		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
847 		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
848 		arcn->sb.st_rdev = TODEV(devmajor, devminor);
849 		break;
850 	case SYMTYPE:
851 	case LNKTYPE:
852 		if (hd->typeflag == SYMTYPE) {
853 			arcn->type = PAX_SLK;
854 			arcn->sb.st_mode |= S_IFLNK;
855 		} else {
856 			arcn->type = PAX_HLK;
857 			/*
858 			 * so printing looks better
859 			 */
860 			arcn->sb.st_mode |= S_IFREG;
861 			arcn->sb.st_nlink = 2;
862 		}
863 		/*
864 		 * copy the link name
865 		 */
866 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
867 			sizeof(hd->linkname));
868 		arcn->ln_name[arcn->ln_nlen] = '\0';
869 		break;
870 	case CONTTYPE:
871 	case AREGTYPE:
872 	case REGTYPE:
873 	default:
874 		/*
875 		 * these types have file data that follows. Set the skip and
876 		 * pad fields.
877 		 */
878 		arcn->type = PAX_REG;
879 		arcn->pad = TAR_PAD(arcn->sb.st_size);
880 		arcn->skip = arcn->sb.st_size;
881 		arcn->sb.st_mode |= S_IFREG;
882 		break;
883 	}
884 	return(0);
885 }
886 
887 /*
888  * ustar_wr()
889  *	write a ustar header for the file specified in the ARCHD to the archive
890  *	Have to check for file types that cannot be stored and file names that
891  *	are too long. Be careful of the term (last arg) to ul_oct, we only use
892  *	'\0' for the termination character (this is different than picky tar)
893  *	ASSUMED: space after header in header block is zero filled
894  * Return:
895  *	0 if file has data to be written after the header, 1 if file has NO
896  *	data to write after the header, -1 if archive write failed
897  */
898 
899 int
900 ustar_wr(ARCHD *arcn)
901 {
902 	HD_USTAR *hd;
903 	char *pt;
904 	char hdblk[sizeof(HD_USTAR)];
905 	const char *user, *group;
906 
907 	/*
908 	 * check for those file system types ustar cannot store
909 	 */
910 	if (arcn->type == PAX_SCK) {
911 		tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name);
912 		return(1);
913 	}
914 
915 	/*
916 	 * check the length of the linkname
917 	 */
918 	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
919 	    (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
920 		tty_warn(1, "Link name too long for ustar %s", arcn->ln_name);
921 		return(1);
922 	}
923 
924 	/*
925 	 * split the path name into prefix and name fields (if needed). if
926 	 * pt != arcn->name, the name has to be split
927 	 */
928 	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
929 		tty_warn(1, "File name too long for ustar %s", arcn->name);
930 		return(1);
931 	}
932 	hd = (HD_USTAR *)hdblk;
933 	arcn->pad = 0L;
934 
935 	/*
936 	 * split the name, or zero out the prefix
937 	 */
938 	if (pt != arcn->name) {
939 		/*
940 		 * name was split, pt points at the / where the split is to
941 		 * occur, we remove the / and copy the first part to the prefix
942 		 */
943 		*pt = '\0';
944 		zf_strncpy(hd->prefix, arcn->name, sizeof(hd->prefix));
945 		*pt++ = '/';
946 	} else
947 		memset(hd->prefix, 0, sizeof(hd->prefix));
948 
949 	/*
950 	 * copy the name part. this may be the whole path or the part after
951 	 * the prefix
952 	 */
953 	zf_strncpy(hd->name, pt, sizeof(hd->name));
954 
955 	/*
956 	 * set the fields in the header that are type dependent
957 	 */
958 	switch(arcn->type) {
959 	case PAX_DIR:
960 		hd->typeflag = DIRTYPE;
961 		memset(hd->linkname, 0, sizeof(hd->linkname));
962 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
963 		memset(hd->devminor, 0, sizeof(hd->devminor));
964 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
965 			goto out;
966 		break;
967 	case PAX_CHR:
968 	case PAX_BLK:
969 		if (arcn->type == PAX_CHR)
970 			hd->typeflag = CHRTYPE;
971 		else
972 			hd->typeflag = BLKTYPE;
973 		memset(hd->linkname, 0, sizeof(hd->linkname));
974 		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
975 		   sizeof(hd->devmajor), 3) ||
976 		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
977 		   sizeof(hd->devminor), 3) ||
978 		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
979 			goto out;
980 		break;
981 	case PAX_FIF:
982 		hd->typeflag = FIFOTYPE;
983 		memset(hd->linkname, 0, sizeof(hd->linkname));
984 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
985 		memset(hd->devminor, 0, sizeof(hd->devminor));
986 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
987 			goto out;
988 		break;
989 	case PAX_SLK:
990 	case PAX_HLK:
991 	case PAX_HRG:
992 		if (arcn->type == PAX_SLK)
993 			hd->typeflag = SYMTYPE;
994 		else
995 			hd->typeflag = LNKTYPE;
996 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
997 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
998 		memset(hd->devminor, 0, sizeof(hd->devminor));
999 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1000 			goto out;
1001 		break;
1002 	case PAX_REG:
1003 	case PAX_CTG:
1004 	default:
1005 		/*
1006 		 * file data with this type, set the padding
1007 		 */
1008 		if (arcn->type == PAX_CTG)
1009 			hd->typeflag = CONTTYPE;
1010 		else
1011 			hd->typeflag = REGTYPE;
1012 		memset(hd->linkname, 0, sizeof(hd->linkname));
1013 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
1014 		memset(hd->devminor, 0, sizeof(hd->devminor));
1015 		arcn->pad = TAR_PAD(arcn->sb.st_size);
1016 		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
1017 			tty_warn(1,"File is too long for ustar %s",
1018 			    arcn->org_name);
1019 			return(1);
1020 		}
1021 		break;
1022 	}
1023 
1024 	zf_strncpy(hd->magic, TMAGIC, TMAGLEN);
1025 	zf_strncpy(hd->version, TVERSION, TVERSLEN);
1026 
1027 	/*
1028 	 * set the remaining fields. Some versions want all 16 bits of mode
1029 	 * we better humor them (they really do not meet spec though)....
1030 	 */
1031 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1032 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1033 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1034 	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1035 		goto out;
1036 	user = user_from_uid(arcn->sb.st_uid, 1);
1037 	group = group_from_gid(arcn->sb.st_gid, 1);
1038 	zf_strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1039 	zf_strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1040 
1041 	/*
1042 	 * calculate and store the checksum write the header to the archive
1043 	 * return 0 tells the caller to now write the file data, 1 says no data
1044 	 * needs to be written
1045 	 */
1046 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1047 	   sizeof(hd->chksum), 3))
1048 		goto out;
1049 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1050 		return(-1);
1051 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1052 		return(-1);
1053 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1054 		return(0);
1055 	return(1);
1056 
1057     out:
1058 	/*
1059 	 * header field is out of range
1060 	 */
1061 	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1062 	return(1);
1063 }
1064 
1065 /*
1066  * name_split()
1067  *	see if the name has to be split for storage in a ustar header. We try
1068  *	to fit the entire name in the name field without splitting if we can.
1069  *	The split point is always at a /
1070  * Return
1071  *	character pointer to split point (always the / that is to be removed
1072  *	if the split is not needed, the points is set to the start of the file
1073  *	name (it would violate the spec to split there). A NULL is returned if
1074  *	the file name is too long
1075  */
1076 
1077 static char *
1078 name_split(char *name, int len)
1079 {
1080 	char *start;
1081 
1082 	/*
1083 	 * check to see if the file name is small enough to fit in the name
1084 	 * field. if so just return a pointer to the name.
1085 	 */
1086 	if (len <= TNMSZ)
1087 		return(name);
1088 	if (len > (TPFSZ + TNMSZ + 1))
1089 		return(NULL);
1090 
1091 	/*
1092 	 * we start looking at the biggest sized piece that fits in the name
1093 	 * field. We walk forward looking for a slash to split at. The idea is
1094 	 * to find the biggest piece to fit in the name field (or the smallest
1095 	 * prefix we can find) (the -1 is correct the biggest piece would
1096 	 * include the slash between the two parts that gets thrown away)
1097 	 */
1098 	start = name + len - TNMSZ - 1;
1099 	while ((*start != '\0') && (*start != '/'))
1100 		++start;
1101 
1102 	/*
1103 	 * if we hit the end of the string, this name cannot be split, so we
1104 	 * cannot store this file.
1105 	 */
1106 	if (*start == '\0')
1107 		return(NULL);
1108 	len = start - name;
1109 
1110 	/*
1111 	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1112 	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1113 	 * the file would then expand on extract to //str. The len == 0 below
1114 	 * makes this special case follow the spec to the letter.
1115 	 */
1116 	if ((len > TPFSZ) || (len == 0))
1117 		return(NULL);
1118 
1119 	/*
1120 	 * ok have a split point, return it to the caller
1121 	 */
1122 	return(start);
1123 }
1124 
/*
 * tar_gnutar_X_compat()
 *	deal with GNU tar -X switch.  basically, we go through each line of
 *	the file named by path, turning the "glob" on that line into an RE
 *	line of the form `/^RE$//', which we pass to rep_add(), which will
 *	add an empty replacement (exclusion) for the named files.
 *	In the glob, `*' becomes `.*', `?' becomes `.', and every other
 *	character that is neither alphanumeric nor blank is escaped with a
 *	backslash. Lines longer than MAXPATHLEN (not counting the newline)
 *	are reported and skipped, since they would not fit the conversion
 *	buffer.
 * Return:
 *	0 if ok, -1 if the file cannot be opened or rep_add() fails
 */
int
tar_gnutar_X_compat(const char *path)
{
	/*
	 * worst case every character is escaped (2 bytes each), plus "/^"
	 * in front and "$//" and the NUL behind
	 */
	char *line, sbuf[MAXPATHLEN * 2 + 1 + 5];
	FILE *fp;
	int lineno = 0, rv = 0;
	size_t len, i, j;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "can not open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		if (len > 0 && line[len - 1] == '\n')
			len--;
		if (len > MAXPATHLEN) {
			/*
			 * converting this line would run past the end of
			 * sbuf, so it has to be dropped
			 */
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}

		j = 0;
		sbuf[j++] = '/';
		sbuf[j++] = '^';
		for (i = 0; i < len; i++) {
			/*
			 * convert glob to regexp, escaping everything. The
			 * ctype functions need an unsigned char value.
			 */
			unsigned char c = (unsigned char)line[i];

			if (c == '*') {
				sbuf[j++] = '.';
				sbuf[j++] = '*';
			} else if (c == '?') {
				sbuf[j++] = '.';
			} else {
				if (!isalnum(c) && !isblank(c))
					sbuf[j++] = '\\';
				sbuf[j++] = (char)c;
			}
		}
		sbuf[j++] = '$';
		sbuf[j++] = '/';
		sbuf[j++] = '/';
		sbuf[j] = '\0';

		if (rep_add(sbuf) < 0) {
			rv = -1;
			break;
		}
	}

	/*
	 * the stream was only read from, so a close failure is ignored;
	 * what matters is that it is closed on every path
	 */
	(void)fclose(fp);
	return(rv);
}
1176