xref: /freebsd/bin/pax/tar.c (revision 06c3fb27)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/time.h>
38 #include <sys/stat.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include "pax.h"
42 #include "extern.h"
43 #include "tar.h"
44 
/*
 * Routines for reading, writing and identifying the headers of various
 * versions of tar
 */
48 
/*
 * forward declarations of the helpers private to this file; all four are
 * defined further down.
 */
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
static int uqd_oct(u_quad_t, char *, int, int);

/*
 * Routines common to all versions of tar
 */

/*
 * set to 1 by tar_opt() when the no-directory option is accepted; tested by
 * tar_wr() to skip directory entries.
 */
static int tar_nodir;			/* do not write dirs under old tar */
59 
60 /*
61  * tar_endwr()
62  *	add the tar trailer of two null blocks
63  * Return:
64  *	0 if ok, -1 otherwise (what wr_skip returns)
65  */
66 
67 int
68 tar_endwr(void)
69 {
70 	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
71 }
72 
/*
 * tar_endrd()
 *	nothing has to be cleaned up after reading; only report how large the
 *	trailer is (the value is used for append).
 * Return:
 *	size of the trailer in bytes (NULLCNT * BLKMULT)
 */

off_t
tar_endrd(void)
{
	off_t trailer = (off_t)(NULLCNT*BLKMULT);

	return(trailer);
}
85 
/*
 * tar_trail()
 *	Decide whether a header-sized block is (part of) the archive trailer.
 *	buf is the block, in_resync is non-zero while the caller is hunting
 *	for a valid header after an error, and *cnt (started at zero by the
 *	caller) counts the empty blocks seen so far.
 * Return:
 *	0 if a valid trailer, -1 if not a valid trailer (the block is not all
 *	zero and MIGHT be a header), or 1 if the block could never contain a
 *	header.
 */

int
tar_trail(char *buf, int in_resync, int *cnt)
{
	const char *p = buf;
	const char *end = buf + BLKMULT;

	/*
	 * any non-zero byte means this cannot be a trailer block
	 */
	while (p < end) {
		if (*p++ != '\0')
			return(-1);
	}

	/*
	 * The block is all zero, so it can never be a header (a header needs
	 * a file name). While resyncing we must not mistake zero filled file
	 * data for the trailer, so the block is simply thrown away and is not
	 * counted.
	 */
	if (in_resync)
		return(1);

	/*
	 * normal mode: the trailer is complete once NULLCNT empty blocks
	 * have been counted.
	 */
	*cnt += 1;
	return((*cnt >= NULLCNT) ? 0 : 1);
}
128 
/*
 * ul_oct()
 *	write val into the len byte field str as octal digits, right aligned
 *	and padded on the left with '0'. The last byte(s) of the field hold
 *	the terminator picked by term (the various tar versions disagree on
 *	this, and old readers are picky):
 *		3	a single '\0'
 *		2	'\0' followed by ' '
 *		1	a single ' '
 *		0 (and anything else)	' ' followed by '\0'
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * lay down the terminator, working backwards from the last byte
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * fill the rest of the field from the least significant digit up.
	 * once val is used up every further digit is '0', which gives the
	 * zero padding at the front.
	 */
	for (; pos >= 0; --pos) {
		str[pos] = '0' + (char)(val & 0x7);
		val >>= 3;
	}

	/*
	 * anything left over did not fit
	 */
	return((val == (u_long)0) ? 0 : -1);
}
182 
/*
 * uqd_oct()
 *	u_quad_t flavour of ul_oct(): write val into the len byte field str as
 *	octal digits, right aligned and padded on the left with '0'. The last
 *	byte(s) of the field hold the terminator picked by term (the various
 *	tar versions disagree on this, and old readers are picky):
 *		3	a single '\0'
 *		2	'\0' followed by ' '
 *		1	a single ' '
 *		0 (and anything else)	' ' followed by '\0'
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
uqd_oct(u_quad_t val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * lay down the terminator, working backwards from the last byte
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * fill the rest of the field from the least significant digit up.
	 * once val is used up every further digit is '0', which gives the
	 * zero padding at the front.
	 */
	for (; pos >= 0; --pos) {
		str[pos] = '0' + (char)(val & 0x7);
		val >>= 3;
	}

	/*
	 * anything left over did not fit
	 */
	return((val == (u_quad_t)0) ? 0 : -1);
}
236 
/*
 * tar_chksm()
 *	compute the checksum of a tar header block. Every byte is added as an
 *	unsigned value, except that the CHK_LEN bytes of the checksum field
 *	itself (at CHK_OFFSET) are counted as blanks; their sum is supplied
 *	up front by BLNKSUM.
 *	NOTE: len lets the writer stop early, since it ALWAYS pads headers
 *	with 0 and zeros add nothing to the sum.
 *	ASSUMED: len is greater than CHK_OFFSET (the bytes in front of the
 *	checksum field are summed no matter what len is).
 * Return:
 *	unsigned long checksum
 */

static u_long
tar_chksm(char *blk, int len)
{
	const unsigned char *bytes = (const unsigned char *)blk;
	u_long sum = BLNKSUM;	/* the checksum field, taken as blanks */
	int i;

	/*
	 * everything in front of the checksum field
	 */
	for (i = 0; i < CHK_OFFSET; ++i)
		sum += (u_long)bytes[i];

	/*
	 * hop over the checksum field and add what follows, up to len
	 */
	for (i = CHK_OFFSET + CHK_LEN; i < len; ++i)
		sum += (u_long)bytes[i];
	return(sum);
}
274 
275 /*
276  * Routines for old BSD style tar (also made portable to sysV tar)
277  */
278 
279 /*
280  * tar_id()
281  *	determine if a block given to us is a valid tar header (and not a USTAR
282  *	header). We have to be on the lookout for those pesky blocks of all
283  *	zero's.
284  * Return:
285  *	0 if a tar header, -1 otherwise
286  */
287 
288 int
289 tar_id(char *blk, int size)
290 {
291 	HD_TAR *hd;
292 	HD_USTAR *uhd;
293 
294 	if (size < BLKMULT)
295 		return(-1);
296 	hd = (HD_TAR *)blk;
297 	uhd = (HD_USTAR *)blk;
298 
299 	/*
300 	 * check for block of zero's first, a simple and fast test, then make
301 	 * sure this is not a ustar header by looking for the ustar magic
302 	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
303 	 * wrong and create archives missing the \0. Last we check the
304 	 * checksum. If this is ok we have to assume it is a valid header.
305 	 */
306 	if (hd->name[0] == '\0')
307 		return(-1);
308 	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
309 		return(-1);
310 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
311 		return(-1);
312 	return(0);
313 }
314 
315 /*
316  * tar_opt()
317  *	handle tar format specific -o options
318  * Return:
319  *	0 if ok -1 otherwise
320  */
321 
322 int
323 tar_opt(void)
324 {
325 	OPLIST *opt;
326 
327 	while ((opt = opt_next()) != NULL) {
328 		if (strcmp(opt->name, TAR_OPTION) ||
329 		    strcmp(opt->value, TAR_NODIR)) {
330 			paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
331 			    opt->name, opt->value);
332 			paxwarn(1,"%s=%s is the only supported tar format option",
333 			    TAR_OPTION, TAR_NODIR);
334 			return(-1);
335 		}
336 
337 		/*
338 		 * we only support one option, and only when writing
339 		 */
340 		if ((act != APPND) && (act != ARCHIVE)) {
341 			paxwarn(1, "%s=%s is only supported when writing.",
342 			    opt->name, opt->value);
343 			return(-1);
344 		}
345 		tar_nodir = 1;
346 	}
347 	return(0);
348 }
349 
350 
351 /*
352  * tar_rd()
353  *	extract the values out of block already determined to be a tar header.
354  *	store the values in the ARCHD parameter.
355  * Return:
356  *	0
357  */
358 
359 int
360 tar_rd(ARCHD *arcn, char *buf)
361 {
362 	HD_TAR *hd;
363 	char *pt;
364 
365 	/*
366 	 * we only get proper sized buffers passed to us
367 	 */
368 	if (tar_id(buf, BLKMULT) < 0)
369 		return(-1);
370 	arcn->org_name = arcn->name;
371 	arcn->sb.st_nlink = 1;
372 	arcn->pat = NULL;
373 
374 	/*
375 	 * copy out the name and values in the stat buffer
376 	 */
377 	hd = (HD_TAR *)buf;
378 	/*
379 	 * old tar format specifies the name always be null-terminated,
380 	 * but let's be robust to broken archives.
381 	 * the same applies to handling links below.
382 	 */
383 	arcn->nlen = l_strncpy(arcn->name, hd->name,
384 	    MIN(sizeof(hd->name), sizeof(arcn->name)) - 1);
385 	arcn->name[arcn->nlen] = '\0';
386 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
387 	    0xfff);
388 	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
389 	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
390 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
391 	arcn->sb.st_mtime = (time_t)asc_uqd(hd->mtime, sizeof(hd->mtime), OCT);
392 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
393 
394 	/*
395 	 * have to look at the last character, it may be a '/' and that is used
396 	 * to encode this as a directory
397 	 */
398 	pt = &(arcn->name[arcn->nlen - 1]);
399 	arcn->pad = 0;
400 	arcn->skip = 0;
401 	switch(hd->linkflag) {
402 	case SYMTYPE:
403 		/*
404 		 * symbolic link, need to get the link name and set the type in
405 		 * the st_mode so -v printing will look correct.
406 		 */
407 		arcn->type = PAX_SLK;
408 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
409 		    MIN(sizeof(hd->linkname), sizeof(arcn->ln_name)) - 1);
410 		arcn->ln_name[arcn->ln_nlen] = '\0';
411 		arcn->sb.st_mode |= S_IFLNK;
412 		break;
413 	case LNKTYPE:
414 		/*
415 		 * hard link, need to get the link name, set the type in the
416 		 * st_mode and st_nlink so -v printing will look better.
417 		 */
418 		arcn->type = PAX_HLK;
419 		arcn->sb.st_nlink = 2;
420 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
421 		    MIN(sizeof(hd->linkname), sizeof(arcn->ln_name)) - 1);
422 		arcn->ln_name[arcn->ln_nlen] = '\0';
423 
424 		/*
425 		 * no idea of what type this thing really points at, but
426 		 * we set something for printing only.
427 		 */
428 		arcn->sb.st_mode |= S_IFREG;
429 		break;
430 	case DIRTYPE:
431 		/*
432 		 * It is a directory, set the mode for -v printing
433 		 */
434 		arcn->type = PAX_DIR;
435 		arcn->sb.st_mode |= S_IFDIR;
436 		arcn->sb.st_nlink = 2;
437 		arcn->ln_name[0] = '\0';
438 		arcn->ln_nlen = 0;
439 		break;
440 	case AREGTYPE:
441 	case REGTYPE:
442 	default:
443 		/*
444 		 * If we have a trailing / this is a directory and NOT a file.
445 		 */
446 		arcn->ln_name[0] = '\0';
447 		arcn->ln_nlen = 0;
448 		if (*pt == '/') {
449 			/*
450 			 * it is a directory, set the mode for -v printing
451 			 */
452 			arcn->type = PAX_DIR;
453 			arcn->sb.st_mode |= S_IFDIR;
454 			arcn->sb.st_nlink = 2;
455 		} else {
456 			/*
457 			 * have a file that will be followed by data. Set the
458 			 * skip value to the size field and calculate the size
459 			 * of the padding.
460 			 */
461 			arcn->type = PAX_REG;
462 			arcn->sb.st_mode |= S_IFREG;
463 			arcn->pad = TAR_PAD(arcn->sb.st_size);
464 			arcn->skip = arcn->sb.st_size;
465 		}
466 		break;
467 	}
468 
469 	/*
470 	 * strip off any trailing slash.
471 	 */
472 	if (*pt == '/') {
473 		*pt = '\0';
474 		--arcn->nlen;
475 	}
476 	return(0);
477 }
478 
479 /*
480  * tar_wr()
481  *	write a tar header for the file specified in the ARCHD to the archive.
482  *	Have to check for file types that cannot be stored and file names that
483  *	are too long. Be careful of the term (last arg) to ul_oct, each field
484  *	of tar has it own spec for the termination character(s).
485  *	ASSUMED: space after header in header block is zero filled
486  * Return:
487  *	0 if file has data to be written after the header, 1 if file has NO
488  *	data to write after the header, -1 if archive write failed
489  */
490 
491 int
492 tar_wr(ARCHD *arcn)
493 {
494 	HD_TAR *hd;
495 	int len;
496 	HD_TAR hdblk;
497 
498 	/*
499 	 * check for those file system types which tar cannot store
500 	 */
501 	switch(arcn->type) {
502 	case PAX_DIR:
503 		/*
504 		 * user asked that dirs not be written to the archive
505 		 */
506 		if (tar_nodir)
507 			return(1);
508 		break;
509 	case PAX_CHR:
510 		paxwarn(1, "Tar cannot archive a character device %s",
511 		    arcn->org_name);
512 		return(1);
513 	case PAX_BLK:
514 		paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
515 		return(1);
516 	case PAX_SCK:
517 		paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
518 		return(1);
519 	case PAX_FIF:
520 		paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
521 		return(1);
522 	case PAX_SLK:
523 	case PAX_HLK:
524 	case PAX_HRG:
525 		if (arcn->ln_nlen >= (int)sizeof(hd->linkname)) {
526 			paxwarn(1,"Link name too long for tar %s", arcn->ln_name);
527 			return(1);
528 		}
529 		break;
530 	case PAX_REG:
531 	case PAX_CTG:
532 	default:
533 		break;
534 	}
535 
536 	/*
537 	 * check file name len, remember extra char for dirs (the / at the end)
538 	 */
539 	len = arcn->nlen;
540 	if (arcn->type == PAX_DIR)
541 		++len;
542 	if (len >= (int)sizeof(hd->name)) {
543 		paxwarn(1, "File name too long for tar %s", arcn->name);
544 		return(1);
545 	}
546 
547 	/*
548 	 * Copy the data out of the ARCHD into the tar header based on the type
549 	 * of the file. Remember, many tar readers want all fields to be
550 	 * padded with zero so we zero the header first.  We then set the
551 	 * linkflag field (type), the linkname, the size, and set the padding
552 	 * (if any) to be added after the file data (0 for all other types,
553 	 * as they only have a header).
554 	 */
555 	hd = &hdblk;
556 	l_strncpy(hd->name, arcn->name, sizeof(hd->name) - 1);
557 	hd->name[sizeof(hd->name) - 1] = '\0';
558 	arcn->pad = 0;
559 
560 	if (arcn->type == PAX_DIR) {
561 		/*
562 		 * directories are the same as files, except have a filename
563 		 * that ends with a /, we add the slash here. No data follows,
564 		 * dirs, so no pad.
565 		 */
566 		hd->linkflag = AREGTYPE;
567 		memset(hd->linkname, 0, sizeof(hd->linkname));
568 		hd->name[len-1] = '/';
569 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
570 			goto out;
571 	} else if (arcn->type == PAX_SLK) {
572 		/*
573 		 * no data follows this file, so no pad
574 		 */
575 		hd->linkflag = SYMTYPE;
576 		l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname) - 1);
577 		hd->linkname[sizeof(hd->linkname) - 1] = '\0';
578 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
579 			goto out;
580 	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
581 		/*
582 		 * no data follows this file, so no pad
583 		 */
584 		hd->linkflag = LNKTYPE;
585 		l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname) - 1);
586 		hd->linkname[sizeof(hd->linkname) - 1] = '\0';
587 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
588 			goto out;
589 	} else {
590 		/*
591 		 * data follows this file, so set the pad
592 		 */
593 		hd->linkflag = AREGTYPE;
594 		memset(hd->linkname, 0, sizeof(hd->linkname));
595 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
596 		    sizeof(hd->size), 1)) {
597 			paxwarn(1,"File is too large for tar %s", arcn->org_name);
598 			return(1);
599 		}
600 		arcn->pad = TAR_PAD(arcn->sb.st_size);
601 	}
602 
603 	/*
604 	 * copy those fields that are independent of the type
605 	 */
606 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
607 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
608 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
609 	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
610 		goto out;
611 
612 	/*
613 	 * calculate and add the checksum, then write the header. A return of
614 	 * 0 tells the caller to now write the file data, 1 says no data needs
615 	 * to be written
616 	 */
617 	if (ul_oct(tar_chksm((char *)&hdblk, sizeof(HD_TAR)), hd->chksum,
618 	    sizeof(hd->chksum), 3))
619 		goto out;
620 	if (wr_rdbuf((char *)&hdblk, sizeof(HD_TAR)) < 0)
621 		return(-1);
622 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
623 		return(-1);
624 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
625 		return(0);
626 	return(1);
627 
628     out:
629 	/*
630 	 * header field is out of range
631 	 */
632 	paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
633 	return(1);
634 }
635 
636 /*
637  * Routines for POSIX ustar
638  */
639 
/*
 * ustar_strd()
 *	set up for reading ustar: start the tables behind usrtb_start() and
 *	grptb_start(), in that order, giving up at the first failure.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return(-1);
	if (grptb_start() < 0)
		return(-1);
	return(0);
}
654 
/*
 * ustar_stwr()
 *	set up for writing ustar: start the tables behind uidtb_start() and
 *	gidtb_start(), in that order, giving up at the first failure.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return(-1);
	if (gidtb_start() < 0)
		return(-1);
	return(0);
}
669 
670 /*
671  * ustar_id()
672  *	determine if a block given to us is a valid ustar header. We have to
673  *	be on the lookout for those pesky blocks of all zero's
674  * Return:
675  *	0 if a ustar header, -1 otherwise
676  */
677 
678 int
679 ustar_id(char *blk, int size)
680 {
681 	HD_USTAR *hd;
682 
683 	if (size < BLKMULT)
684 		return(-1);
685 	hd = (HD_USTAR *)blk;
686 
687 	/*
688 	 * check for block of zero's first, a simple and fast test then check
689 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
690 	 * programs are fouled up and create archives missing the \0. Last we
691 	 * check the checksum. If ok we have to assume it is a valid header.
692 	 */
693 	if (hd->name[0] == '\0')
694 		return(-1);
695 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
696 		return(-1);
697 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
698 		return(-1);
699 	return(0);
700 }
701 
702 /*
703  * ustar_rd()
704  *	extract the values out of block already determined to be a ustar header.
705  *	store the values in the ARCHD parameter.
706  * Return:
707  *	0
708  */
709 
710 int
711 ustar_rd(ARCHD *arcn, char *buf)
712 {
713 	HD_USTAR *hd;
714 	char *dest;
715 	int cnt = 0;
716 	dev_t devmajor;
717 	dev_t devminor;
718 
719 	/*
720 	 * we only get proper sized buffers
721 	 */
722 	if (ustar_id(buf, BLKMULT) < 0)
723 		return(-1);
724 	arcn->org_name = arcn->name;
725 	arcn->sb.st_nlink = 1;
726 	arcn->pat = NULL;
727 	arcn->nlen = 0;
728 	hd = (HD_USTAR *)buf;
729 
730 	/*
731 	 * see if the filename is split into two parts. if, so joint the parts.
732 	 * we copy the prefix first and add a / between the prefix and name.
733 	 */
734 	dest = arcn->name;
735 	if (*(hd->prefix) != '\0') {
736 		cnt = l_strncpy(dest, hd->prefix,
737 		    MIN(sizeof(hd->prefix), sizeof(arcn->name) - 2));
738 		dest += cnt;
739 		*dest++ = '/';
740 		cnt++;
741 	}
742 	/*
743 	 * ustar format specifies the name may be unterminated
744 	 * if it fills the entire field.  this also applies to
745 	 * the prefix and the linkname.
746 	 */
747 	arcn->nlen = cnt + l_strncpy(dest, hd->name,
748 	    MIN(sizeof(hd->name), sizeof(arcn->name) - cnt - 1));
749 	arcn->name[arcn->nlen] = '\0';
750 
751 	/*
752 	 * follow the spec to the letter. we should only have mode bits, strip
753 	 * off all other crud we may be passed.
754 	 */
755 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
756 	    0xfff);
757 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
758 	arcn->sb.st_mtime = (time_t)asc_uqd(hd->mtime, sizeof(hd->mtime), OCT);
759 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
760 
761 	/*
762 	 * If we can find the ascii names for gname and uname in the password
763 	 * and group files we will use the uid's and gid they bind. Otherwise
764 	 * we use the uid and gid values stored in the header. (This is what
765 	 * the POSIX spec wants).
766 	 */
767 	hd->gname[sizeof(hd->gname) - 1] = '\0';
768 	if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
769 		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
770 	hd->uname[sizeof(hd->uname) - 1] = '\0';
771 	if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
772 		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
773 
774 	/*
775 	 * set the defaults, these may be changed depending on the file type
776 	 */
777 	arcn->ln_name[0] = '\0';
778 	arcn->ln_nlen = 0;
779 	arcn->pad = 0;
780 	arcn->skip = 0;
781 	arcn->sb.st_rdev = (dev_t)0;
782 
783 	/*
784 	 * set the mode and PAX type according to the typeflag in the header
785 	 */
786 	switch(hd->typeflag) {
787 	case FIFOTYPE:
788 		arcn->type = PAX_FIF;
789 		arcn->sb.st_mode |= S_IFIFO;
790 		break;
791 	case DIRTYPE:
792 		arcn->type = PAX_DIR;
793 		arcn->sb.st_mode |= S_IFDIR;
794 		arcn->sb.st_nlink = 2;
795 
796 		/*
797 		 * Some programs that create ustar archives append a '/'
798 		 * to the pathname for directories. This clearly violates
799 		 * ustar specs, but we will silently strip it off anyway.
800 		 */
801 		if (arcn->name[arcn->nlen - 1] == '/')
802 			arcn->name[--arcn->nlen] = '\0';
803 		break;
804 	case BLKTYPE:
805 	case CHRTYPE:
806 		/*
807 		 * this type requires the rdev field to be set.
808 		 */
809 		if (hd->typeflag == BLKTYPE) {
810 			arcn->type = PAX_BLK;
811 			arcn->sb.st_mode |= S_IFBLK;
812 		} else {
813 			arcn->type = PAX_CHR;
814 			arcn->sb.st_mode |= S_IFCHR;
815 		}
816 		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
817 		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
818 		arcn->sb.st_rdev = TODEV(devmajor, devminor);
819 		break;
820 	case SYMTYPE:
821 	case LNKTYPE:
822 		if (hd->typeflag == SYMTYPE) {
823 			arcn->type = PAX_SLK;
824 			arcn->sb.st_mode |= S_IFLNK;
825 		} else {
826 			arcn->type = PAX_HLK;
827 			/*
828 			 * so printing looks better
829 			 */
830 			arcn->sb.st_mode |= S_IFREG;
831 			arcn->sb.st_nlink = 2;
832 		}
833 		/*
834 		 * copy the link name
835 		 */
836 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
837 		    MIN(sizeof(hd->linkname), sizeof(arcn->ln_name) - 1));
838 		arcn->ln_name[arcn->ln_nlen] = '\0';
839 		break;
840 	case CONTTYPE:
841 	case AREGTYPE:
842 	case REGTYPE:
843 	default:
844 		/*
845 		 * these types have file data that follows. Set the skip and
846 		 * pad fields.
847 		 */
848 		arcn->type = PAX_REG;
849 		arcn->pad = TAR_PAD(arcn->sb.st_size);
850 		arcn->skip = arcn->sb.st_size;
851 		arcn->sb.st_mode |= S_IFREG;
852 		break;
853 	}
854 	return(0);
855 }
856 
857 /*
858  * ustar_wr()
859  *	write a ustar header for the file specified in the ARCHD to the archive
860  *	Have to check for file types that cannot be stored and file names that
861  *	are too long. Be careful of the term (last arg) to ul_oct, we only use
862  *	'\0' for the termination character (this is different than picky tar)
863  *	ASSUMED: space after header in header block is zero filled
864  * Return:
865  *	0 if file has data to be written after the header, 1 if file has NO
866  *	data to write after the header, -1 if archive write failed
867  */
868 
869 int
870 ustar_wr(ARCHD *arcn)
871 {
872 	HD_USTAR *hd;
873 	char *pt;
874 	HD_USTAR hdblk;
875 
876 	/*
877 	 * check for those file system types ustar cannot store
878 	 */
879 	if (arcn->type == PAX_SCK) {
880 		paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
881 		return(1);
882 	}
883 
884 	/*
885 	 * check the length of the linkname
886 	 */
887 	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
888 	    (arcn->type == PAX_HRG)) &&
889 	    (arcn->ln_nlen > (int)sizeof(hd->linkname))) {
890 		paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
891 		return(1);
892 	}
893 
894 	/*
895 	 * split the path name into prefix and name fields (if needed). if
896 	 * pt != arcn->name, the name has to be split
897 	 */
898 	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
899 		paxwarn(1, "File name too long for ustar %s", arcn->name);
900 		return(1);
901 	}
902 	hd = &hdblk;
903 	arcn->pad = 0L;
904 
905 	/*
906 	 * split the name, or zero out the prefix
907 	 */
908 	if (pt != arcn->name) {
909 		/*
910 		 * name was split, pt points at the / where the split is to
911 		 * occur, we remove the / and copy the first part to the prefix
912 		 */
913 		*pt = '\0';
914 		l_strncpy(hd->prefix, arcn->name, sizeof(hd->prefix));
915 		*pt++ = '/';
916 	} else
917 		memset(hd->prefix, 0, sizeof(hd->prefix));
918 
919 	/*
920 	 * copy the name part. this may be the whole path or the part after
921 	 * the prefix.  both the name and prefix may fill the entire field.
922 	 */
923 	l_strncpy(hd->name, pt, sizeof(hd->name));
924 
925 	/*
926 	 * set the fields in the header that are type dependent
927 	 */
928 	switch(arcn->type) {
929 	case PAX_DIR:
930 		hd->typeflag = DIRTYPE;
931 		memset(hd->linkname, 0, sizeof(hd->linkname));
932 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
933 		memset(hd->devminor, 0, sizeof(hd->devminor));
934 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
935 			goto out;
936 		break;
937 	case PAX_CHR:
938 	case PAX_BLK:
939 		if (arcn->type == PAX_CHR)
940 			hd->typeflag = CHRTYPE;
941 		else
942 			hd->typeflag = BLKTYPE;
943 		memset(hd->linkname, 0, sizeof(hd->linkname));
944 		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
945 		   sizeof(hd->devmajor), 3) ||
946 		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
947 		   sizeof(hd->devminor), 3) ||
948 		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
949 			goto out;
950 		break;
951 	case PAX_FIF:
952 		hd->typeflag = FIFOTYPE;
953 		memset(hd->linkname, 0, sizeof(hd->linkname));
954 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
955 		memset(hd->devminor, 0, sizeof(hd->devminor));
956 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
957 			goto out;
958 		break;
959 	case PAX_SLK:
960 	case PAX_HLK:
961 	case PAX_HRG:
962 		if (arcn->type == PAX_SLK)
963 			hd->typeflag = SYMTYPE;
964 		else
965 			hd->typeflag = LNKTYPE;
966 		/* the link name may occupy the entire field in ustar */
967 		l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
968 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
969 		memset(hd->devminor, 0, sizeof(hd->devminor));
970 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
971 			goto out;
972 		break;
973 	case PAX_REG:
974 	case PAX_CTG:
975 	default:
976 		/*
977 		 * file data with this type, set the padding
978 		 */
979 		if (arcn->type == PAX_CTG)
980 			hd->typeflag = CONTTYPE;
981 		else
982 			hd->typeflag = REGTYPE;
983 		memset(hd->linkname, 0, sizeof(hd->linkname));
984 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
985 		memset(hd->devminor, 0, sizeof(hd->devminor));
986 		arcn->pad = TAR_PAD(arcn->sb.st_size);
987 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
988 		    sizeof(hd->size), 3)) {
989 			paxwarn(1,"File is too long for ustar %s",arcn->org_name);
990 			return(1);
991 		}
992 		break;
993 	}
994 
995 	l_strncpy(hd->magic, TMAGIC, TMAGLEN);
996 	l_strncpy(hd->version, TVERSION, TVERSLEN);
997 
998 	/*
999 	 * set the remaining fields. Some versions want all 16 bits of mode
1000 	 * we better humor them (they really do not meet spec though)....
1001 	 */
1002 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1003 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1004 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1005 	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1006 		goto out;
1007 	l_strncpy(hd->uname,name_uid(arcn->sb.st_uid, 0),sizeof(hd->uname));
1008 	l_strncpy(hd->gname,name_gid(arcn->sb.st_gid, 0),sizeof(hd->gname));
1009 
1010 	/*
1011 	 * calculate and store the checksum write the header to the archive
1012 	 * return 0 tells the caller to now write the file data, 1 says no data
1013 	 * needs to be written
1014 	 */
1015 	if (ul_oct(tar_chksm((char *)&hdblk, sizeof(HD_USTAR)), hd->chksum,
1016 	   sizeof(hd->chksum), 3))
1017 		goto out;
1018 	if (wr_rdbuf((char *)&hdblk, sizeof(HD_USTAR)) < 0)
1019 		return(-1);
1020 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1021 		return(-1);
1022 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1023 		return(0);
1024 	return(1);
1025 
1026     out:
1027     	/*
1028 	 * header field is out of range
1029 	 */
1030 	paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1031 	return(1);
1032 }
1033 
/*
 * name_split()
 *	see if the name has to be split for storage in a ustar header. We try
 *	to fit the entire name in the name field without splitting if we can.
 *	The split point is always at a /, which is dropped from the header;
 *	what is in front of it goes in the prefix field (at most TPFSZ bytes)
 *	and what follows it goes in the name field (at most TNMSZ bytes).
 *	Either piece may fill its field completely, neither may be empty.
 *	name must be \0 terminated and len must be its length.
 * Return:
 *	character pointer to the split point (always the / that is to be
 *	removed). If a split is not needed, the pointer is set to the start
 *	of the file name (it would violate the spec to split there). A NULL
 *	is returned if the file name is too long or cannot be split so that
 *	both pieces fit.
 */

static char *
name_split(char *name, int len)
{
	char *start;

	/*
	 * check to see if the file name is small enough to fit in the name
	 * field. if so just return a pointer to the name.
	 */
	if (len <= TNMSZ)
		return(name);

	/*
	 * the longest name we can store is a full prefix, the / that gets
	 * thrown away, and a full name field.
	 */
	if (len > TPFSZ + TNMSZ + 1)
		return(NULL);

	/*
	 * we start looking at the biggest sized piece that fits in the name
	 * field. We walk forward looking for a slash to split at. The idea is
	 * to find the biggest piece to fit in the name field (or the smallest
	 * prefix we can find). The -1 accounts for the slash itself: it sits
	 * between the two pieces and is stored in neither, so a slash found
	 * right here leaves exactly TNMSZ bytes for the name field.
	 */
	start = name + len - TNMSZ - 1;

	/*
	 * the prefix may not be empty, so never consider a split at the very
	 * first character.
	 */
	if (start == name)
		++start;
	while ((*start != '\0') && (*start != '/'))
		++start;

	/*
	 * if we hit the end of the string, this name cannot be split, so we
	 * cannot store this file.
	 */
	if (*start == '\0')
		return(NULL);

	/*
	 * a split at a trailing slash would leave the name field empty, and
	 * a header with an empty name is not a valid header.
	 */
	if (start[1] == '\0')
		return(NULL);
	len = start - name;

	/*
	 * NOTE: /str where the length of str == TNMSZ can not be stored under
	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
	 * the file would then expand on extract to //str. The len == 0 below
	 * makes this special case follow the spec to the letter.
	 */
	if ((len > TPFSZ) || (len == 0))
		return(NULL);

	/*
	 * ok have a split point, return it to the caller
	 */
	return(start);
}
1092