xref: /openbsd/bin/pax/tar.c (revision 8df76133)
1 /*	$OpenBSD: tar.c,v 1.85 2024/04/17 18:12:12 jca Exp $	*/
2 /*	$NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1992 Keith Muller.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Keith Muller of the University of California, San Diego.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/queue.h>
39 #include <sys/stat.h>
40 #include <ctype.h>
41 #include <errno.h>
42 #include <grp.h>
43 #include <libgen.h>
44 #include <limits.h>
45 #include <pwd.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 
51 #include "pax.h"
52 #include "extern.h"
53 #include "tar.h"
54 
/*
 * One record of a pax extended header. Records are kept on a singly linked
 * list (struct xheader) until the whole extended header is written out.
 */
struct xheader_record {
	SLIST_ENTRY(xheader_record)	 entry;		/* list linkage */
	size_t				 reclen;	/* bytes in record */
	char				*record;	/* malloc'ed record text */
};
SLIST_HEAD(xheader, xheader_record);

/* shortest possible extended record: "5 a=\n" */
#define MINXHDRSZ	5
64 
/*
 * Routines for reading, writing and identifying the headers of the various
 * versions of tar
 */
68 
69 static size_t expandname(char *, size_t, char **, const char *, size_t);
70 static u_long tar_chksm(char *, int);
71 static char *name_split(char *, int);
72 static int ul_oct(u_long, char *, int, int);
73 static int ull_oct(unsigned long long, char *, int, int);
74 static int rd_xheader(ARCHD *arcn, int, off_t);
75 #ifndef SMALL
76 static int wr_xheader(ARCHD *, struct xheader *);
77 #endif
78 
/*
 * NOTE(review): none of these are referenced in the part of the file
 * reviewed here; presumably they hold the fallback ("nobody") ids and the
 * ids already warned about -- confirm against the ustar write path.
 */
static uid_t uid_nobody, uid_warn;
static gid_t gid_nobody, gid_warn;
83 
84 /*
85  * Routines common to all versions of tar
86  */
87 
/* set by tar_opt(): do not write directory entries to the archive */
int tar_nodir;
/*
 * GNU ././@LongLink hackery: pending long file name and long link name,
 * handed to expandname() by the header readers
 */
char *gnu_name_string, *gnu_link_string;
91 
92 /*
93  * tar_endwr()
94  *	add the tar trailer of two null blocks
95  * Return:
96  *	0 if ok, -1 otherwise (what wr_skip returns)
97  */
98 
99 int
tar_endwr(void)100 tar_endwr(void)
101 {
102 	return wr_skip(NULLCNT * BLKMULT);
103 }
104 
105 /*
106  * tar_endrd()
107  *	no cleanup needed here, just return size of trailer (for append)
108  * Return:
109  *	size of trailer (2 * BLKMULT)
110  */
111 
112 off_t
tar_endrd(void)113 tar_endrd(void)
114 {
115 	return NULLCNT * BLKMULT;
116 }
117 
118 /*
119  * tar_trail()
120  *	Called to determine if a header block is a valid trailer. We are passed
121  *	the block, the in_sync flag (which tells us we are in resync mode;
122  *	looking for a valid header), and cnt (which starts at zero) which is
123  *	used to count the number of empty blocks we have seen so far.
124  * Return:
125  *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
126  *	could never contain a header.
127  */
128 
129 int
tar_trail(ARCHD * ignore,char * buf,int in_resync,int * cnt)130 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
131 {
132 	int i;
133 
134 	/*
135 	 * look for all zero, trailer is two consecutive blocks of zero
136 	 */
137 	for (i = 0; i < BLKMULT; ++i) {
138 		if (buf[i] != '\0')
139 			break;
140 	}
141 
142 	/*
143 	 * if not all zero it is not a trailer, but MIGHT be a header.
144 	 */
145 	if (i != BLKMULT)
146 		return(-1);
147 
148 	/*
149 	 * When given a zero block, we must be careful!
150 	 * If we are not in resync mode, check for the trailer. Have to watch
151 	 * out that we do not mis-identify file data as the trailer, so we do
152 	 * NOT try to id a trailer during resync mode. During resync mode we
153 	 * might as well throw this block out since a valid header can NEVER be
154 	 * a block of all 0 (we must have a valid file name).
155 	 */
156 	if (!in_resync && (++*cnt >= NULLCNT))
157 		return(0);
158 	return(1);
159 }
160 
161 /*
162  * ul_oct()
163  *	convert an unsigned long to an octal string. many oddball field
164  *	termination characters are used by the various versions of tar in the
165  *	different fields. term selects which kind to use. str is '0' padded
166  *	at the front to len. we are unable to use only one format as many old
167  *	tar readers are very cranky about this.
168  * Return:
169  *	0 if the number fit into the string, -1 otherwise
170  */
171 
172 static int
ul_oct(u_long val,char * str,int len,int term)173 ul_oct(u_long val, char *str, int len, int term)
174 {
175 	char *pt;
176 
177 	/*
178 	 * term selects the appropriate character(s) for the end of the string
179 	 */
180 	pt = str + len - 1;
181 	switch (term) {
182 	case 3:
183 		*pt-- = '\0';
184 		break;
185 	case 2:
186 		*pt-- = ' ';
187 		*pt-- = '\0';
188 		break;
189 	case 1:
190 		*pt-- = ' ';
191 		break;
192 	case 0:
193 	default:
194 		*pt-- = '\0';
195 		*pt-- = ' ';
196 		break;
197 	}
198 
199 	/*
200 	 * convert and blank pad if there is space
201 	 */
202 	while (pt >= str) {
203 		*pt-- = '0' + (char)(val & 0x7);
204 		val >>= 3;
205 		if (val == 0)
206 			break;
207 	}
208 
209 	while (pt >= str)
210 		*pt-- = '0';
211 	if (val != 0)
212 		return(-1);
213 	return(0);
214 }
215 
/*
 * ull_oct()
 *	Convert an unsigned long long to an octal string.  One of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields.  term selects which kind to use:
 *		0 (and any other value)	digits, ' ', '\0'
 *		1			digits, ' '
 *		2			digits, '\0', ' '
 *		3			digits, '\0'
 *	str is '0' padded at the front to len.  We are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise (including when len
 *	is too small to hold even the terminator, in which case str is left
 *	untouched)
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int termlen;
	int pos;

	/*
	 * refuse a field that cannot even hold its terminator instead of
	 * writing in front of str
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch (term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert right to left, least significant digit first. An index is
	 * used (not a pointer) so we never step below the start of str.
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * '0' pad whatever room is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != 0)
		return(-1);
	return(0);
}
270 
271 /*
272  * tar_chksm()
273  *	calculate the checksum for a tar block counting the checksum field as
274  *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
275  *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
276  *	pad headers with 0.
277  * Return:
278  *	unsigned long checksum
279  */
280 
281 static u_long
tar_chksm(char * blk,int len)282 tar_chksm(char *blk, int len)
283 {
284 	char *stop;
285 	char *pt;
286 	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
287 
288 	/*
289 	 * add the part of the block before the checksum field
290 	 */
291 	pt = blk;
292 	stop = blk + CHK_OFFSET;
293 	while (pt < stop)
294 		chksm += (u_long)(*pt++ & 0xff);
295 	/*
296 	 * move past the checksum field and keep going, spec counts the
297 	 * checksum field as the sum of 8 blanks (which is pre-computed as
298 	 * BLNKSUM).
299 	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
300 	 * starts, no point in summing zero's)
301 	 */
302 	pt += CHK_LEN;
303 	stop = blk + len;
304 	while (pt < stop)
305 		chksm += (u_long)(*pt++ & 0xff);
306 	return(chksm);
307 }
308 
309 /*
310  * Routines for old BSD style tar (also made portable to sysV tar)
311  */
312 
313 /*
314  * tar_id()
315  *	determine if a block given to us is a valid tar header (and not a USTAR
316  *	header). We have to be on the lookout for those pesky blocks of	all
317  *	zero's.
318  * Return:
319  *	0 if a tar header, -1 otherwise
320  */
321 
322 int
tar_id(char * blk,int size)323 tar_id(char *blk, int size)
324 {
325 	HD_TAR *hd;
326 	HD_USTAR *uhd;
327 
328 	if (size < BLKMULT)
329 		return(-1);
330 	hd = (HD_TAR *)blk;
331 	uhd = (HD_USTAR *)blk;
332 
333 	/*
334 	 * check for block of zero's first, a simple and fast test, then make
335 	 * sure this is not a ustar header by looking for the ustar magic
336 	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
337 	 * wrong and create archives missing the \0. Last we check the
338 	 * checksum. If this is ok we have to assume it is a valid header.
339 	 */
340 	if (hd->name[0] == '\0')
341 		return(-1);
342 	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
343 		return(-1);
344 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
345 		return(-1);
346 	force_one_volume = 1;
347 	return(0);
348 }
349 
350 /*
351  * tar_opt()
352  *	handle tar format specific -o options
353  * Return:
354  *	0 if ok -1 otherwise
355  */
356 
357 int
tar_opt(void)358 tar_opt(void)
359 {
360 	OPLIST *opt;
361 
362 	while ((opt = opt_next()) != NULL) {
363 		if (strcmp(opt->name, TAR_OPTION) ||
364 		    strcmp(opt->value, TAR_NODIR)) {
365 			paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
366 			    opt->name, opt->value);
367 			paxwarn(1,"%s=%s is the only supported tar format option",
368 			    TAR_OPTION, TAR_NODIR);
369 			return(-1);
370 		}
371 
372 		/*
373 		 * we only support one option, and only when writing
374 		 */
375 		if ((act != APPND) && (act != ARCHIVE)) {
376 			paxwarn(1, "%s=%s is only supported when writing.",
377 			    opt->name, opt->value);
378 			return(-1);
379 		}
380 		tar_nodir = 1;
381 	}
382 	return(0);
383 }
384 
385 
386 /*
387  * tar_rd()
388  *	extract the values out of block already determined to be a tar header.
389  *	store the values in the ARCHD parameter.
390  * Return:
391  *	0
392  */
393 
394 int
tar_rd(ARCHD * arcn,char * buf)395 tar_rd(ARCHD *arcn, char *buf)
396 {
397 	HD_TAR *hd;
398 	unsigned long long val;
399 	char *pt;
400 
401 	/*
402 	 * we only get proper sized buffers passed to us
403 	 */
404 	if (tar_id(buf, BLKMULT) < 0)
405 		return(-1);
406 	memset(arcn, 0, sizeof(*arcn));
407 	arcn->org_name = arcn->name;
408 	arcn->sb.st_nlink = 1;
409 
410 	/*
411 	 * copy out the name and values in the stat buffer
412 	 */
413 	hd = (HD_TAR *)buf;
414 	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
415 		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
416 		    &gnu_name_string, hd->name, sizeof(hd->name));
417 		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
418 		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
419 	}
420 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
421 	    0xfff);
422 	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
423 	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
424 	arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT);
425 	val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT);
426 	if (val > MAX_TIME_T)
427 		arcn->sb.st_mtime = MAX_TIME_T;
428 	else
429 		arcn->sb.st_mtime = val;
430 	arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim;
431 
432 	/*
433 	 * have to look at the last character, it may be a '/' and that is used
434 	 * to encode this as a directory
435 	 */
436 	pt = &(arcn->name[arcn->nlen - 1]);
437 	arcn->pad = 0;
438 	arcn->skip = 0;
439 	switch (hd->linkflag) {
440 	case SYMTYPE:
441 		/*
442 		 * symbolic link, need to get the link name and set the type in
443 		 * the st_mode so -v printing will look correct.
444 		 */
445 		arcn->type = PAX_SLK;
446 		arcn->sb.st_mode |= S_IFLNK;
447 		break;
448 	case LNKTYPE:
449 		/*
450 		 * hard link, need to get the link name, set the type in the
451 		 * st_mode and st_nlink so -v printing will look better.
452 		 */
453 		arcn->type = PAX_HLK;
454 		arcn->sb.st_nlink = 2;
455 
456 		/*
457 		 * no idea of what type this thing really points at, but
458 		 * we set something for printing only.
459 		 */
460 		arcn->sb.st_mode |= S_IFREG;
461 		break;
462 	case LONGLINKTYPE:
463 	case LONGNAMETYPE:
464 		/*
465 		 * GNU long link/file; we tag these here and let the
466 		 * pax internals deal with it -- too ugly otherwise.
467 		 */
468 		arcn->type =
469 		    hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
470 		arcn->pad = TAR_PAD(arcn->sb.st_size);
471 		arcn->skip = arcn->sb.st_size;
472 		break;
473 	case DIRTYPE:
474 		/*
475 		 * It is a directory, set the mode for -v printing
476 		 */
477 		arcn->type = PAX_DIR;
478 		arcn->sb.st_mode |= S_IFDIR;
479 		arcn->sb.st_nlink = 2;
480 		break;
481 	case AREGTYPE:
482 	case REGTYPE:
483 	default:
484 		/*
485 		 * If we have a trailing / this is a directory and NOT a file.
486 		 */
487 		arcn->ln_name[0] = '\0';
488 		arcn->ln_nlen = 0;
489 		if (*pt == '/') {
490 			/*
491 			 * it is a directory, set the mode for -v printing
492 			 */
493 			arcn->type = PAX_DIR;
494 			arcn->sb.st_mode |= S_IFDIR;
495 			arcn->sb.st_nlink = 2;
496 		} else {
497 			/*
498 			 * have a file that will be followed by data. Set the
499 			 * skip value to the size field and calculate the size
500 			 * of the padding.
501 			 */
502 			arcn->type = PAX_REG;
503 			arcn->sb.st_mode |= S_IFREG;
504 			arcn->pad = TAR_PAD(arcn->sb.st_size);
505 			arcn->skip = arcn->sb.st_size;
506 		}
507 		break;
508 	}
509 
510 	/*
511 	 * strip off any trailing slash.
512 	 */
513 	if (*pt == '/') {
514 		*pt = '\0';
515 		--arcn->nlen;
516 	}
517 	return(0);
518 }
519 
520 /*
521  * tar_wr()
522  *	write a tar header for the file specified in the ARCHD to the archive.
523  *	Have to check for file types that cannot be stored and file names that
524  *	are too long. Be careful of the term (last arg) to ul_oct, each field
525  *	of tar has it own spec for the termination character(s).
526  *	ASSUMED: space after header in header block is zero filled
527  * Return:
528  *	0 if file has data to be written after the header, 1 if file has NO
529  *	data to write after the header, -1 if archive write failed
530  */
531 
532 int
tar_wr(ARCHD * arcn)533 tar_wr(ARCHD *arcn)
534 {
535 	HD_TAR *hd;
536 	int len;
537 	char hdblk[sizeof(HD_TAR)];
538 
539 	/*
540 	 * check for those file system types which tar cannot store
541 	 */
542 	switch (arcn->type) {
543 	case PAX_DIR:
544 		/*
545 		 * user asked that dirs not be written to the archive
546 		 */
547 		if (tar_nodir)
548 			return(1);
549 		break;
550 	case PAX_CHR:
551 		paxwarn(1, "Tar cannot archive a character device %s",
552 		    arcn->org_name);
553 		return(1);
554 	case PAX_BLK:
555 		paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
556 		return(1);
557 	case PAX_SCK:
558 		paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
559 		return(1);
560 	case PAX_FIF:
561 		paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
562 		return(1);
563 	case PAX_SLK:
564 	case PAX_HLK:
565 	case PAX_HRG:
566 		if ((size_t)arcn->ln_nlen > sizeof(hd->linkname)) {
567 			paxwarn(1, "Link name too long for tar %s",
568 			    arcn->ln_name);
569 			return(1);
570 		}
571 		break;
572 	case PAX_REG:
573 	case PAX_CTG:
574 	default:
575 		break;
576 	}
577 
578 	/*
579 	 * check file name len, remember extra char for dirs (the / at the end)
580 	 */
581 	len = arcn->nlen;
582 	if (arcn->type == PAX_DIR)
583 		++len;
584 	if ((size_t)len > sizeof(hd->name)) {
585 		paxwarn(1, "File name too long for tar %s", arcn->name);
586 		return(1);
587 	}
588 
589 	/*
590 	 * Copy the data out of the ARCHD into the tar header based on the type
591 	 * of the file. Remember, many tar readers want all fields to be
592 	 * padded with zero so we zero the header first.  We then set the
593 	 * linkflag field (type), the linkname, the size, and set the padding
594 	 * (if any) to be added after the file data (0 for all other types,
595 	 * as they only have a header).
596 	 */
597 	memset(hdblk, 0, sizeof(hdblk));
598 	hd = (HD_TAR *)hdblk;
599 	fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name));
600 	arcn->pad = 0;
601 
602 	if (arcn->type == PAX_DIR) {
603 		/*
604 		 * directories are the same as files, except have a filename
605 		 * that ends with a /, we add the slash here. No data follows
606 		 * dirs, so no pad.
607 		 */
608 		hd->linkflag = AREGTYPE;
609 		hd->name[len-1] = '/';
610 		if (ul_oct(0, hd->size, sizeof(hd->size), 1))
611 			goto out;
612 	} else if (arcn->type == PAX_SLK) {
613 		/*
614 		 * no data follows this file, so no pad
615 		 */
616 		hd->linkflag = SYMTYPE;
617 		fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
618 		    sizeof(arcn->ln_name));
619 		if (ul_oct(0, hd->size, sizeof(hd->size), 1))
620 			goto out;
621 	} else if (PAX_IS_HARDLINK(arcn->type)) {
622 		/*
623 		 * no data follows this file, so no pad
624 		 */
625 		hd->linkflag = LNKTYPE;
626 		fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
627 		    sizeof(arcn->ln_name));
628 		if (ul_oct(0, hd->size, sizeof(hd->size), 1))
629 			goto out;
630 	} else {
631 		/*
632 		 * data follows this file, so set the pad
633 		 */
634 		hd->linkflag = AREGTYPE;
635 		if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
636 			paxwarn(1, "File is too large for tar %s",
637 			    arcn->org_name);
638 			return(1);
639 		}
640 		arcn->pad = TAR_PAD(arcn->sb.st_size);
641 	}
642 
643 	/*
644 	 * copy those fields that are independent of the type
645 	 */
646 	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
647 	    ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
648 		sizeof(hd->mtime), 1) ||
649 	    ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
650 	    ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0))
651 		goto out;
652 
653 	/*
654 	 * calculate and add the checksum, then write the header. A return of
655 	 * 0 tells the caller to now write the file data, 1 says no data needs
656 	 * to be written
657 	 */
658 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
659 	    sizeof(hd->chksum), 3))
660 		goto out;
661 	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
662 		return(-1);
663 	if (wr_skip(BLKMULT - sizeof(HD_TAR)) < 0)
664 		return(-1);
665 	if (PAX_IS_REG(arcn->type))
666 		return(0);
667 	return(1);
668 
669     out:
670 	/*
671 	 * header field is out of range
672 	 */
673 	paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
674 	return(1);
675 }
676 
677 /*
678  * Routines for POSIX ustar
679  */
680 
681 /*
682  * ustar_id()
683  *	determine if a block given to us is a valid ustar header. We have to
684  *	be on the lookout for those pesky blocks of all zero's
685  * Return:
686  *	0 if a ustar header, -1 otherwise
687  */
688 
689 int
ustar_id(char * blk,int size)690 ustar_id(char *blk, int size)
691 {
692 	HD_USTAR *hd;
693 
694 	if (size < BLKMULT)
695 		return(-1);
696 	hd = (HD_USTAR *)blk;
697 
698 	/*
699 	 * check for block of zero's first, a simple and fast test then check
700 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
701 	 * programs are fouled up and create archives missing the \0. Last we
702 	 * check the checksum. If ok we have to assume it is a valid header.
703 	 */
704 	if (hd->prefix[0] == '\0' && hd->name[0] == '\0')
705 		return(-1);
706 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
707 		return(-1);
708 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
709 		return(-1);
710 	return(0);
711 }
712 
713 /*
714  * ustar_rd()
715  *	extract the values out of block already determined to be a ustar header.
716  *	store the values in the ARCHD parameter.
717  * Return:
718  *	0
719  */
720 
721 int
ustar_rd(ARCHD * arcn,char * buf)722 ustar_rd(ARCHD *arcn, char *buf)
723 {
724 	HD_USTAR *hd = (HD_USTAR *)buf;
725 	char *dest;
726 	int cnt = 0;
727 	dev_t devmajor;
728 	dev_t devminor;
729 	unsigned long long val;
730 
731 	/*
732 	 * we only get proper sized buffers
733 	 */
734 	if (ustar_id(buf, BLKMULT) < 0)
735 		return(-1);
736 
737 reset:
738 	memset(arcn, 0, sizeof(*arcn));
739 	arcn->org_name = arcn->name;
740 	arcn->sb.st_nlink = 1;
741 	arcn->sb.st_size = (off_t)-1;
742 
743 	/* Process Extended headers. */
744 	if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) {
745 		if (rd_xheader(arcn, hd->typeflag == GHDRTYPE,
746 		    (off_t)asc_ull(hd->size, sizeof(hd->size), OCT)) < 0)
747 			return (-1);
748 
749 		/* Update and check the ustar header. */
750 		if (rd_wrbuf(buf, BLKMULT) != BLKMULT)
751 			return (-1);
752 		if (ustar_id(buf, BLKMULT) < 0)
753 			return(-1);
754 
755 		/* if the next block is another extension, reset the values */
756 		if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE)
757 			goto reset;
758 	}
759 
760 	if (!arcn->nlen) {
761 		/*
762 		 * See if the filename is split into two parts. if, so join
763 		 * the parts.  We copy the prefix first and add a / between
764 		 * the prefix and name.
765 		 */
766 		dest = arcn->name;
767 		if (*(hd->prefix) != '\0') {
768 			cnt = fieldcpy(dest, sizeof(arcn->name) - 1,
769 			    hd->prefix, sizeof(hd->prefix));
770 			dest += cnt;
771 			*dest++ = '/';
772 			cnt++;
773 		} else
774 			cnt = 0;
775 
776 		if (hd->typeflag != LONGLINKTYPE &&
777 		    hd->typeflag != LONGNAMETYPE) {
778 			arcn->nlen = cnt + expandname(dest,
779 			    sizeof(arcn->name) - cnt, &gnu_name_string,
780 			    hd->name, sizeof(hd->name));
781 		}
782 	}
783 
784 	if (!arcn->ln_nlen &&
785 	    hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
786 		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
787 		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
788 	}
789 
790 	/*
791 	 * follow the spec to the letter. we should only have mode bits, strip
792 	 * off all other crud we may be passed.
793 	 */
794 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
795 	    0xfff);
796 	if (arcn->sb.st_size == (off_t)-1) {
797 		arcn->sb.st_size =
798 		    (off_t)asc_ull(hd->size, sizeof(hd->size), OCT);
799 	}
800 	if (arcn->sb.st_mtime == 0) {
801 		val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT);
802 		if (val > MAX_TIME_T)
803 			arcn->sb.st_mtime = MAX_TIME_T;
804 		else
805 			arcn->sb.st_mtime = val;
806 	}
807 	if (arcn->sb.st_ctime == 0) {
808 		arcn->sb.st_ctim = arcn->sb.st_mtim;
809 	}
810 	if (arcn->sb.st_atime == 0) {
811 		arcn->sb.st_atim = arcn->sb.st_mtim;
812 	}
813 
814 	/*
815 	 * If we can find the ascii names for gname and uname in the password
816 	 * and group files we will use the uid's and gid they bind. Otherwise
817 	 * we use the uid and gid values stored in the header. (This is what
818 	 * the posix spec wants).
819 	 */
820 	hd->gname[sizeof(hd->gname) - 1] = '\0';
821 	if (Nflag || gid_from_group(hd->gname, &(arcn->sb.st_gid)) == -1)
822 		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
823 	hd->uname[sizeof(hd->uname) - 1] = '\0';
824 	if (Nflag || uid_from_user(hd->uname, &(arcn->sb.st_uid)) == -1)
825 		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
826 
827 	/*
828 	 * set the defaults, these may be changed depending on the file type
829 	 */
830 	arcn->pad = 0;
831 	arcn->skip = 0;
832 	arcn->sb.st_rdev = (dev_t)0;
833 
834 	/*
835 	 * set the mode and PAX type according to the typeflag in the header
836 	 */
837 	switch (hd->typeflag) {
838 	case FIFOTYPE:
839 		arcn->type = PAX_FIF;
840 		arcn->sb.st_mode |= S_IFIFO;
841 		break;
842 	case DIRTYPE:
843 		arcn->type = PAX_DIR;
844 		arcn->sb.st_mode |= S_IFDIR;
845 		arcn->sb.st_nlink = 2;
846 
847 		/*
848 		 * Some programs that create ustar archives append a '/'
849 		 * to the pathname for directories. This clearly violates
850 		 * ustar specs, but we will silently strip it off anyway.
851 		 */
852 		if (arcn->name[arcn->nlen - 1] == '/')
853 			arcn->name[--arcn->nlen] = '\0';
854 		break;
855 	case BLKTYPE:
856 	case CHRTYPE:
857 		/*
858 		 * this type requires the rdev field to be set.
859 		 */
860 		if (hd->typeflag == BLKTYPE) {
861 			arcn->type = PAX_BLK;
862 			arcn->sb.st_mode |= S_IFBLK;
863 		} else {
864 			arcn->type = PAX_CHR;
865 			arcn->sb.st_mode |= S_IFCHR;
866 		}
867 		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
868 		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
869 		arcn->sb.st_rdev = TODEV(devmajor, devminor);
870 		break;
871 	case SYMTYPE:
872 	case LNKTYPE:
873 		if (hd->typeflag == SYMTYPE) {
874 			arcn->type = PAX_SLK;
875 			arcn->sb.st_mode |= S_IFLNK;
876 		} else {
877 			arcn->type = PAX_HLK;
878 			/*
879 			 * so printing looks better
880 			 */
881 			arcn->sb.st_mode |= S_IFREG;
882 			arcn->sb.st_nlink = 2;
883 		}
884 		break;
885 	case LONGLINKTYPE:
886 	case LONGNAMETYPE:
887 		/*
888 		 * GNU long link/file; we tag these here and let the
889 		 * pax internals deal with it -- too ugly otherwise.
890 		 */
891 		arcn->type =
892 		    hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
893 		arcn->pad = TAR_PAD(arcn->sb.st_size);
894 		arcn->skip = arcn->sb.st_size;
895 		break;
896 	case CONTTYPE:
897 	case AREGTYPE:
898 	case REGTYPE:
899 	default:
900 		/*
901 		 * these types have file data that follows. Set the skip and
902 		 * pad fields.
903 		 */
904 		arcn->type = PAX_REG;
905 		arcn->pad = TAR_PAD(arcn->sb.st_size);
906 		arcn->skip = arcn->sb.st_size;
907 		arcn->sb.st_mode |= S_IFREG;
908 		break;
909 	}
910 	return(0);
911 }
912 
913 #ifndef SMALL
914 static int
xheader_add(struct xheader * xhdr,const char * keyword,const char * value)915 xheader_add(struct xheader *xhdr, const char *keyword,
916     const char *value)
917 {
918 	struct xheader_record *rec;
919 	int reclen, tmplen;
920 	char *s;
921 
922 	tmplen = MINXHDRSZ;
923 	do {
924 		reclen = tmplen;
925 		tmplen = snprintf(NULL, 0, "%d %s=%s\n", reclen, keyword,
926 		    value);
927 	} while (tmplen >= 0 && tmplen != reclen);
928 	if (tmplen < 0)
929 		return -1;
930 
931 	rec = calloc(1, sizeof(*rec));
932 	if (rec == NULL)
933 		return -1;
934 	rec->reclen = reclen;
935 	if (asprintf(&s, "%d %s=%s\n", reclen, keyword, value) < 0) {
936 		free(rec);
937 		return -1;
938 	}
939 	rec->record = s;
940 
941 	SLIST_INSERT_HEAD(xhdr, rec, entry);
942 
943 	return 0;
944 }
945 
/*
 * xheader_add_ull()
 *	add a "<length> <keyword>=<value>\n" extended header record whose
 *	value is an unsigned number printed in decimal. The number is
 *	formatted here and the record itself is built by xheader_add(), so
 *	the length computation and list handling exist only once.
 * Return:
 *	0 if the record was added, -1 if formatting or allocation failed
 */
static int
xheader_add_ull(struct xheader *xhdr, const char *keyword,
    unsigned long long value)
{
	/* 3 decimal digits per byte is always enough, plus the NUL */
	char num[3 * sizeof(value) + 1];
	int n;

	n = snprintf(num, sizeof(num), "%llu", value);
	if (n < 0 || (size_t)n >= sizeof(num))
		return -1;

	return xheader_add(xhdr, keyword, num);
}
977 
/*
 * xheader_add_ts()
 *	add a "<length> <keyword>=<value>\n" extended header record whose
 *	value is a timestamp: the seconds in decimal, followed by a '.' and
 *	the nanoseconds (trailing zeros removed) when tv_nsec is not 0. The
 *	value is formatted here and the record itself is built by
 *	xheader_add().
 * Return:
 *	0 if the record was added, -1 if formatting (including a subsecond
 *	part that does not fit) or allocation failed
 */
static int
xheader_add_ts(struct xheader *xhdr, const char *keyword,
    const struct timespec *value)
{
	char frac[sizeof(".111222333")] = "";
	/* seconds (3 digits per byte bounds it), sign, NUL, and frac */
	char text[3 * sizeof(long long) + 2 + sizeof(frac)];
	int n;

	/* Only write subsecond part if non-zero */
	if (value->tv_nsec != 0) {
		n = snprintf(frac, sizeof(frac), ".%09ld",
		    (long)value->tv_nsec);
		/*
		 * a truncated result would make n index past the end of
		 * frac in the loop below, so treat it as a failure
		 */
		if (n <= 0 || (size_t)n >= sizeof(frac))
			return -1;

		/* Zap trailing zeros */
		for (n--; n > 1 && frac[n] == '0'; n--)
			frac[n] = '\0';
	}

	n = snprintf(text, sizeof(text), "%lld%s", (long long)value->tv_sec,
	    frac);
	if (n < 0 || (size_t)n >= sizeof(text))
		return -1;

	return xheader_add(xhdr, keyword, text);
}
1025 
1026 static void
xheader_free(struct xheader * xhdr)1027 xheader_free(struct xheader *xhdr)
1028 {
1029 	struct xheader_record *rec;
1030 
1031 	while (!SLIST_EMPTY(xhdr)) {
1032 		rec = SLIST_FIRST(xhdr);
1033 		SLIST_REMOVE_HEAD(xhdr, entry);
1034 		free(rec->record);
1035 		free(rec);
1036 	}
1037 }
1038 
1039 static int
wr_xheader(ARCHD * arcn,struct xheader * xhdr)1040 wr_xheader(ARCHD *arcn, struct xheader *xhdr)
1041 {
1042 	char hdblk[sizeof(HD_USTAR)];
1043 	HD_USTAR *hd;
1044 	char buf[sizeof(hd->name) + 1];
1045 	struct xheader_record *rec;
1046 	size_t size;
1047 
1048 	size = 0;
1049 	SLIST_FOREACH(rec, xhdr, entry)
1050 		size += rec->reclen;
1051 
1052 	memset(hdblk, 0, sizeof(hdblk));
1053 	hd = (HD_USTAR *)hdblk;
1054 	hd->typeflag = XHDRTYPE;
1055 	strncpy(hd->magic, TMAGIC, TMAGLEN);
1056 	strncpy(hd->version, TVERSION, TVERSLEN);
1057 	if (ul_oct(size, hd->size, sizeof(hd->size), 3))
1058 		return -1;
1059 
1060 	/*
1061 	 * Best effort attempt at providing a useful file name for
1062 	 * implementations that don't support pax format. Don't bother
1063 	 * with truncation if the resulting file name doesn't fit.
1064 	 * XXX dirname/basename portability (check return value?)
1065 	 */
1066 	(void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s",
1067 	    dirname(arcn->name), (long)getpid(), basename(arcn->name));
1068 	fieldcpy(hd->name, sizeof(hd->name), buf, sizeof(buf));
1069 
1070 	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
1071 	    ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
1072 		sizeof(hd->mtime), 1) ||
1073 	    ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
1074 	    ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0))
1075 		return -1;
1076 
1077 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1078 	   sizeof(hd->chksum), 3))
1079 		return -1;
1080 
1081 	/* write out extended header */
1082 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1083 		return -1;
1084 	if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
1085 		return -1;
1086 
1087 	/* write out extended header records */
1088 	SLIST_FOREACH(rec, xhdr, entry)
1089 		if (wr_rdbuf(rec->record, rec->reclen) < 0)
1090 			return -1;
1091 
1092 	if (wr_skip(TAR_PAD(size)) < 0)
1093 		return -1;
1094 
1095 	return 0;
1096 }
1097 #endif
1098 
1099 static int
wr_ustar_or_pax(ARCHD * arcn,int ustar)1100 wr_ustar_or_pax(ARCHD *arcn, int ustar)
1101 {
1102 	HD_USTAR *hd;
1103 	const char *name;
1104 	char *pt, hdblk[sizeof(HD_USTAR)];
1105 #ifndef SMALL
1106 	struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr);
1107 #endif
1108 	int bad_mtime;
1109 
1110 	/*
1111 	 * check for those file system types ustar cannot store
1112 	 */
1113 	if (arcn->type == PAX_SCK) {
1114 		paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
1115 		return(1);
1116 	}
1117 
1118 	/*
1119 	 * user asked that dirs not be written to the archive
1120 	 */
1121 	if (arcn->type == PAX_DIR && tar_nodir)
1122 		return (1);
1123 
1124 	/*
1125 	 * check the length of the linkname
1126 	 */
1127 	if (PAX_IS_LINK(arcn->type) &&
1128 	    ((size_t)arcn->ln_nlen > sizeof(hd->linkname))) {
1129 		if (ustar) {
1130 			paxwarn(1, "Link name too long for ustar %s",
1131 			    arcn->ln_name);
1132 			return(1);
1133 		}
1134 #ifndef SMALL
1135 		else if (xheader_add(&xhdr, "linkpath", arcn->ln_name) == -1) {
1136 			paxwarn(1, "Link name too long for pax %s",
1137 			    arcn->ln_name);
1138 			xheader_free(&xhdr);
1139 			return(1);
1140 		}
1141 #endif
1142 	}
1143 
1144 	/*
1145 	 * split the path name into prefix and name fields (if needed). if
1146 	 * pt != arcn->name, the name has to be split
1147 	 */
1148 	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1149 		if (ustar) {
1150 			paxwarn(1, "File name too long for ustar %s",
1151 			    arcn->name);
1152 			return(1);
1153 		}
1154 #ifndef SMALL
1155 		else if (xheader_add(&xhdr, "path", arcn->name) == -1) {
1156 			paxwarn(1, "File name too long for pax %s",
1157 			    arcn->name);
1158 			xheader_free(&xhdr);
1159 			return(1);
1160 		}
1161 		/* PAX format, we don't need to split the path */
1162 		pt = arcn->name;
1163 #endif
1164 	}
1165 
1166 	/*
1167 	 * zero out the header so we don't have to worry about zero fill below
1168 	 */
1169 	memset(hdblk, 0, sizeof(hdblk));
1170 	hd = (HD_USTAR *)hdblk;
1171 	arcn->pad = 0;
1172 
1173 	/*
1174 	 * split the name, or zero out the prefix
1175 	 */
1176 	if (pt != arcn->name) {
1177 		/*
1178 		 * name was split, pt points at the / where the split is to
1179 		 * occur, we remove the / and copy the first part to the prefix
1180 		 */
1181 		*pt = '\0';
1182 		fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name,
1183 		    sizeof(arcn->name));
1184 		*pt++ = '/';
1185 	}
1186 
1187 	/*
1188 	 * copy the name part. this may be the whole path or the part after
1189 	 * the prefix
1190 	 */
1191 	fieldcpy(hd->name, sizeof(hd->name), pt,
1192 	    sizeof(arcn->name) - (pt - arcn->name));
1193 
1194 	/*
1195 	 * set the fields in the header that are type dependent
1196 	 */
1197 	switch (arcn->type) {
1198 	case PAX_DIR:
1199 		hd->typeflag = DIRTYPE;
1200 		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
1201 			goto out;
1202 		break;
1203 	case PAX_CHR:
1204 	case PAX_BLK:
1205 		if (arcn->type == PAX_CHR)
1206 			hd->typeflag = CHRTYPE;
1207 		else
1208 			hd->typeflag = BLKTYPE;
1209 		if (ul_oct(MAJOR(arcn->sb.st_rdev), hd->devmajor,
1210 		   sizeof(hd->devmajor), 3) ||
1211 		   ul_oct(MINOR(arcn->sb.st_rdev), hd->devminor,
1212 		   sizeof(hd->devminor), 3) ||
1213 		   ul_oct(0, hd->size, sizeof(hd->size), 3))
1214 			goto out;
1215 		break;
1216 	case PAX_FIF:
1217 		hd->typeflag = FIFOTYPE;
1218 		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
1219 			goto out;
1220 		break;
1221 	case PAX_SLK:
1222 	case PAX_HLK:
1223 	case PAX_HRG:
1224 		if (arcn->type == PAX_SLK)
1225 			hd->typeflag = SYMTYPE;
1226 		else
1227 			hd->typeflag = LNKTYPE;
1228 		fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
1229 		    sizeof(arcn->ln_name));
1230 		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
1231 			goto out;
1232 		break;
1233 	case PAX_REG:
1234 	case PAX_CTG:
1235 	default:
1236 		/*
1237 		 * file data with this type, set the padding
1238 		 */
1239 		if (arcn->type == PAX_CTG)
1240 			hd->typeflag = CONTTYPE;
1241 		else
1242 			hd->typeflag = REGTYPE;
1243 		arcn->pad = TAR_PAD(arcn->sb.st_size);
1244 		if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
1245 			if (ustar) {
1246 				paxwarn(1, "File is too long for ustar %s",
1247 				    arcn->org_name);
1248 				return(1);
1249 			}
1250 #ifndef SMALL
1251 			else if (xheader_add_ull(&xhdr, "size",
1252 			    arcn->sb.st_size) == -1) {
1253 				paxwarn(1, "File is too long for pax %s",
1254 				    arcn->org_name);
1255 				xheader_free(&xhdr);
1256 				return(1);
1257 			}
1258 #endif
1259 		}
1260 		break;
1261 	}
1262 
1263 	strncpy(hd->magic, TMAGIC, TMAGLEN);
1264 	strncpy(hd->version, TVERSION, TVERSLEN);
1265 
1266 	/*
1267 	 * set the remaining fields. Some versions want all 16 bits of mode
1268 	 * we better humor them (they really do not meet spec though)....
1269 	 */
1270 	if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) {
1271 		if (uid_nobody == 0) {
1272 			if (uid_from_user("nobody", &uid_nobody) == -1)
1273 				goto out;
1274 		}
1275 		if (uid_warn != arcn->sb.st_uid) {
1276 			uid_warn = arcn->sb.st_uid;
1277 			paxwarn(1,
1278 			    "Ustar header field is too small for uid %lu, "
1279 			    "using nobody", (u_long)arcn->sb.st_uid);
1280 		}
1281 		if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3))
1282 			goto out;
1283 	}
1284 	if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) {
1285 		if (gid_nobody == 0) {
1286 			if (gid_from_group("nobody", &gid_nobody) == -1)
1287 				goto out;
1288 		}
1289 		if (gid_warn != arcn->sb.st_gid) {
1290 			gid_warn = arcn->sb.st_gid;
1291 			paxwarn(1,
1292 			    "Ustar header field is too small for gid %lu, "
1293 			    "using nobody", (u_long)arcn->sb.st_gid);
1294 		}
1295 		if (ul_oct(gid_nobody, hd->gid, sizeof(hd->gid), 3))
1296 			goto out;
1297 	}
1298 	bad_mtime = ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime,
1299 	    hd->mtime, sizeof(hd->mtime), 3);
1300 	if (bad_mtime && ustar)
1301 		goto out;
1302 #ifndef SMALL
1303 	if (!ustar) {
1304 		/*
1305 		 * The pax format can preserve atime and store
1306 		 * a possibly more accurate mtime.
1307 		 *
1308 		 * ctime isn't specified by POSIX so omit it.
1309 		 */
1310 		if (xheader_add_ts(&xhdr, "atime", &arcn->sb.st_atim) == -1) {
1311 			paxwarn(1, "Couldn't preserve %s in pax format for %s",
1312 			    "atime", arcn->org_name);
1313 			xheader_free(&xhdr);
1314 			return (1);
1315 		}
1316 		if ((bad_mtime || arcn->sb.st_mtime < 0 ||
1317 			arcn->sb.st_mtim.tv_nsec != 0) &&
1318 		    xheader_add_ts(&xhdr, "mtime", &arcn->sb.st_mtim) == -1) {
1319 			paxwarn(1, "Couldn't preserve %s in pax format for %s",
1320 			    "mtime", arcn->org_name);
1321 			xheader_free(&xhdr);
1322 			return (1);
1323 		}
1324 	}
1325 #endif
1326 	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3))
1327 		goto out;
1328 	if (!Nflag) {
1329 		if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL)
1330 			strncpy(hd->uname, name, sizeof(hd->uname));
1331 		if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL)
1332 			strncpy(hd->gname, name, sizeof(hd->gname));
1333 	}
1334 
1335 #ifndef SMALL
1336 	/* write out a pax extended header if needed */
1337 	if (!SLIST_EMPTY(&xhdr)) {
1338 		int ret;
1339 
1340 		ret = wr_xheader(arcn, &xhdr);
1341 		xheader_free(&xhdr);
1342 		if (ret == -1)
1343 			return(-1);
1344 	}
1345 #endif
1346 
1347 	/*
1348 	 * calculate and store the checksum write the header to the archive
1349 	 * return 0 tells the caller to now write the file data, 1 says no data
1350 	 * needs to be written
1351 	 */
1352 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1353 	   sizeof(hd->chksum), 3))
1354 		goto out;
1355 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1356 		return(-1);
1357 	if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
1358 		return(-1);
1359 	if (PAX_IS_REG(arcn->type))
1360 		return(0);
1361 	return(1);
1362 
1363     out:
1364 #ifndef SMALL
1365 	xheader_free(&xhdr);
1366 #endif
1367 	/*
1368 	 * header field is out of range
1369 	 */
1370 	paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1371 	return(1);
1372 }
1373 
1374 /*
1375  * ustar_wr()
1376  *	Write out a ustar format archive.
1377  *	Have to check for file types that cannot be stored and file names that
1378  *	are too long. Be careful of the term (last arg) to ul_oct, we only use
1379  *	'\0' for the termination character (this is different than picky tar).
1380  *	ASSUMED: space after header in header block is zero filled
1381  * Return:
1382  *	0 if file has data to be written after the header, 1 if file has NO
1383  *	data to write after the header, -1 if archive write failed
1384  */
1385 int
ustar_wr(ARCHD * arcn)1386 ustar_wr(ARCHD *arcn)
1387 {
1388 	return wr_ustar_or_pax(arcn, 1);
1389 }
1390 
1391 /*
1392  * pax_id()
1393  *	determine if a block given to us is a valid pax header.
1394  * Return:
1395  *	0 if a pax header, -1 otherwise
1396  */
1397 #ifndef SMALL
1398 int
pax_id(char * blk,int size)1399 pax_id(char *blk, int size)
1400 {
1401 	HD_USTAR *hd;
1402 
1403 	if (size < BLKMULT)
1404 		return(-1);
1405 	hd = (HD_USTAR *)blk;
1406 
1407 	/*
1408 	 * check for block of zero's first, a simple and fast test then check
1409 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
1410 	 * programs are fouled up and create archives missing the \0. Last we
1411 	 * check the checksum and the type flag. If ok we have to assume it is
1412 	 * a valid pax header.
1413 	 */
1414 	if (hd->prefix[0] == '\0' && hd->name[0] == '\0')
1415 		return(-1);
1416 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
1417 		return(-1);
1418 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
1419 		return(-1);
1420 	/*
1421 	 * It is valid for a pax formatted archive not to start with
1422 	 * a global header nor with an extended header. In that case
1423 	 * we'll fall back to ustar in append mode.
1424 	 */
1425 	if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE)
1426 		return(0);
1427 	return (-1);
1428 }
1429 #endif
1430 
1431 /*
1432  * pax_wr()
1433  *	Write out a pax format archive.
1434  *	Have to check for file types that cannot be stored.  Be careful of the
1435  *      term (last arg) to ul_oct, we only use '\0' for the termination
1436  *      character (this is different than picky tar).
1437  *	ASSUMED: space after header in header block is zero filled
1438  * Return:
1439  *	0 if file has data to be written after the header, 1 if file has NO
1440  *	data to write after the header, -1 if archive write failed
1441  */
1442 #ifndef SMALL
1443 int
pax_wr(ARCHD * arcn)1444 pax_wr(ARCHD *arcn)
1445 {
1446 	return wr_ustar_or_pax(arcn, 0);
1447 }
1448 #endif
1449 
1450 /*
1451  * pax_opt()
1452  *	handle pax format specific -o options
1453  * Return:
1454  *	0 if ok -1 otherwise
1455  */
1456 #ifndef SMALL
1457 int
pax_opt(void)1458 pax_opt(void)
1459 {
1460 	OPLIST *opt;
1461 
1462 	while ((opt = opt_next()) != NULL) {
1463 		if (1) {
1464 			paxwarn(1, "Unknown pax format -o option/value pair %s=%s",
1465 			    opt->name, opt->value);
1466 			return(-1);
1467 		}
1468 	}
1469 	return 0;
1470 }
1471 #endif
1472 
1473 /*
1474  * name_split()
1475  *	see if the name has to be split for storage in a ustar header. We try
1476  *	to fit the entire name in the name field without splitting if we can.
1477  *	The split point is always at a /
1478  * Return
1479  *	character pointer to split point (always the / that is to be removed
1480  *	if the split is not needed, the points is set to the start of the file
1481  *	name (it would violate the spec to split there). A NULL is returned if
1482  *	the file name is too long
1483  */
1484 
1485 static char *
name_split(char * name,int len)1486 name_split(char *name, int len)
1487 {
1488 	char *start;
1489 
1490 	/*
1491 	 * check to see if the file name is small enough to fit in the name
1492 	 * field. if so just return a pointer to the name.
1493 	 * The strings can fill the complete name and prefix fields
1494 	 * without a NUL terminator.
1495 	 */
1496 	if (len <= TNMSZ)
1497 		return(name);
1498 	if (len > (TPFSZ + TNMSZ + 1))
1499 		return(NULL);
1500 
1501 	/*
1502 	 * we start looking at the biggest sized piece that fits in the name
1503 	 * field. We walk forward looking for a slash to split at. The idea is
1504 	 * to find the biggest piece to fit in the name field (or the smallest
1505 	 * prefix we can find) (the -1 is correct the biggest piece would
1506 	 * include the slash between the two parts that gets thrown away)
1507 	 */
1508 	start = name + len - TNMSZ - 1;
1509 
1510 	/*
1511 	 * the prefix may not be empty, so skip the first character when
1512 	 * trying to split a path of exactly TNMSZ+1 characters.
1513 	 * NOTE: This means the ustar format can't store /str if
1514 	 * str contains no slashes and the length of str == TNMSZ
1515 	 */
1516 	if (start == name)
1517 		++start;
1518 
1519 	while ((*start != '\0') && (*start != '/'))
1520 		++start;
1521 
1522 	/*
1523 	 * if we hit the end of the string, this name cannot be split, so we
1524 	 * cannot store this file.
1525 	 */
1526 	if (*start == '\0')
1527 		return(NULL);
1528 
1529 	/*
1530 	 * the split point isn't valid if it results in a prefix
1531 	 * longer than TPFSZ
1532 	 */
1533 	if ((start - name) > TPFSZ)
1534 		return(NULL);
1535 
1536 	/*
1537 	 * ok have a split point, return it to the caller
1538 	 */
1539 	return(start);
1540 }
1541 
/*
 * expandname()
 *	Fill buf (of size len) with a member name. When *gnu_name is set it
 *	takes precedence: it is copied, freed and reset to NULL. Otherwise
 *	the header field name (at most limit bytes) is copied with fieldcpy().
 * Return:
 *	length of the name stored in buf; for a *gnu_name that did not fit
 *	this is len - 1
 */
static size_t
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t limit)
{
	size_t copied;

	if (*gnu_name == NULL)
		return (fieldcpy(buf, len, name, limit));

	/* *gnu_name is NUL terminated; strlcpy reports the source length */
	copied = strlcpy(buf, *gnu_name, len);
	if (copied >= len)
		copied = len - 1;

	free(*gnu_name);
	*gnu_name = NULL;
	return (copied);
}
1558 
1559 static int
rd_time(struct timespec * ts,const char * keyword,char * p)1560 rd_time(struct timespec *ts, const char *keyword, char *p)
1561 {
1562 	const char *errstr;
1563 	char *q;
1564 	int multiplier;
1565 
1566 	if ((q = strchr(p, '.')) != NULL)
1567 		*q = '\0';
1568 
1569 	ts->tv_sec = strtonum(p, 0, MAX_TIME_T, &errstr);
1570 	if (errstr != NULL) {
1571 		paxwarn(1, "%s is %s: %s", keyword, errstr, p);
1572 		return -1;
1573 	}
1574 
1575 	ts->tv_nsec = 0;
1576 
1577 	if (q == NULL)
1578 		return 0;
1579 
1580 	multiplier = 100000000;
1581 	for (q++; *q != '\0'; q++) {
1582 		if (!isdigit((unsigned char)*q)) {
1583 			paxwarn(1, "%s contains non-digit", keyword);
1584 			return -1;
1585 		}
1586 		ts->tv_nsec += (*q - '0') * multiplier;
1587 		multiplier /= 10;
1588 	}
1589 
1590 	return 0;
1591 }
1592 
/*
 * rd_size()
 *	Parse a decimal file size between 0 and LLONG_MAX from p and store
 *	it in *size. *size is written even when parsing fails. keyword is
 *	only used in the warning.
 * Return:
 *	0 on success, -1 (after a warning) when strtonum() rejects the value
 */
static int
rd_size(off_t *size, const char *keyword, char *p)
{
	const char *errstr;
	long long value;

	/* Assume off_t is a long long. */
	value = strtonum(p, 0, LLONG_MAX, &errstr);
	*size = value;
	if (errstr == NULL)
		return 0;

	paxwarn(1, "%s is %s: %s", keyword, errstr, p);
	return -1;
}
1607 
1608 static int
rd_xheader(ARCHD * arcn,int global,off_t size)1609 rd_xheader(ARCHD *arcn, int global, off_t size)
1610 {
1611 	/*
1612 	 * The pax format supposedly supports arbitrarily sized extended
1613 	 * record headers, this implementation doesn't.
1614 	 */
1615 	char buf[sizeof("30xx linkpath=") - 1 + PAXPATHLEN + sizeof("\n")];
1616 	long len;
1617 	char *delim, *keyword;
1618 	char *nextp, *p, *end;
1619 	int pad, ret = 0;
1620 
1621 	/* before we alter size, make note of how much we have to skip */
1622 	pad = TAR_PAD((unsigned)size);
1623 
1624 	p = end = buf;
1625 	while (size > 0 || p < end) {
1626 		if (size > 0) {
1627 			int rdlen;
1628 
1629 			/* shift stuff down */
1630 			if (p > buf) {
1631 				memmove(buf, p, end - p);
1632 				end -= p - buf;
1633 				p = buf;
1634 			}
1635 
1636 			/* fill starting at end */
1637 			rdlen = MINIMUM(size, (buf + sizeof buf) - end);
1638 			if (rd_wrbuf(end, rdlen) != rdlen) {
1639 				ret = -1;
1640 				break;
1641 			}
1642 			size -= rdlen;
1643 			end += rdlen;
1644 		}
1645 
1646 		/* [p, end) is good */
1647 		if (memchr(p, ' ', end - p) == NULL ||
1648 		    !isdigit((unsigned char)*p)) {
1649 			paxwarn(1, "Invalid extended header record");
1650 			ret = -1;
1651 			break;
1652 		}
1653 		errno = 0;
1654 		len = strtol(p, &delim, 10);
1655 		if (*delim != ' ' || (errno == ERANGE && len == LONG_MAX) ||
1656 		    len < MINXHDRSZ) {
1657 			paxwarn(1, "Invalid extended header record length");
1658 			ret = -1;
1659 			break;
1660 		}
1661 		if (len > end - p) {
1662 			paxwarn(1, "Extended header record length %lu is "
1663 			    "out of range", len);
1664 			/* if we can just toss this record, do so */
1665 			len -= end - p;
1666 			if (len <= size && rd_skip(len) == 0) {
1667 				size -= len;
1668 				p = end = buf;
1669 				continue;
1670 			}
1671 			ret = -1;
1672 			break;
1673 		}
1674 		nextp = p + len;
1675 		keyword = p = delim + 1;
1676 		p = memchr(p, '=', len);
1677 		if (!p || nextp[-1] != '\n') {
1678 			paxwarn(1, "Malformed extended header record");
1679 			ret = -1;
1680 			break;
1681 		}
1682 		*p++ = nextp[-1] = '\0';
1683 		if (!global) {
1684 			if (!strcmp(keyword, "path")) {
1685 				arcn->nlen = strlcpy(arcn->name, p,
1686 				    sizeof(arcn->name));
1687 			} else if (!strcmp(keyword, "linkpath")) {
1688 				arcn->ln_nlen = strlcpy(arcn->ln_name, p,
1689 				    sizeof(arcn->ln_name));
1690 			} else if (!strcmp(keyword, "mtime")) {
1691 				ret = rd_time(&arcn->sb.st_mtim, keyword, p);
1692 				if (ret < 0)
1693 					break;
1694 			} else if (!strcmp(keyword, "atime")) {
1695 				ret = rd_time(&arcn->sb.st_atim, keyword, p);
1696 				if (ret < 0)
1697 					break;
1698 			} else if (!strcmp(keyword, "ctime")) {
1699 				ret = rd_time(&arcn->sb.st_ctim, keyword, p);
1700 				if (ret < 0)
1701 					break;
1702 			} else if (!strcmp(keyword, "size")) {
1703 				ret = rd_size(&arcn->sb.st_size, keyword, p);
1704 				if (ret < 0)
1705 					break;
1706 			}
1707 		}
1708 		p = nextp;
1709 	}
1710 
1711 	if (rd_skip(size + pad) < 0)
1712 		return (-1);
1713 	return (ret);
1714 }
1715