1 /* $OpenBSD: tar.c,v 1.73 2023/09/04 17:05:34 jca Exp $ */ 2 /* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1992 Keith Muller. 6 * Copyright (c) 1992, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Keith Muller of the University of California, San Diego. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
#include <grp.h>
#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pax.h"
#include "extern.h"
#include "tar.h"

/*
 * Routines for reading, writing and header identification of various
 * versions of tar
 */

static size_t expandname(char *, size_t, char **, const char *, size_t);
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
static int ull_oct(unsigned long long, char *, int, int);
static int rd_xheader(ARCHD *arcn, int, off_t);

/*
 * State used by ustar_wr() when a uid/gid does not fit in the header field:
 * the id looked up for "nobody" (0 means not looked up yet) and the last
 * id a warning was printed for (so the warning is not repeated for it).
 */
static uid_t uid_nobody;
static uid_t uid_warn;
static gid_t gid_nobody;
static gid_t gid_warn;

/*
 * Routines common to all versions of tar
 */

int tar_nodir;			/* do not write dirs under old tar */
char *gnu_name_string;		/* GNU ././@LongLink hackery name */
char *gnu_link_string;		/* GNU ././@LongLink hackery link */

/*
 * tar_endwr()
 *	add the tar trailer of two null blocks
 * Return:
 *	0 if ok, -1 otherwise (what wr_skip returns)
 */

int
tar_endwr(void)
{
	/* the trailer is NULLCNT blocks of BLKMULT bytes each */
	return wr_skip(NULLCNT * BLKMULT);
}

/*
 * tar_endrd()
 *	no cleanup needed here, just return size of trailer (for append)
 * Return:
 *	size of trailer (2 * BLKMULT)
 */

off_t
tar_endrd(void)
{
	/* same size that tar_endwr() writes */
	return NULLCNT * BLKMULT;
}

/*
 * tar_trail()
 *	Called to determine if a header block is a valid trailer. We are passed
 *	the block, the in_resync flag (which tells us we are in resync mode;
 *	looking for a valid header), and cnt (which starts at zero) which is
 *	used to count the number of empty blocks we have seen so far.
 * Return:
 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
 *	could never contain a header.
112 */ 113 114 int 115 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt) 116 { 117 int i; 118 119 /* 120 * look for all zero, trailer is two consecutive blocks of zero 121 */ 122 for (i = 0; i < BLKMULT; ++i) { 123 if (buf[i] != '\0') 124 break; 125 } 126 127 /* 128 * if not all zero it is not a trailer, but MIGHT be a header. 129 */ 130 if (i != BLKMULT) 131 return(-1); 132 133 /* 134 * When given a zero block, we must be careful! 135 * If we are not in resync mode, check for the trailer. Have to watch 136 * out that we do not mis-identify file data as the trailer, so we do 137 * NOT try to id a trailer during resync mode. During resync mode we 138 * might as well throw this block out since a valid header can NEVER be 139 * a block of all 0 (we must have a valid file name). 140 */ 141 if (!in_resync && (++*cnt >= NULLCNT)) 142 return(0); 143 return(1); 144 } 145 146 /* 147 * ul_oct() 148 * convert an unsigned long to an octal string. many oddball field 149 * termination characters are used by the various versions of tar in the 150 * different fields. term selects which kind to use. str is '0' padded 151 * at the front to len. we are unable to use only one format as many old 152 * tar readers are very cranky about this. 
153 * Return: 154 * 0 if the number fit into the string, -1 otherwise 155 */ 156 157 static int 158 ul_oct(u_long val, char *str, int len, int term) 159 { 160 char *pt; 161 162 /* 163 * term selects the appropriate character(s) for the end of the string 164 */ 165 pt = str + len - 1; 166 switch (term) { 167 case 3: 168 *pt-- = '\0'; 169 break; 170 case 2: 171 *pt-- = ' '; 172 *pt-- = '\0'; 173 break; 174 case 1: 175 *pt-- = ' '; 176 break; 177 case 0: 178 default: 179 *pt-- = '\0'; 180 *pt-- = ' '; 181 break; 182 } 183 184 /* 185 * convert and blank pad if there is space 186 */ 187 while (pt >= str) { 188 *pt-- = '0' + (char)(val & 0x7); 189 val >>= 3; 190 if (val == 0) 191 break; 192 } 193 194 while (pt >= str) 195 *pt-- = '0'; 196 if (val != 0) 197 return(-1); 198 return(0); 199 } 200 201 /* 202 * ull_oct() 203 * Convert an unsigned long long to an octal string. One of many oddball 204 * field termination characters are used by the various versions of tar 205 * in the different fields. term selects which kind to use. str is 206 * '0' padded at the front to len. We are unable to use only one format 207 * as many old tar readers are very cranky about this. 
208 * Return: 209 * 0 if the number fit into the string, -1 otherwise 210 */ 211 212 static int 213 ull_oct(unsigned long long val, char *str, int len, int term) 214 { 215 char *pt; 216 217 /* 218 * term selects the appropriate character(s) for the end of the string 219 */ 220 pt = str + len - 1; 221 switch (term) { 222 case 3: 223 *pt-- = '\0'; 224 break; 225 case 2: 226 *pt-- = ' '; 227 *pt-- = '\0'; 228 break; 229 case 1: 230 *pt-- = ' '; 231 break; 232 case 0: 233 default: 234 *pt-- = '\0'; 235 *pt-- = ' '; 236 break; 237 } 238 239 /* 240 * convert and blank pad if there is space 241 */ 242 while (pt >= str) { 243 *pt-- = '0' + (char)(val & 0x7); 244 val >>= 3; 245 if (val == 0) 246 break; 247 } 248 249 while (pt >= str) 250 *pt-- = '0'; 251 if (val != 0) 252 return(-1); 253 return(0); 254 } 255 256 /* 257 * tar_chksm() 258 * calculate the checksum for a tar block counting the checksum field as 259 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 260 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 261 * pad headers with 0. 262 * Return: 263 * unsigned long checksum 264 */ 265 266 static u_long 267 tar_chksm(char *blk, int len) 268 { 269 char *stop; 270 char *pt; 271 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 272 273 /* 274 * add the part of the block before the checksum field 275 */ 276 pt = blk; 277 stop = blk + CHK_OFFSET; 278 while (pt < stop) 279 chksm += (u_long)(*pt++ & 0xff); 280 /* 281 * move past the checksum field and keep going, spec counts the 282 * checksum field as the sum of 8 blanks (which is pre-computed as 283 * BLNKSUM). 284 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 285 * starts, no point in summing zero's) 286 */ 287 pt += CHK_LEN; 288 stop = blk + len; 289 while (pt < stop) 290 chksm += (u_long)(*pt++ & 0xff); 291 return(chksm); 292 } 293 294 /* 295 * Routines for old BSD style tar (also made portable to sysV tar) 296 */ 297 298 /* 299 * tar_id() 300 * determine if a block given to us is a valid tar header (and not a USTAR 301 * header). We have to be on the lookout for those pesky blocks of all 302 * zero's. 303 * Return: 304 * 0 if a tar header, -1 otherwise 305 */ 306 307 int 308 tar_id(char *blk, int size) 309 { 310 HD_TAR *hd; 311 HD_USTAR *uhd; 312 313 if (size < BLKMULT) 314 return(-1); 315 hd = (HD_TAR *)blk; 316 uhd = (HD_USTAR *)blk; 317 318 /* 319 * check for block of zero's first, a simple and fast test, then make 320 * sure this is not a ustar header by looking for the ustar magic 321 * cookie. We should use TMAGLEN, but some USTAR archive programs are 322 * wrong and create archives missing the \0. Last we check the 323 * checksum. If this is ok we have to assume it is a valid header. 
324 */ 325 if (hd->name[0] == '\0') 326 return(-1); 327 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 328 return(-1); 329 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 330 return(-1); 331 force_one_volume = 1; 332 return(0); 333 } 334 335 /* 336 * tar_opt() 337 * handle tar format specific -o options 338 * Return: 339 * 0 if ok -1 otherwise 340 */ 341 342 int 343 tar_opt(void) 344 { 345 OPLIST *opt; 346 347 while ((opt = opt_next()) != NULL) { 348 if (strcmp(opt->name, TAR_OPTION) || 349 strcmp(opt->value, TAR_NODIR)) { 350 paxwarn(1, "Unknown tar format -o option/value pair %s=%s", 351 opt->name, opt->value); 352 paxwarn(1,"%s=%s is the only supported tar format option", 353 TAR_OPTION, TAR_NODIR); 354 return(-1); 355 } 356 357 /* 358 * we only support one option, and only when writing 359 */ 360 if ((act != APPND) && (act != ARCHIVE)) { 361 paxwarn(1, "%s=%s is only supported when writing.", 362 opt->name, opt->value); 363 return(-1); 364 } 365 tar_nodir = 1; 366 } 367 return(0); 368 } 369 370 371 /* 372 * tar_rd() 373 * extract the values out of block already determined to be a tar header. 374 * store the values in the ARCHD parameter. 
375 * Return: 376 * 0 377 */ 378 379 int 380 tar_rd(ARCHD *arcn, char *buf) 381 { 382 HD_TAR *hd; 383 unsigned long long val; 384 char *pt; 385 386 /* 387 * we only get proper sized buffers passed to us 388 */ 389 if (tar_id(buf, BLKMULT) < 0) 390 return(-1); 391 memset(arcn, 0, sizeof(*arcn)); 392 arcn->org_name = arcn->name; 393 arcn->sb.st_nlink = 1; 394 395 /* 396 * copy out the name and values in the stat buffer 397 */ 398 hd = (HD_TAR *)buf; 399 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 400 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 401 &gnu_name_string, hd->name, sizeof(hd->name)); 402 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 403 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 404 } 405 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 406 0xfff); 407 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 408 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 409 arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); 410 val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); 411 if (val > MAX_TIME_T) 412 arcn->sb.st_mtime = MAX_TIME_T; 413 else 414 arcn->sb.st_mtime = val; 415 arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; 416 417 /* 418 * have to look at the last character, it may be a '/' and that is used 419 * to encode this as a directory 420 */ 421 pt = &(arcn->name[arcn->nlen - 1]); 422 arcn->pad = 0; 423 arcn->skip = 0; 424 switch (hd->linkflag) { 425 case SYMTYPE: 426 /* 427 * symbolic link, need to get the link name and set the type in 428 * the st_mode so -v printing will look correct. 429 */ 430 arcn->type = PAX_SLK; 431 arcn->sb.st_mode |= S_IFLNK; 432 break; 433 case LNKTYPE: 434 /* 435 * hard link, need to get the link name, set the type in the 436 * st_mode and st_nlink so -v printing will look better. 
437 */ 438 arcn->type = PAX_HLK; 439 arcn->sb.st_nlink = 2; 440 441 /* 442 * no idea of what type this thing really points at, but 443 * we set something for printing only. 444 */ 445 arcn->sb.st_mode |= S_IFREG; 446 break; 447 case LONGLINKTYPE: 448 case LONGNAMETYPE: 449 /* 450 * GNU long link/file; we tag these here and let the 451 * pax internals deal with it -- too ugly otherwise. 452 */ 453 arcn->type = 454 hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; 455 arcn->pad = TAR_PAD(arcn->sb.st_size); 456 arcn->skip = arcn->sb.st_size; 457 break; 458 case DIRTYPE: 459 /* 460 * It is a directory, set the mode for -v printing 461 */ 462 arcn->type = PAX_DIR; 463 arcn->sb.st_mode |= S_IFDIR; 464 arcn->sb.st_nlink = 2; 465 break; 466 case AREGTYPE: 467 case REGTYPE: 468 default: 469 /* 470 * If we have a trailing / this is a directory and NOT a file. 471 */ 472 arcn->ln_name[0] = '\0'; 473 arcn->ln_nlen = 0; 474 if (*pt == '/') { 475 /* 476 * it is a directory, set the mode for -v printing 477 */ 478 arcn->type = PAX_DIR; 479 arcn->sb.st_mode |= S_IFDIR; 480 arcn->sb.st_nlink = 2; 481 } else { 482 /* 483 * have a file that will be followed by data. Set the 484 * skip value to the size field and calculate the size 485 * of the padding. 486 */ 487 arcn->type = PAX_REG; 488 arcn->sb.st_mode |= S_IFREG; 489 arcn->pad = TAR_PAD(arcn->sb.st_size); 490 arcn->skip = arcn->sb.st_size; 491 } 492 break; 493 } 494 495 /* 496 * strip off any trailing slash. 497 */ 498 if (*pt == '/') { 499 *pt = '\0'; 500 --arcn->nlen; 501 } 502 return(0); 503 } 504 505 /* 506 * tar_wr() 507 * write a tar header for the file specified in the ARCHD to the archive. 508 * Have to check for file types that cannot be stored and file names that 509 * are too long. Be careful of the term (last arg) to ul_oct, each field 510 * of tar has it own spec for the termination character(s). 
511 * ASSUMED: space after header in header block is zero filled 512 * Return: 513 * 0 if file has data to be written after the header, 1 if file has NO 514 * data to write after the header, -1 if archive write failed 515 */ 516 517 int 518 tar_wr(ARCHD *arcn) 519 { 520 HD_TAR *hd; 521 int len; 522 char hdblk[sizeof(HD_TAR)]; 523 524 /* 525 * check for those file system types which tar cannot store 526 */ 527 switch (arcn->type) { 528 case PAX_DIR: 529 /* 530 * user asked that dirs not be written to the archive 531 */ 532 if (tar_nodir) 533 return(1); 534 break; 535 case PAX_CHR: 536 paxwarn(1, "Tar cannot archive a character device %s", 537 arcn->org_name); 538 return(1); 539 case PAX_BLK: 540 paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name); 541 return(1); 542 case PAX_SCK: 543 paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name); 544 return(1); 545 case PAX_FIF: 546 paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name); 547 return(1); 548 case PAX_SLK: 549 case PAX_HLK: 550 case PAX_HRG: 551 if ((size_t)arcn->ln_nlen > sizeof(hd->linkname)) { 552 paxwarn(1, "Link name too long for tar %s", 553 arcn->ln_name); 554 return(1); 555 } 556 break; 557 case PAX_REG: 558 case PAX_CTG: 559 default: 560 break; 561 } 562 563 /* 564 * check file name len, remember extra char for dirs (the / at the end) 565 */ 566 len = arcn->nlen; 567 if (arcn->type == PAX_DIR) 568 ++len; 569 if ((size_t)len > sizeof(hd->name)) { 570 paxwarn(1, "File name too long for tar %s", arcn->name); 571 return(1); 572 } 573 574 /* 575 * Copy the data out of the ARCHD into the tar header based on the type 576 * of the file. Remember, many tar readers want all fields to be 577 * padded with zero so we zero the header first. We then set the 578 * linkflag field (type), the linkname, the size, and set the padding 579 * (if any) to be added after the file data (0 for all other types, 580 * as they only have a header). 
581 */ 582 memset(hdblk, 0, sizeof(hdblk)); 583 hd = (HD_TAR *)hdblk; 584 fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name)); 585 arcn->pad = 0; 586 587 if (arcn->type == PAX_DIR) { 588 /* 589 * directories are the same as files, except have a filename 590 * that ends with a /, we add the slash here. No data follows 591 * dirs, so no pad. 592 */ 593 hd->linkflag = AREGTYPE; 594 hd->name[len-1] = '/'; 595 if (ul_oct(0, hd->size, sizeof(hd->size), 1)) 596 goto out; 597 } else if (arcn->type == PAX_SLK) { 598 /* 599 * no data follows this file, so no pad 600 */ 601 hd->linkflag = SYMTYPE; 602 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 603 sizeof(arcn->ln_name)); 604 if (ul_oct(0, hd->size, sizeof(hd->size), 1)) 605 goto out; 606 } else if (PAX_IS_HARDLINK(arcn->type)) { 607 /* 608 * no data follows this file, so no pad 609 */ 610 hd->linkflag = LNKTYPE; 611 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 612 sizeof(arcn->ln_name)); 613 if (ul_oct(0, hd->size, sizeof(hd->size), 1)) 614 goto out; 615 } else { 616 /* 617 * data follows this file, so set the pad 618 */ 619 hd->linkflag = AREGTYPE; 620 if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 621 paxwarn(1, "File is too large for tar %s", 622 arcn->org_name); 623 return(1); 624 } 625 arcn->pad = TAR_PAD(arcn->sb.st_size); 626 } 627 628 /* 629 * copy those fields that are independent of the type 630 */ 631 if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 632 ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, 633 sizeof(hd->mtime), 1) || 634 ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 635 ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0)) 636 goto out; 637 638 /* 639 * calculate and add the checksum, then write the header. 
A return of 640 * 0 tells the caller to now write the file data, 1 says no data needs 641 * to be written 642 */ 643 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 644 sizeof(hd->chksum), 3)) 645 goto out; 646 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 647 return(-1); 648 if (wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) 649 return(-1); 650 if (PAX_IS_REG(arcn->type)) 651 return(0); 652 return(1); 653 654 out: 655 /* 656 * header field is out of range 657 */ 658 paxwarn(1, "Tar header field is too small for %s", arcn->org_name); 659 return(1); 660 } 661 662 /* 663 * Routines for POSIX ustar 664 */ 665 666 /* 667 * ustar_id() 668 * determine if a block given to us is a valid ustar header. We have to 669 * be on the lookout for those pesky blocks of all zero's 670 * Return: 671 * 0 if a ustar header, -1 otherwise 672 */ 673 674 int 675 ustar_id(char *blk, int size) 676 { 677 HD_USTAR *hd; 678 679 if (size < BLKMULT) 680 return(-1); 681 hd = (HD_USTAR *)blk; 682 683 /* 684 * check for block of zero's first, a simple and fast test then check 685 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 686 * programs are fouled up and create archives missing the \0. Last we 687 * check the checksum. If ok we have to assume it is a valid header. 688 */ 689 if (hd->prefix[0] == '\0' && hd->name[0] == '\0') 690 return(-1); 691 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 692 return(-1); 693 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 694 return(-1); 695 return(0); 696 } 697 698 /* 699 * ustar_rd() 700 * extract the values out of block already determined to be a ustar header. 701 * store the values in the ARCHD parameter. 
702 * Return: 703 * 0 704 */ 705 706 int 707 ustar_rd(ARCHD *arcn, char *buf) 708 { 709 HD_USTAR *hd = (HD_USTAR *)buf; 710 char *dest; 711 int cnt = 0; 712 dev_t devmajor; 713 dev_t devminor; 714 unsigned long long val; 715 716 /* 717 * we only get proper sized buffers 718 */ 719 if (ustar_id(buf, BLKMULT) < 0) 720 return(-1); 721 722 reset: 723 memset(arcn, 0, sizeof(*arcn)); 724 arcn->org_name = arcn->name; 725 arcn->sb.st_nlink = 1; 726 727 /* Process Extended headers. */ 728 if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) { 729 if (rd_xheader(arcn, hd->typeflag == GHDRTYPE, 730 (off_t)asc_ull(hd->size, sizeof(hd->size), OCT)) < 0) 731 return (-1); 732 733 /* Update and check the ustar header. */ 734 if (rd_wrbuf(buf, BLKMULT) != BLKMULT) 735 return (-1); 736 if (ustar_id(buf, BLKMULT) < 0) 737 return(-1); 738 739 /* if the next block is another extension, reset the values */ 740 if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) 741 goto reset; 742 } 743 744 if (!arcn->nlen) { 745 /* 746 * See if the filename is split into two parts. if, so join 747 * the parts. We copy the prefix first and add a / between 748 * the prefix and name. 749 */ 750 dest = arcn->name; 751 if (*(hd->prefix) != '\0') { 752 cnt = fieldcpy(dest, sizeof(arcn->name) - 1, 753 hd->prefix, sizeof(hd->prefix)); 754 dest += cnt; 755 *dest++ = '/'; 756 cnt++; 757 } else 758 cnt = 0; 759 760 if (hd->typeflag != LONGLINKTYPE && 761 hd->typeflag != LONGNAMETYPE) { 762 arcn->nlen = cnt + expandname(dest, 763 sizeof(arcn->name) - cnt, &gnu_name_string, 764 hd->name, sizeof(hd->name)); 765 } 766 } 767 768 if (!arcn->ln_nlen && 769 hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 770 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 771 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 772 } 773 774 /* 775 * follow the spec to the letter. we should only have mode bits, strip 776 * off all other crud we may be passed. 
777 */ 778 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 779 0xfff); 780 arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); 781 if (arcn->sb.st_mtime == 0) { 782 val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); 783 if (val > MAX_TIME_T) 784 arcn->sb.st_mtime = MAX_TIME_T; 785 else 786 arcn->sb.st_mtime = val; 787 } 788 if (arcn->sb.st_ctime == 0) { 789 arcn->sb.st_ctim = arcn->sb.st_mtim; 790 } 791 if (arcn->sb.st_atime == 0) { 792 arcn->sb.st_atim = arcn->sb.st_mtim; 793 } 794 795 /* 796 * If we can find the ascii names for gname and uname in the password 797 * and group files we will use the uid's and gid they bind. Otherwise 798 * we use the uid and gid values stored in the header. (This is what 799 * the posix spec wants). 800 */ 801 hd->gname[sizeof(hd->gname) - 1] = '\0'; 802 if (Nflag || gid_from_group(hd->gname, &(arcn->sb.st_gid)) == -1) 803 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 804 hd->uname[sizeof(hd->uname) - 1] = '\0'; 805 if (Nflag || uid_from_user(hd->uname, &(arcn->sb.st_uid)) == -1) 806 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 807 808 /* 809 * set the defaults, these may be changed depending on the file type 810 */ 811 arcn->pad = 0; 812 arcn->skip = 0; 813 arcn->sb.st_rdev = (dev_t)0; 814 815 /* 816 * set the mode and PAX type according to the typeflag in the header 817 */ 818 switch (hd->typeflag) { 819 case FIFOTYPE: 820 arcn->type = PAX_FIF; 821 arcn->sb.st_mode |= S_IFIFO; 822 break; 823 case DIRTYPE: 824 arcn->type = PAX_DIR; 825 arcn->sb.st_mode |= S_IFDIR; 826 arcn->sb.st_nlink = 2; 827 828 /* 829 * Some programs that create ustar archives append a '/' 830 * to the pathname for directories. This clearly violates 831 * ustar specs, but we will silently strip it off anyway. 
832 */ 833 if (arcn->name[arcn->nlen - 1] == '/') 834 arcn->name[--arcn->nlen] = '\0'; 835 break; 836 case BLKTYPE: 837 case CHRTYPE: 838 /* 839 * this type requires the rdev field to be set. 840 */ 841 if (hd->typeflag == BLKTYPE) { 842 arcn->type = PAX_BLK; 843 arcn->sb.st_mode |= S_IFBLK; 844 } else { 845 arcn->type = PAX_CHR; 846 arcn->sb.st_mode |= S_IFCHR; 847 } 848 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 849 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 850 arcn->sb.st_rdev = TODEV(devmajor, devminor); 851 break; 852 case SYMTYPE: 853 case LNKTYPE: 854 if (hd->typeflag == SYMTYPE) { 855 arcn->type = PAX_SLK; 856 arcn->sb.st_mode |= S_IFLNK; 857 } else { 858 arcn->type = PAX_HLK; 859 /* 860 * so printing looks better 861 */ 862 arcn->sb.st_mode |= S_IFREG; 863 arcn->sb.st_nlink = 2; 864 } 865 break; 866 case LONGLINKTYPE: 867 case LONGNAMETYPE: 868 /* 869 * GNU long link/file; we tag these here and let the 870 * pax internals deal with it -- too ugly otherwise. 871 */ 872 arcn->type = 873 hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; 874 arcn->pad = TAR_PAD(arcn->sb.st_size); 875 arcn->skip = arcn->sb.st_size; 876 break; 877 case CONTTYPE: 878 case AREGTYPE: 879 case REGTYPE: 880 default: 881 /* 882 * these types have file data that follows. Set the skip and 883 * pad fields. 884 */ 885 arcn->type = PAX_REG; 886 arcn->pad = TAR_PAD(arcn->sb.st_size); 887 arcn->skip = arcn->sb.st_size; 888 arcn->sb.st_mode |= S_IFREG; 889 break; 890 } 891 return(0); 892 } 893 894 /* 895 * ustar_wr() 896 * write a ustar header for the file specified in the ARCHD to the archive 897 * Have to check for file types that cannot be stored and file names that 898 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 899 * '\0' for the termination character (this is different than picky tar) 900 * ASSUMED: space after header in header block is zero filled 901 * Return: 902 * 0 if file has data to be written after the header, 1 if file has NO 903 * data to write after the header, -1 if archive write failed 904 */ 905 906 int 907 ustar_wr(ARCHD *arcn) 908 { 909 HD_USTAR *hd; 910 const char *name; 911 char *pt, hdblk[sizeof(HD_USTAR)]; 912 913 /* 914 * check for those file system types ustar cannot store 915 */ 916 if (arcn->type == PAX_SCK) { 917 paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name); 918 return(1); 919 } 920 921 /* 922 * user asked that dirs not be written to the archive 923 */ 924 if (arcn->type == PAX_DIR && tar_nodir) 925 return (1); 926 927 /* 928 * check the length of the linkname 929 */ 930 if (PAX_IS_LINK(arcn->type) && 931 ((size_t)arcn->ln_nlen > sizeof(hd->linkname))) { 932 paxwarn(1, "Link name too long for ustar %s", arcn->ln_name); 933 return(1); 934 } 935 936 /* 937 * split the path name into prefix and name fields (if needed). if 938 * pt != arcn->name, the name has to be split 939 */ 940 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 941 paxwarn(1, "File name too long for ustar %s", arcn->name); 942 return(1); 943 } 944 945 /* 946 * zero out the header so we don't have to worry about zero fill below 947 */ 948 memset(hdblk, 0, sizeof(hdblk)); 949 hd = (HD_USTAR *)hdblk; 950 arcn->pad = 0; 951 952 /* 953 * split the name, or zero out the prefix 954 */ 955 if (pt != arcn->name) { 956 /* 957 * name was split, pt points at the / where the split is to 958 * occur, we remove the / and copy the first part to the prefix 959 */ 960 *pt = '\0'; 961 fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name, 962 sizeof(arcn->name)); 963 *pt++ = '/'; 964 } 965 966 /* 967 * copy the name part. 
this may be the whole path or the part after 968 * the prefix 969 */ 970 fieldcpy(hd->name, sizeof(hd->name), pt, 971 sizeof(arcn->name) - (pt - arcn->name)); 972 973 /* 974 * set the fields in the header that are type dependent 975 */ 976 switch (arcn->type) { 977 case PAX_DIR: 978 hd->typeflag = DIRTYPE; 979 if (ul_oct(0, hd->size, sizeof(hd->size), 3)) 980 goto out; 981 break; 982 case PAX_CHR: 983 case PAX_BLK: 984 if (arcn->type == PAX_CHR) 985 hd->typeflag = CHRTYPE; 986 else 987 hd->typeflag = BLKTYPE; 988 if (ul_oct(MAJOR(arcn->sb.st_rdev), hd->devmajor, 989 sizeof(hd->devmajor), 3) || 990 ul_oct(MINOR(arcn->sb.st_rdev), hd->devminor, 991 sizeof(hd->devminor), 3) || 992 ul_oct(0, hd->size, sizeof(hd->size), 3)) 993 goto out; 994 break; 995 case PAX_FIF: 996 hd->typeflag = FIFOTYPE; 997 if (ul_oct(0, hd->size, sizeof(hd->size), 3)) 998 goto out; 999 break; 1000 case PAX_SLK: 1001 case PAX_HLK: 1002 case PAX_HRG: 1003 if (arcn->type == PAX_SLK) 1004 hd->typeflag = SYMTYPE; 1005 else 1006 hd->typeflag = LNKTYPE; 1007 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 1008 sizeof(arcn->ln_name)); 1009 if (ul_oct(0, hd->size, sizeof(hd->size), 3)) 1010 goto out; 1011 break; 1012 case PAX_REG: 1013 case PAX_CTG: 1014 default: 1015 /* 1016 * file data with this type, set the padding 1017 */ 1018 if (arcn->type == PAX_CTG) 1019 hd->typeflag = CONTTYPE; 1020 else 1021 hd->typeflag = REGTYPE; 1022 arcn->pad = TAR_PAD(arcn->sb.st_size); 1023 if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) { 1024 paxwarn(1, "File is too long for ustar %s", 1025 arcn->org_name); 1026 return(1); 1027 } 1028 break; 1029 } 1030 1031 strncpy(hd->magic, TMAGIC, TMAGLEN); 1032 strncpy(hd->version, TVERSION, TVERSLEN); 1033 1034 /* 1035 * set the remaining fields. Some versions want all 16 bits of mode 1036 * we better humor them (they really do not meet spec though).... 
1037 */ 1038 if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) { 1039 if (uid_nobody == 0) { 1040 if (uid_from_user("nobody", &uid_nobody) == -1) 1041 goto out; 1042 } 1043 if (uid_warn != arcn->sb.st_uid) { 1044 uid_warn = arcn->sb.st_uid; 1045 paxwarn(1, 1046 "Ustar header field is too small for uid %lu, " 1047 "using nobody", (u_long)arcn->sb.st_uid); 1048 } 1049 if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3)) 1050 goto out; 1051 } 1052 if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) { 1053 if (gid_nobody == 0) { 1054 if (gid_from_group("nobody", &gid_nobody) == -1) 1055 goto out; 1056 } 1057 if (gid_warn != arcn->sb.st_gid) { 1058 gid_warn = arcn->sb.st_gid; 1059 paxwarn(1, 1060 "Ustar header field is too small for gid %lu, " 1061 "using nobody", (u_long)arcn->sb.st_gid); 1062 } 1063 if (ul_oct(gid_nobody, hd->gid, sizeof(hd->gid), 3)) 1064 goto out; 1065 } 1066 if (ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, 1067 sizeof(hd->mtime), 3) || 1068 ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) 1069 goto out; 1070 if (!Nflag) { 1071 if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL) 1072 strncpy(hd->uname, name, sizeof(hd->uname)); 1073 if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL) 1074 strncpy(hd->gname, name, sizeof(hd->gname)); 1075 } 1076 1077 /* 1078 * calculate and store the checksum write the header to the archive 1079 * return 0 tells the caller to now write the file data, 1 says no data 1080 * needs to be written 1081 */ 1082 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1083 sizeof(hd->chksum), 3)) 1084 goto out; 1085 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1086 return(-1); 1087 if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) 1088 return(-1); 1089 if (PAX_IS_REG(arcn->type)) 1090 return(0); 1091 return(1); 1092 1093 out: 1094 /* 1095 * header field is out of range 1096 */ 1097 paxwarn(1, "Ustar header field is too small for %s", arcn->org_name); 1098 return(1); 1099 } 1100 
1101 /* 1102 * name_split() 1103 * see if the name has to be split for storage in a ustar header. We try 1104 * to fit the entire name in the name field without splitting if we can. 1105 * The split point is always at a / 1106 * Return 1107 * character pointer to split point (always the / that is to be removed 1108 * if the split is not needed, the points is set to the start of the file 1109 * name (it would violate the spec to split there). A NULL is returned if 1110 * the file name is too long 1111 */ 1112 1113 static char * 1114 name_split(char *name, int len) 1115 { 1116 char *start; 1117 1118 /* 1119 * check to see if the file name is small enough to fit in the name 1120 * field. if so just return a pointer to the name. 1121 * The strings can fill the complete name and prefix fields 1122 * without a NUL terminator. 1123 */ 1124 if (len <= TNMSZ) 1125 return(name); 1126 if (len > (TPFSZ + TNMSZ + 1)) 1127 return(NULL); 1128 1129 /* 1130 * we start looking at the biggest sized piece that fits in the name 1131 * field. We walk forward looking for a slash to split at. The idea is 1132 * to find the biggest piece to fit in the name field (or the smallest 1133 * prefix we can find) (the -1 is correct the biggest piece would 1134 * include the slash between the two parts that gets thrown away) 1135 */ 1136 start = name + len - TNMSZ - 1; 1137 1138 /* 1139 * the prefix may not be empty, so skip the first character when 1140 * trying to split a path of exactly TNMSZ+1 characters. 1141 * NOTE: This means the ustar format can't store /str if 1142 * str contains no slashes and the length of str == TNMSZ 1143 */ 1144 if (start == name) 1145 ++start; 1146 1147 while ((*start != '\0') && (*start != '/')) 1148 ++start; 1149 1150 /* 1151 * if we hit the end of the string, this name cannot be split, so we 1152 * cannot store this file. 
1153 */ 1154 if (*start == '\0') 1155 return(NULL); 1156 1157 /* 1158 * the split point isn't valid if it results in a prefix 1159 * longer than TPFSZ 1160 */ 1161 if ((start - name) > TPFSZ) 1162 return(NULL); 1163 1164 /* 1165 * ok have a split point, return it to the caller 1166 */ 1167 return(start); 1168 } 1169 1170 static size_t 1171 expandname(char *buf, size_t len, char **gnu_name, const char *name, 1172 size_t limit) 1173 { 1174 size_t nlen; 1175 1176 if (*gnu_name) { 1177 /* *gnu_name is NUL terminated */ 1178 if ((nlen = strlcpy(buf, *gnu_name, len)) >= len) 1179 nlen = len - 1; 1180 free(*gnu_name); 1181 *gnu_name = NULL; 1182 } else 1183 nlen = fieldcpy(buf, len, name, limit); 1184 return(nlen); 1185 } 1186 1187 /* shortest possible extended record: "5 a=\n" */ 1188 #define MINXHDRSZ 5 1189 1190 /* longest record we'll accept */ 1191 #define MAXXHDRSZ BLKMULT 1192 1193 static int 1194 rd_time(struct timespec *ts, const char *keyword, char *p) 1195 { 1196 const char *errstr; 1197 char *q; 1198 int multiplier; 1199 1200 if ((q = strchr(p, '.')) != NULL) 1201 *q = '\0'; 1202 1203 ts->tv_sec = strtonum(p, 0, MAX_TIME_T, &errstr); 1204 if (errstr != NULL) { 1205 paxwarn(1, "%s is %s: %s", keyword, errstr, p); 1206 return -1; 1207 } 1208 1209 ts->tv_nsec = 0; 1210 1211 if (q == NULL) 1212 return 0; 1213 1214 multiplier = 100000000; 1215 for (q++; *q != '\0'; q++) { 1216 if (!isdigit((unsigned char)*q)) { 1217 paxwarn(1, "%s contains non-digit", keyword); 1218 return -1; 1219 } 1220 ts->tv_nsec += (*q - '0') * multiplier; 1221 multiplier /= 10; 1222 } 1223 1224 return 0; 1225 } 1226 1227 static int 1228 rd_xheader(ARCHD *arcn, int global, off_t size) 1229 { 1230 char buf[MAXXHDRSZ]; 1231 long len; 1232 char *delim, *keyword; 1233 char *nextp, *p, *end; 1234 int pad, ret = 0; 1235 1236 /* before we alter size, make note of how much we have to skip */ 1237 pad = TAR_PAD((unsigned)size); 1238 1239 p = end = buf; 1240 while (size > 0 || p < end) { 1241 if (size > 
0) { 1242 int rdlen; 1243 1244 /* shift stuff down */ 1245 if (p > buf) { 1246 memmove(buf, p, end - p); 1247 end -= p - buf; 1248 p = buf; 1249 } 1250 1251 /* fill starting at end */ 1252 rdlen = MINIMUM(size, (buf + sizeof buf) - end); 1253 if (rd_wrbuf(end, rdlen) != rdlen) { 1254 ret = -1; 1255 break; 1256 } 1257 size -= rdlen; 1258 end += rdlen; 1259 } 1260 1261 /* [p, end) is good */ 1262 if (memchr(p, ' ', end - p) == NULL || 1263 !isdigit((unsigned char)*p)) { 1264 paxwarn(1, "Invalid extended header record"); 1265 ret = -1; 1266 break; 1267 } 1268 errno = 0; 1269 len = strtol(p, &delim, 10); 1270 if (*delim != ' ' || (errno == ERANGE && len == LONG_MAX) || 1271 len < MINXHDRSZ) { 1272 paxwarn(1, "Invalid extended header record length"); 1273 ret = -1; 1274 break; 1275 } 1276 if (len > end - p) { 1277 paxwarn(1, "Extended header record length %lu is " 1278 "out of range", len); 1279 /* if we can just toss this record, do so */ 1280 len -= end - p; 1281 if (len <= size && rd_skip(len) == 0) { 1282 size -= len; 1283 p = end = buf; 1284 continue; 1285 } 1286 ret = -1; 1287 break; 1288 } 1289 nextp = p + len; 1290 keyword = p = delim + 1; 1291 p = memchr(p, '=', len); 1292 if (!p || nextp[-1] != '\n') { 1293 paxwarn(1, "Malformed extended header record"); 1294 ret = -1; 1295 break; 1296 } 1297 *p++ = nextp[-1] = '\0'; 1298 if (!global) { 1299 if (!strcmp(keyword, "path")) { 1300 arcn->nlen = strlcpy(arcn->name, p, 1301 sizeof(arcn->name)); 1302 } else if (!strcmp(keyword, "linkpath")) { 1303 arcn->ln_nlen = strlcpy(arcn->ln_name, p, 1304 sizeof(arcn->ln_name)); 1305 } else if (!strcmp(keyword, "mtime")) { 1306 ret = rd_time(&arcn->sb.st_mtim, keyword, p); 1307 if (ret < 0) 1308 break; 1309 } else if (!strcmp(keyword, "atime")) { 1310 ret = rd_time(&arcn->sb.st_atim, keyword, p); 1311 if (ret < 0) 1312 break; 1313 } else if (!strcmp(keyword, "ctime")) { 1314 ret = rd_time(&arcn->sb.st_ctim, keyword, p); 1315 if (ret < 0) 1316 break; 1317 } 1318 } 1319 p = nextp; 
1320 } 1321 1322 if (rd_skip(size + pad) < 0) 1323 return (-1); 1324 return (ret); 1325 } 1326