1 /* $NetBSD: tar.c,v 1.34 2002/12/08 02:00:10 mrg Exp $ */ 2 3 /*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 #if defined(__RCSID) && !defined(lint) 42 #if 0 43 static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44 #else 45 __RCSID("$NetBSD: tar.c,v 1.34 2002/12/08 02:00:10 mrg Exp $"); 46 #endif 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/time.h> 51 #include <sys/stat.h> 52 #include <sys/param.h> 53 54 #include <ctype.h> 55 #include <errno.h> 56 #include <grp.h> 57 #include <pwd.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <unistd.h> 62 63 #include "pax.h" 64 #include "extern.h" 65 #include "tar.h" 66 67 /* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71 static int expandname(char *, size_t, char **, const char *); 72 static void longlink(ARCHD *); 73 static u_long tar_chksm(char *, int); 74 static char *name_split(char *, int); 75 static int ul_oct(u_long, char *, int, int); 76 #if !defined(NET2_STAT) && !defined(_LP64) 77 static int ull_oct(unsigned long long, char *, int, int); 78 #endif 79 static int tar_gnutar_exclude_one(const char *, size_t); 80 81 /* 82 * Routines common to all versions of tar 83 */ 84 85 static int tar_nodir; /* do not write dirs under old tar */ 86 int is_gnutar; /* behave like gnu tar; enable gnu 87 * extensions and skip end-ofvolume 88 * checks 89 */ 90 static int seen_gnu_warning; /* Have we warned yet? */ 91 static char *gnu_hack_string; /* ././@LongLink hackery */ 92 static int gnu_hack_len; /* len of gnu_hack_string */ 93 char *gnu_name_string; /* ././@LongLink hackery name */ 94 char *gnu_link_string; /* ././@LongLink hackery link */ 95 96 /* 97 * tar_endwr() 98 * add the tar trailer of two null blocks 99 * Return: 100 * 0 if ok, -1 otherwise (what wr_skip returns) 101 */ 102 103 int 104 tar_endwr(void) 105 { 106 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 107 } 108 109 /* 110 * tar_endrd() 111 * no cleanup needed here, just return size of trailer (for append) 112 * Return: 113 * size of trailer (2 * BLKMULT) 114 */ 115 116 off_t 117 tar_endrd(void) 118 { 119 return((off_t)(NULLCNT*BLKMULT)); 120 } 121 122 /* 123 * tar_trail() 124 * Called to determine if a header block is a valid trailer. We are passed 125 * the block, the in_sync flag (which tells us we are in resync mode; 126 * looking for a valid header), and cnt (which starts at zero) which is 127 * used to count the number of empty blocks we have seen so far. 128 * Return: 129 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 130 * could never contain a header. 131 */ 132 133 int 134 tar_trail(char *buf, int in_resync, int *cnt) 135 { 136 int i; 137 138 /* 139 * look for all zero, trailer is two consecutive blocks of zero 140 */ 141 for (i = 0; i < BLKMULT; ++i) { 142 if (buf[i] != '\0') 143 break; 144 } 145 146 /* 147 * if not all zero it is not a trailer, but MIGHT be a header. 148 */ 149 if (i != BLKMULT) 150 return(-1); 151 152 /* 153 * When given a zero block, we must be careful! 154 * If we are not in resync mode, check for the trailer. Have to watch 155 * out that we do not mis-identify file data as the trailer, so we do 156 * NOT try to id a trailer during resync mode. During resync mode we 157 * might as well throw this block out since a valid header can NEVER be 158 * a block of all 0 (we must have a valid file name). 159 */ 160 if (!in_resync && (++*cnt >= NULLCNT)) 161 return(0); 162 return(1); 163 } 164 165 /* 166 * ul_oct() 167 * convert an unsigned long to an octal string. many oddball field 168 * termination characters are used by the various versions of tar in the 169 * different fields. term selects which kind to use. str is '0' padded 170 * at the front to len. we are unable to use only one format as many old 171 * tar readers are very cranky about this. 172 * Return: 173 * 0 if the number fit into the string, -1 otherwise 174 */ 175 176 static int 177 ul_oct(u_long val, char *str, int len, int term) 178 { 179 char *pt; 180 181 /* 182 * term selects the appropriate character(s) for the end of the string 183 */ 184 pt = str + len - 1; 185 switch(term) { 186 case 3: 187 *pt-- = '\0'; 188 break; 189 case 2: 190 *pt-- = ' '; 191 *pt-- = '\0'; 192 break; 193 case 1: 194 *pt-- = ' '; 195 break; 196 case 0: 197 default: 198 *pt-- = '\0'; 199 *pt-- = ' '; 200 break; 201 } 202 203 /* 204 * convert and blank pad if there is space 205 */ 206 while (pt >= str) { 207 *pt-- = '0' + (char)(val & 0x7); 208 if ((val = val >> 3) == (u_long)0) 209 break; 210 } 211 212 while (pt >= str) 213 *pt-- = '0'; 214 if (val != (u_long)0) 215 return(-1); 216 return(0); 217 } 218 219 #if !defined(NET2_STAT) && !defined(_LP64) 220 /* 221 * ull_oct() 222 * convert an unsigned long long to an octal string. one of many oddball 223 * field termination characters are used by the various versions of tar 224 * in the different fields. term selects which kind to use. str is '0' 225 * padded at the front to len. we are unable to use only one format as 226 * many old tar readers are very cranky about this. 227 * Return: 228 * 0 if the number fit into the string, -1 otherwise 229 */ 230 231 static int 232 ull_oct(unsigned long long val, char *str, int len, int term) 233 { 234 char *pt; 235 236 /* 237 * term selects the appropriate character(s) for the end of the string 238 */ 239 pt = str + len - 1; 240 switch(term) { 241 case 3: 242 *pt-- = '\0'; 243 break; 244 case 2: 245 *pt-- = ' '; 246 *pt-- = '\0'; 247 break; 248 case 1: 249 *pt-- = ' '; 250 break; 251 case 0: 252 default: 253 *pt-- = '\0'; 254 *pt-- = ' '; 255 break; 256 } 257 258 /* 259 * convert and blank pad if there is space 260 */ 261 while (pt >= str) { 262 *pt-- = '0' + (char)(val & 0x7); 263 if ((val = val >> 3) == 0) 264 break; 265 } 266 267 while (pt >= str) 268 *pt-- = '0'; 269 if (val != (unsigned long long)0) 270 return(-1); 271 return(0); 272 } 273 #endif 274 275 /* 276 * tar_chksm() 277 * calculate the checksum for a tar block counting the checksum field as 278 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 279 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 280 * pad headers with 0. 281 * Return: 282 * unsigned long checksum 283 */ 284 285 static u_long 286 tar_chksm(char *blk, int len) 287 { 288 char *stop; 289 char *pt; 290 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 291 292 /* 293 * add the part of the block before the checksum field 294 */ 295 pt = blk; 296 stop = blk + CHK_OFFSET; 297 while (pt < stop) 298 chksm += (u_long)(*pt++ & 0xff); 299 /* 300 * move past the checksum field and keep going, spec counts the 301 * checksum field as the sum of 8 blanks (which is pre-computed as 302 * BLNKSUM). 303 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding 304 * starts, no point in summing zero's) 305 */ 306 pt += CHK_LEN; 307 stop = blk + len; 308 while (pt < stop) 309 chksm += (u_long)(*pt++ & 0xff); 310 return(chksm); 311 } 312 313 /* 314 * Routines for old BSD style tar (also made portable to sysV tar) 315 */ 316 317 /* 318 * tar_id() 319 * determine if a block given to us is a valid tar header (and not a USTAR 320 * header). We have to be on the lookout for those pesky blocks of all 321 * zero's. 322 * Return: 323 * 0 if a tar header, -1 otherwise 324 */ 325 326 int 327 tar_id(char *blk, int size) 328 { 329 HD_TAR *hd; 330 HD_USTAR *uhd; 331 332 if (size < BLKMULT) 333 return(-1); 334 hd = (HD_TAR *)blk; 335 uhd = (HD_USTAR *)blk; 336 337 /* 338 * check for block of zero's first, a simple and fast test, then make 339 * sure this is not a ustar header by looking for the ustar magic 340 * cookie. We should use TMAGLEN, but some USTAR archive programs are 341 * wrong and create archives missing the \0. Last we check the 342 * checksum. If this is ok we have to assume it is a valid header. 343 */ 344 if (hd->name[0] == '\0') 345 return(-1); 346 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 347 return(-1); 348 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 349 return(-1); 350 return(0); 351 } 352 353 /* 354 * tar_opt() 355 * handle tar format specific -o options 356 * Return: 357 * 0 if ok -1 otherwise 358 */ 359 360 int 361 tar_opt(void) 362 { 363 OPLIST *opt; 364 365 while ((opt = opt_next()) != NULL) { 366 if (strcmp(opt->name, TAR_OPTION) || 367 strcmp(opt->value, TAR_NODIR)) { 368 tty_warn(1, 369 "Unknown tar format -o option/value pair %s=%s", 370 opt->name, opt->value); 371 tty_warn(1, 372 "%s=%s is the only supported tar format option", 373 TAR_OPTION, TAR_NODIR); 374 return(-1); 375 } 376 377 /* 378 * we only support one option, and only when writing 379 */ 380 if ((act != APPND) && (act != ARCHIVE)) { 381 tty_warn(1, "%s=%s is only supported when writing.", 382 opt->name, opt->value); 383 return(-1); 384 } 385 tar_nodir = 1; 386 } 387 return(0); 388 } 389 390 391 /* 392 * tar_rd() 393 * extract the values out of block already determined to be a tar header. 394 * store the values in the ARCHD parameter. 395 * Return: 396 * 0 397 */ 398 399 int 400 tar_rd(ARCHD *arcn, char *buf) 401 { 402 HD_TAR *hd; 403 char *pt; 404 405 /* 406 * we only get proper sized buffers passed to us 407 */ 408 if (tar_id(buf, BLKMULT) < 0) 409 return(-1); 410 memset(arcn, 0, sizeof(*arcn)); 411 arcn->org_name = arcn->name; 412 arcn->pat = NULL; 413 arcn->sb.st_nlink = 1; 414 415 /* 416 * copy out the name and values in the stat buffer 417 */ 418 hd = (HD_TAR *)buf; 419 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 420 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 421 &gnu_name_string, hd->name); 422 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 423 &gnu_link_string, hd->linkname); 424 } 425 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 426 0xfff); 427 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 428 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 429 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 430 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 431 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 432 433 /* 434 * have to look at the last character, it may be a '/' and that is used 435 * to encode this as a directory 436 */ 437 pt = &(arcn->name[arcn->nlen - 1]); 438 arcn->pad = 0; 439 arcn->skip = 0; 440 switch(hd->linkflag) { 441 case SYMTYPE: 442 /* 443 * symbolic link, need to get the link name and set the type in 444 * the st_mode so -v printing will look correct. 445 */ 446 arcn->type = PAX_SLK; 447 arcn->sb.st_mode |= S_IFLNK; 448 break; 449 case LNKTYPE: 450 /* 451 * hard link, need to get the link name, set the type in the 452 * st_mode and st_nlink so -v printing will look better. 453 */ 454 arcn->type = PAX_HLK; 455 arcn->sb.st_nlink = 2; 456 457 /* 458 * no idea of what type this thing really points at, but 459 * we set something for printing only. 460 */ 461 arcn->sb.st_mode |= S_IFREG; 462 break; 463 case LONGLINKTYPE: 464 arcn->type = PAX_GLL; 465 /* FALLTHROUGH */ 466 case LONGNAMETYPE: 467 /* 468 * GNU long link/file; we tag these here and let the 469 * pax internals deal with it -- too ugly otherwise. 470 */ 471 if (hd->linkflag != LONGLINKTYPE) 472 arcn->type = PAX_GLF; 473 arcn->pad = TAR_PAD(arcn->sb.st_size); 474 arcn->skip = arcn->sb.st_size; 475 break; 476 case AREGTYPE: 477 case REGTYPE: 478 case DIRTYPE: /* see below */ 479 default: 480 /* 481 * If we have a trailing / this is a directory and NOT a file. 482 * Note: V7 tar doesn't actually have DIRTYPE, but it was 483 * reported that V7 archives using USTAR directories do exist. 484 */ 485 if (*pt == '/' || hd->linkflag == DIRTYPE) { 486 /* 487 * it is a directory, set the mode for -v printing 488 */ 489 arcn->type = PAX_DIR; 490 arcn->sb.st_mode |= S_IFDIR; 491 arcn->sb.st_nlink = 2; 492 } else { 493 /* 494 * have a file that will be followed by data. Set the 495 * skip value to the size field and calculate the size 496 * of the padding. 497 */ 498 arcn->type = PAX_REG; 499 arcn->sb.st_mode |= S_IFREG; 500 arcn->pad = TAR_PAD(arcn->sb.st_size); 501 arcn->skip = arcn->sb.st_size; 502 } 503 break; 504 } 505 506 /* 507 * strip off any trailing slash. 508 */ 509 if (*pt == '/') { 510 *pt = '\0'; 511 --arcn->nlen; 512 } 513 return(0); 514 } 515 516 /* 517 * tar_wr() 518 * write a tar header for the file specified in the ARCHD to the archive. 519 * Have to check for file types that cannot be stored and file names that 520 * are too long. Be careful of the term (last arg) to ul_oct, each field 521 * of tar has it own spec for the termination character(s). 522 * ASSUMED: space after header in header block is zero filled 523 * Return: 524 * 0 if file has data to be written after the header, 1 if file has NO 525 * data to write after the header, -1 if archive write failed 526 */ 527 528 int 529 tar_wr(ARCHD *arcn) 530 { 531 HD_TAR *hd; 532 int len; 533 char hdblk[sizeof(HD_TAR)]; 534 535 /* 536 * check for those file system types which tar cannot store 537 */ 538 switch(arcn->type) { 539 case PAX_DIR: 540 /* 541 * user asked that dirs not be written to the archive 542 */ 543 if (tar_nodir) 544 return(1); 545 break; 546 case PAX_CHR: 547 tty_warn(1, "Tar cannot archive a character device %s", 548 arcn->org_name); 549 return(1); 550 case PAX_BLK: 551 tty_warn(1, 552 "Tar cannot archive a block device %s", arcn->org_name); 553 return(1); 554 case PAX_SCK: 555 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 556 return(1); 557 case PAX_FIF: 558 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 559 return(1); 560 case PAX_SLK: 561 case PAX_HLK: 562 case PAX_HRG: 563 if (arcn->ln_nlen > sizeof(hd->linkname)) { 564 tty_warn(1,"Link name too long for tar %s", 565 arcn->ln_name); 566 return(1); 567 } 568 break; 569 case PAX_REG: 570 case PAX_CTG: 571 default: 572 break; 573 } 574 575 /* 576 * check file name len, remember extra char for dirs (the / at the end) 577 */ 578 len = arcn->nlen; 579 if (arcn->type == PAX_DIR) 580 ++len; 581 if (len >= sizeof(hd->name)) { 582 tty_warn(1, "File name too long for tar %s", arcn->name); 583 return(1); 584 } 585 586 /* 587 * copy the data out of the ARCHD into the tar header based on the type 588 * of the file. Remember many tar readers want the unused fields to be 589 * padded with zero. We set the linkflag field (type), the linkname 590 * (or zero if not used),the size, and set the padding (if any) to be 591 * added after the file data (0 for all other types, as they only have 592 * a header) 593 */ 594 memset(hdblk, 0, sizeof(hdblk)); 595 hd = (HD_TAR *)hdblk; 596 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 597 arcn->pad = 0; 598 599 if (arcn->type == PAX_DIR) { 600 /* 601 * directories are the same as files, except have a filename 602 * that ends with a /, we add the slash here. No data follows, 603 * dirs, so no pad. 604 */ 605 hd->linkflag = AREGTYPE; 606 hd->name[len-1] = '/'; 607 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 608 goto out; 609 } else if (arcn->type == PAX_SLK) { 610 /* 611 * no data follows this file, so no pad 612 */ 613 hd->linkflag = SYMTYPE; 614 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 615 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 616 goto out; 617 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 618 /* 619 * no data follows this file, so no pad 620 */ 621 hd->linkflag = LNKTYPE; 622 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 623 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 624 goto out; 625 } else { 626 /* 627 * data follows this file, so set the pad 628 */ 629 hd->linkflag = AREGTYPE; 630 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 631 tty_warn(1,"File is too large for tar %s", 632 arcn->org_name); 633 return(1); 634 } 635 arcn->pad = TAR_PAD(arcn->sb.st_size); 636 } 637 638 /* 639 * copy those fields that are independent of the type 640 */ 641 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 642 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 643 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 644 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 645 goto out; 646 647 /* 648 * calculate and add the checksum, then write the header. A return of 649 * 0 tells the caller to now write the file data, 1 says no data needs 650 * to be written 651 */ 652 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 653 sizeof(hd->chksum), 3)) 654 goto out; /* XXX Something's wrong here 655 * because a zero-byte file can 656 * cause this to be done and 657 * yet the resulting warning 658 * seems incorrect */ 659 660 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 661 return(-1); 662 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 663 return(-1); 664 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 665 return(0); 666 return(1); 667 668 out: 669 /* 670 * header field is out of range 671 */ 672 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 673 return(1); 674 } 675 676 /* 677 * Routines for POSIX ustar 678 */ 679 680 /* 681 * ustar_strd() 682 * initialization for ustar read 683 * Return: 684 * 0 if ok, -1 otherwise 685 */ 686 687 int 688 ustar_strd(void) 689 { 690 return(0); 691 } 692 693 /* 694 * ustar_stwr() 695 * initialization for ustar write 696 * Return: 697 * 0 if ok, -1 otherwise 698 */ 699 700 int 701 ustar_stwr(void) 702 { 703 return(0); 704 } 705 706 /* 707 * ustar_id() 708 * determine if a block given to us is a valid ustar header. We have to 709 * be on the lookout for those pesky blocks of all zero's 710 * Return: 711 * 0 if a ustar header, -1 otherwise 712 */ 713 714 int 715 ustar_id(char *blk, int size) 716 { 717 HD_USTAR *hd; 718 719 if (size < BLKMULT) 720 return(-1); 721 hd = (HD_USTAR *)blk; 722 723 /* 724 * check for block of zero's first, a simple and fast test then check 725 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 726 * programs are fouled up and create archives missing the \0. Last we 727 * check the checksum. If ok we have to assume it is a valid header. 728 */ 729 if (hd->name[0] == '\0') 730 return(-1); 731 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 732 return(-1); 733 /* This is GNU tar */ 734 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 735 !seen_gnu_warning) { 736 seen_gnu_warning = 1; 737 tty_warn(0, 738 "Trying to read GNU tar archive with extensions off"); 739 } 740 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 741 return(-1); 742 return(0); 743 } 744 745 /* 746 * ustar_rd() 747 * extract the values out of block already determined to be a ustar header. 748 * store the values in the ARCHD parameter. 749 * Return: 750 * 0 751 */ 752 753 int 754 ustar_rd(ARCHD *arcn, char *buf) 755 { 756 HD_USTAR *hd; 757 char *dest; 758 int cnt; 759 dev_t devmajor; 760 dev_t devminor; 761 762 /* 763 * we only get proper sized buffers 764 */ 765 if (ustar_id(buf, BLKMULT) < 0) 766 return(-1); 767 768 memset(arcn, 0, sizeof(*arcn)); 769 arcn->org_name = arcn->name; 770 arcn->pat = NULL; 771 arcn->sb.st_nlink = 1; 772 hd = (HD_USTAR *)buf; 773 774 /* 775 * see if the filename is split into two parts. if, so joint the parts. 776 * we copy the prefix first and add a / between the prefix and name. 777 */ 778 dest = arcn->name; 779 if (*(hd->prefix) != '\0') { 780 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 781 dest += cnt; 782 *dest++ = '/'; 783 cnt++; 784 } else { 785 cnt = 0; 786 } 787 788 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 789 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 790 &gnu_name_string, hd->name); 791 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 792 &gnu_link_string, hd->linkname); 793 } 794 795 /* 796 * follow the spec to the letter. we should only have mode bits, strip 797 * off all other crud we may be passed. 798 */ 799 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 800 0xfff); 801 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 802 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 803 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 804 805 /* 806 * If we can find the ascii names for gname and uname in the password 807 * and group files we will use the uid's and gid they bind. Otherwise 808 * we use the uid and gid values stored in the header. (This is what 809 * the posix spec wants). 810 */ 811 hd->gname[sizeof(hd->gname) - 1] = '\0'; 812 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 813 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 814 hd->uname[sizeof(hd->uname) - 1] = '\0'; 815 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 816 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 817 818 /* 819 * set the defaults, these may be changed depending on the file type 820 */ 821 arcn->pad = 0; 822 arcn->skip = 0; 823 arcn->sb.st_rdev = (dev_t)0; 824 825 /* 826 * set the mode and PAX type according to the typeflag in the header 827 */ 828 switch(hd->typeflag) { 829 case FIFOTYPE: 830 arcn->type = PAX_FIF; 831 arcn->sb.st_mode |= S_IFIFO; 832 break; 833 case DIRTYPE: 834 arcn->type = PAX_DIR; 835 arcn->sb.st_mode |= S_IFDIR; 836 arcn->sb.st_nlink = 2; 837 838 /* 839 * Some programs that create ustar archives append a '/' 840 * to the pathname for directories. This clearly violates 841 * ustar specs, but we will silently strip it off anyway. 842 */ 843 if (arcn->name[arcn->nlen - 1] == '/') 844 arcn->name[--arcn->nlen] = '\0'; 845 break; 846 case BLKTYPE: 847 case CHRTYPE: 848 /* 849 * this type requires the rdev field to be set. 850 */ 851 if (hd->typeflag == BLKTYPE) { 852 arcn->type = PAX_BLK; 853 arcn->sb.st_mode |= S_IFBLK; 854 } else { 855 arcn->type = PAX_CHR; 856 arcn->sb.st_mode |= S_IFCHR; 857 } 858 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 859 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 860 arcn->sb.st_rdev = TODEV(devmajor, devminor); 861 break; 862 case SYMTYPE: 863 case LNKTYPE: 864 if (hd->typeflag == SYMTYPE) { 865 arcn->type = PAX_SLK; 866 arcn->sb.st_mode |= S_IFLNK; 867 } else { 868 arcn->type = PAX_HLK; 869 /* 870 * so printing looks better 871 */ 872 arcn->sb.st_mode |= S_IFREG; 873 arcn->sb.st_nlink = 2; 874 } 875 break; 876 case LONGLINKTYPE: 877 if (is_gnutar) 878 arcn->type = PAX_GLL; 879 /* FALLTHROUGH */ 880 case LONGNAMETYPE: 881 if (is_gnutar) { 882 /* 883 * GNU long link/file; we tag these here and let the 884 * pax internals deal with it -- too ugly otherwise. 885 */ 886 if (hd->typeflag != LONGLINKTYPE) 887 arcn->type = PAX_GLF; 888 arcn->pad = TAR_PAD(arcn->sb.st_size); 889 arcn->skip = arcn->sb.st_size; 890 } else { 891 tty_warn(1, "GNU Long %s found in posix ustar archive.", 892 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 893 } 894 break; 895 case CONTTYPE: 896 case AREGTYPE: 897 case REGTYPE: 898 default: 899 /* 900 * these types have file data that follows. Set the skip and 901 * pad fields. 902 */ 903 arcn->type = PAX_REG; 904 arcn->pad = TAR_PAD(arcn->sb.st_size); 905 arcn->skip = arcn->sb.st_size; 906 arcn->sb.st_mode |= S_IFREG; 907 break; 908 } 909 return(0); 910 } 911 912 static int 913 expandname(char *buf, size_t len, char **gnu_name, const char *name) 914 { 915 if (*gnu_name) { 916 len = strlcpy(buf, *gnu_name, len); 917 free(*gnu_name); 918 *gnu_name = NULL; 919 } else { 920 len = strlcpy(buf, name, len); 921 } 922 return len; 923 } 924 925 static void 926 longlink(ARCHD *arcn) 927 { 928 ARCHD larc; 929 930 memset(&larc, 0, sizeof(larc)); 931 932 switch (arcn->type) { 933 case PAX_SLK: 934 case PAX_HRG: 935 case PAX_HLK: 936 larc.type = PAX_GLL; 937 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 938 sizeof(larc.ln_name)); 939 gnu_hack_string = arcn->ln_name; 940 gnu_hack_len = arcn->ln_nlen + 1; 941 break; 942 default: 943 larc.nlen = strlcpy(larc.name, "././@LongLink", 944 sizeof(larc.name)); 945 gnu_hack_string = arcn->name; 946 gnu_hack_len = arcn->nlen + 1; 947 larc.type = PAX_GLF; 948 } 949 /* 950 * We need a longlink now. 951 */ 952 ustar_wr(&larc); 953 } 954 955 /* 956 * ustar_wr() 957 * write a ustar header for the file specified in the ARCHD to the archive 958 * Have to check for file types that cannot be stored and file names that 959 * are too long. Be careful of the term (last arg) to ul_oct, we only use 960 * '\0' for the termination character (this is different than picky tar) 961 * ASSUMED: space after header in header block is zero filled 962 * Return: 963 * 0 if file has data to be written after the header, 1 if file has NO 964 * data to write after the header, -1 if archive write failed 965 */ 966 967 int 968 ustar_wr(ARCHD *arcn) 969 { 970 HD_USTAR *hd; 971 char *pt; 972 char hdblk[sizeof(HD_USTAR)]; 973 const char *user, *group; 974 975 /* 976 * check for those file system types ustar cannot store 977 */ 978 if (arcn->type == PAX_SCK) { 979 tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name); 980 return(1); 981 } 982 983 /* 984 * check the length of the linkname 985 */ 986 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 987 (arcn->type == PAX_HRG)) && 988 (arcn->ln_nlen >= sizeof(hd->linkname))){ 989 if (is_gnutar) { 990 longlink(arcn); 991 } else { 992 tty_warn(1, "Link name too long for ustar %s", 993 arcn->ln_name); 994 return(1); 995 } 996 } 997 998 /* 999 * split the path name into prefix and name fields (if needed). if 1000 * pt != arcn->name, the name has to be split 1001 */ 1002 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1003 if (is_gnutar) { 1004 longlink(arcn); 1005 pt = arcn->name; 1006 } else { 1007 tty_warn(1, "File name too long for ustar %s", 1008 arcn->name); 1009 return(1); 1010 } 1011 } 1012 1013 /* 1014 * zero out the header so we don't have to worry about zero fill below 1015 */ 1016 memset(hdblk, 0, sizeof(hdblk)); 1017 hd = (HD_USTAR *)hdblk; 1018 arcn->pad = 0L; 1019 1020 /* 1021 * split the name, or zero out the prefix 1022 */ 1023 if (pt != arcn->name) { 1024 /* 1025 * name was split, pt points at the / where the split is to 1026 * occur, we remove the / and copy the first part to the prefix 1027 */ 1028 *pt = '\0'; 1029 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1030 *pt++ = '/'; 1031 } 1032 1033 /* 1034 * copy the name part. this may be the whole path or the part after 1035 * the prefix 1036 */ 1037 strlcpy(hd->name, pt, sizeof(hd->name)); 1038 1039 /* 1040 * set the fields in the header that are type dependent 1041 */ 1042 switch(arcn->type) { 1043 case PAX_DIR: 1044 hd->typeflag = DIRTYPE; 1045 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1046 goto out; 1047 break; 1048 case PAX_CHR: 1049 case PAX_BLK: 1050 if (arcn->type == PAX_CHR) 1051 hd->typeflag = CHRTYPE; 1052 else 1053 hd->typeflag = BLKTYPE; 1054 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1055 sizeof(hd->devmajor), 3) || 1056 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1057 sizeof(hd->devminor), 3) || 1058 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1059 goto out; 1060 break; 1061 case PAX_FIF: 1062 hd->typeflag = FIFOTYPE; 1063 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1064 goto out; 1065 break; 1066 case PAX_GLL: 1067 case PAX_SLK: 1068 case PAX_HLK: 1069 case PAX_HRG: 1070 if (arcn->type == PAX_SLK) 1071 hd->typeflag = SYMTYPE; 1072 else if (arcn->type == PAX_GLL) 1073 hd->typeflag = LONGLINKTYPE; 1074 else 1075 hd->typeflag = LNKTYPE; 1076 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1077 if (ul_oct((u_long)gnu_hack_len, hd->size, 1078 sizeof(hd->size), 3)) 1079 goto out; 1080 break; 1081 case PAX_GLF: 1082 case PAX_REG: 1083 case PAX_CTG: 1084 default: 1085 /* 1086 * file data with this type, set the padding 1087 */ 1088 if (arcn->type == PAX_GLF) { 1089 hd->typeflag = LONGNAMETYPE; 1090 arcn->pad = TAR_PAD(gnu_hack_len); 1091 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1092 sizeof(hd->size), 3)) { 1093 tty_warn(1,"File is too long for ustar %s", 1094 arcn->org_name); 1095 return(1); 1096 } 1097 } else { 1098 if (arcn->type == PAX_CTG) 1099 hd->typeflag = CONTTYPE; 1100 else 1101 hd->typeflag = REGTYPE; 1102 arcn->pad = TAR_PAD(arcn->sb.st_size); 1103 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1104 sizeof(hd->size), 3)) { 1105 tty_warn(1,"File is too long for ustar %s", 1106 arcn->org_name); 1107 return(1); 1108 } 1109 } 1110 break; 1111 } 1112 1113 strncpy(hd->magic, TMAGIC, TMAGLEN); 1114 if (is_gnutar) 1115 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1116 else 1117 strncpy(hd->version, TVERSION, TVERSLEN); 1118 1119 /* 1120 * set the remaining fields. Some versions want all 16 bits of mode 1121 * we better humor them (they really do not meet spec though).... 1122 */ 1123 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1124 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1125 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1126 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1127 goto out; 1128 user = user_from_uid(arcn->sb.st_uid, 1); 1129 group = group_from_gid(arcn->sb.st_gid, 1); 1130 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1131 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1132 1133 /* 1134 * calculate and store the checksum write the header to the archive 1135 * return 0 tells the caller to now write the file data, 1 says no data 1136 * needs to be written 1137 */ 1138 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1139 sizeof(hd->chksum), 3)) 1140 goto out; 1141 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1142 return(-1); 1143 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1144 return(-1); 1145 if (gnu_hack_string) { 1146 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1147 int pad = gnu_hack_len; 1148 gnu_hack_string = NULL; 1149 gnu_hack_len = 0; 1150 if (res < 0) 1151 return(-1); 1152 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1153 return(-1); 1154 } 1155 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1156 return(0); 1157 return(1); 1158 1159 out: 1160 /* 1161 * header field is out of range 1162 */ 1163 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1164 return(1); 1165 } 1166 1167 /* 1168 * name_split() 1169 * see if the name has to be split for storage in a ustar header. We try 1170 * to fit the entire name in the name field without splitting if we can. 1171 * The split point is always at a / 1172 * Return 1173 * character pointer to split point (always the / that is to be removed 1174 * if the split is not needed, the points is set to the start of the file 1175 * name (it would violate the spec to split there). A NULL is returned if 1176 * the file name is too long 1177 */ 1178 1179 static char * 1180 name_split(char *name, int len) 1181 { 1182 char *start; 1183 1184 /* 1185 * check to see if the file name is small enough to fit in the name 1186 * field. if so just return a pointer to the name. 1187 */ 1188 if (len < TNMSZ) 1189 return(name); 1190 if (len > (TPFSZ + TNMSZ)) 1191 return(NULL); 1192 1193 /* 1194 * we start looking at the biggest sized piece that fits in the name 1195 * field. We walk forward looking for a slash to split at. The idea is 1196 * to find the biggest piece to fit in the name field (or the smallest 1197 * prefix we can find) (the -1 is correct the biggest piece would 1198 * include the slash between the two parts that gets thrown away) 1199 */ 1200 start = name + len - TNMSZ; 1201 while ((*start != '\0') && (*start != '/')) 1202 ++start; 1203 1204 /* 1205 * if we hit the end of the string, this name cannot be split, so we 1206 * cannot store this file. 1207 */ 1208 if (*start == '\0') 1209 return(NULL); 1210 len = start - name; 1211 1212 /* 1213 * NOTE: /str where the length of str == TNMSZ can not be stored under 1214 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1215 * the file would then expand on extract to //str. The len == 0 below 1216 * makes this special case follow the spec to the letter. 1217 */ 1218 if ((len >= TPFSZ) || (len == 0)) 1219 return(NULL); 1220 1221 /* 1222 * ok have a split point, return it to the caller 1223 */ 1224 return(start); 1225 } 1226 1227 /* convert a glob into a RE, and add it to the list */ 1228 static int 1229 tar_gnutar_exclude_one(const char *line, size_t len) 1230 { 1231 char sbuf[MAXPATHLEN * 2 + 1 + 5]; 1232 int i, j; 1233 1234 if (line[len - 1] == '\n') 1235 len--; 1236 for (i = 0, j = 2; i < len; i++) { 1237 /* 1238 * convert glob to regexp, escaping everything 1239 */ 1240 if (line[i] == '*') 1241 sbuf[j++] = '.'; 1242 else if (line[i] == '?') { 1243 sbuf[j++] = '.'; 1244 continue; 1245 } else if (!isalnum(line[i]) && !isblank(line[i])) 1246 sbuf[j++] = '\\'; 1247 sbuf[j++] = line[i]; 1248 } 1249 sbuf[0] = sbuf[j + 1] = sbuf[j + 2] = '/'; 1250 sbuf[1] = '^'; 1251 sbuf[j] = '$'; 1252 sbuf[j + 3] = '\0'; 1253 if (rep_add(sbuf) < 0) 1254 return (-1); 1255 1256 return (0); 1257 } 1258 1259 /* 1260 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1261 * we go through each line of the file, building a string from the "glob" 1262 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1263 * to rep_add(), which will add a empty replacement (exclusion), for the 1264 * named files. 1265 */ 1266 int 1267 tar_gnutar_minus_minus_exclude(path) 1268 const char *path; 1269 { 1270 size_t len = strlen(path); 1271 1272 if (len > MAXPATHLEN) 1273 tty_warn(0, "pathname too long: %s", path); 1274 1275 return (tar_gnutar_exclude_one(path, len)); 1276 } 1277 1278 int 1279 tar_gnutar_X_compat(path) 1280 const char *path; 1281 { 1282 char *line; 1283 FILE *fp; 1284 int lineno = 0; 1285 size_t len; 1286 1287 fp = fopen(path, "r"); 1288 if (fp == NULL) { 1289 tty_warn(1, "can not open %s: %s", path, 1290 strerror(errno)); 1291 return(-1); 1292 } 1293 1294 while ((line = fgetln(fp, &len))) { 1295 lineno++; 1296 if (len > MAXPATHLEN) { 1297 tty_warn(0, "pathname too long, line %d of %s", 1298 lineno, path); 1299 } 1300 if (tar_gnutar_exclude_one(line, len)) 1301 return (-1); 1302 } 1303 return (0); 1304 } 1305