1 /* $OpenBSD: ar_subs.c,v 1.33 2009/10/27 23:59:22 deraadt Exp $ */ 2 /* $NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1992 Keith Muller. 6 * Copyright (c) 1992, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Keith Muller of the University of California, San Diego. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 #include <sys/time.h> 39 #include <sys/stat.h> 40 #include <sys/param.h> 41 #include <signal.h> 42 #include <string.h> 43 #include <stdio.h> 44 #include <fcntl.h> 45 #include <errno.h> 46 #include <unistd.h> 47 #include <stdlib.h> 48 #include "pax.h" 49 #include "extern.h" 50 51 static void wr_archive(ARCHD *, int is_app); 52 static int get_arc(void); 53 static int next_head(ARCHD *); 54 extern sigset_t s_mask; 55 56 /* 57 * Routines which control the overall operation modes of pax as specified by 58 * the user: list, append, read ... 59 */ 60 61 static char hdbuf[BLKMULT]; /* space for archive header on read */ 62 u_long flcnt; /* number of files processed */ 63 64 /* 65 * list() 66 * list the contents of an archive which match user supplied pattern(s) 67 * (no pattern matches all). 68 */ 69 70 void 71 list(void) 72 { 73 ARCHD *arcn; 74 int res; 75 ARCHD archd; 76 time_t now; 77 78 arcn = &archd; 79 /* 80 * figure out archive type; pass any format specific options to the 81 * archive option processing routine; call the format init routine. We 82 * also save current time for ls_list() so we do not make a system 83 * call for each file we need to print. If verbose (vflag) start up 84 * the name and group caches. 85 */ 86 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 87 ((*frmt->st_rd)() < 0)) 88 return; 89 90 if (vflag && ((uidtb_start() < 0) || (gidtb_start() < 0))) 91 return; 92 93 now = time(NULL); 94 95 /* 96 * step through the archive until the format says it is done 97 */ 98 while (next_head(arcn) == 0) { 99 if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { 100 /* 101 * we need to read, to get the real filename 102 */ 103 off_t cnt; 104 if (!(*frmt->rd_data)(arcn, arcn->type == PAX_GLF 105 ? -1 : -2, &cnt)) 106 (void)rd_skip(cnt + arcn->pad); 107 continue; 108 } 109 110 /* 111 * check for pattern, and user specified options match. 112 * When all patterns are matched we are done. 113 */ 114 if ((res = pat_match(arcn)) < 0) 115 break; 116 117 if ((res == 0) && (sel_chk(arcn) == 0)) { 118 /* 119 * pattern resulted in a selected file 120 */ 121 if (pat_sel(arcn) < 0) 122 break; 123 124 /* 125 * modify the name as requested by the user if name 126 * survives modification, do a listing of the file 127 */ 128 if ((res = mod_name(arcn)) < 0) 129 break; 130 if (res == 0) 131 ls_list(arcn, now, stdout); 132 } 133 134 /* 135 * skip to next archive format header using values calculated 136 * by the format header read routine 137 */ 138 if (rd_skip(arcn->skip + arcn->pad) == 1) 139 break; 140 } 141 142 /* 143 * all done, let format have a chance to cleanup, and make sure that 144 * the patterns supplied by the user were all matched 145 */ 146 (void)(*frmt->end_rd)(); 147 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 148 ar_close(); 149 pat_chk(); 150 } 151 152 /* 153 * extract() 154 * extract the member(s) of an archive as specified by user supplied 155 * pattern(s) (no patterns extracts all members) 156 */ 157 158 void 159 extract(void) 160 { 161 ARCHD *arcn; 162 int res; 163 off_t cnt; 164 ARCHD archd; 165 struct stat sb; 166 int fd; 167 time_t now; 168 169 arcn = &archd; 170 /* 171 * figure out archive type; pass any format specific options to the 172 * archive option processing routine; call the format init routine; 173 * start up the directory modification time and access mode database 174 */ 175 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 176 ((*frmt->st_rd)() < 0) || (dir_start() < 0)) 177 return; 178 179 /* 180 * When we are doing interactive rename, we store the mapping of names 181 * so we can fix up hard links files later in the archive. 182 */ 183 if (iflag && (name_start() < 0)) 184 return; 185 186 now = time(NULL); 187 188 /* 189 * step through each entry on the archive until the format read routine 190 * says it is done 191 */ 192 while (next_head(arcn) == 0) { 193 if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { 194 /* 195 * we need to read, to get the real filename 196 */ 197 if (!(*frmt->rd_data)(arcn, arcn->type == PAX_GLF 198 ? -1 : -2, &cnt)) 199 (void)rd_skip(cnt + arcn->pad); 200 continue; 201 } 202 203 /* 204 * check for pattern, and user specified options match. When 205 * all the patterns are matched we are done 206 */ 207 if ((res = pat_match(arcn)) < 0) 208 break; 209 210 if ((res > 0) || (sel_chk(arcn) != 0)) { 211 /* 212 * file is not selected. skip past any file data and 213 * padding and go back for the next archive member 214 */ 215 (void)rd_skip(arcn->skip + arcn->pad); 216 continue; 217 } 218 219 /* 220 * with -u or -D only extract when the archive member is newer 221 * than the file with the same name in the file system (no 222 * test of being the same type is required). 223 * NOTE: this test is done BEFORE name modifications as 224 * specified by pax. this operation can be confusing to the 225 * user who might expect the test to be done on an existing 226 * file AFTER the name mod. In honesty the pax spec is probably 227 * flawed in this respect. 228 */ 229 if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) { 230 if (uflag && Dflag) { 231 if ((arcn->sb.st_mtime <= sb.st_mtime) && 232 (arcn->sb.st_ctime <= sb.st_ctime)) { 233 (void)rd_skip(arcn->skip + arcn->pad); 234 continue; 235 } 236 } else if (Dflag) { 237 if (arcn->sb.st_ctime <= sb.st_ctime) { 238 (void)rd_skip(arcn->skip + arcn->pad); 239 continue; 240 } 241 } else if (arcn->sb.st_mtime <= sb.st_mtime) { 242 (void)rd_skip(arcn->skip + arcn->pad); 243 continue; 244 } 245 } 246 247 /* 248 * this archive member is now been selected. modify the name. 249 */ 250 if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0)) 251 break; 252 if (res > 0) { 253 /* 254 * a bad name mod, skip and purge name from link table 255 */ 256 purg_lnk(arcn); 257 (void)rd_skip(arcn->skip + arcn->pad); 258 continue; 259 } 260 261 /* 262 * Non standard -Y and -Z flag. When the existing file is 263 * same age or newer skip 264 */ 265 if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { 266 if (Yflag && Zflag) { 267 if ((arcn->sb.st_mtime <= sb.st_mtime) && 268 (arcn->sb.st_ctime <= sb.st_ctime)) { 269 (void)rd_skip(arcn->skip + arcn->pad); 270 continue; 271 } 272 } else if (Yflag) { 273 if (arcn->sb.st_ctime <= sb.st_ctime) { 274 (void)rd_skip(arcn->skip + arcn->pad); 275 continue; 276 } 277 } else if (arcn->sb.st_mtime <= sb.st_mtime) { 278 (void)rd_skip(arcn->skip + arcn->pad); 279 continue; 280 } 281 } 282 283 if (vflag) { 284 if (vflag > 1) 285 ls_list(arcn, now, listf); 286 else { 287 (void)safe_print(arcn->name, listf); 288 vfpart = 1; 289 } 290 } 291 292 /* 293 * if required, chdir around. 294 */ 295 if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) 296 if (chdir(arcn->pat->chdname) != 0) 297 syswarn(1, errno, "Cannot chdir to %s", 298 arcn->pat->chdname); 299 300 /* 301 * all ok, extract this member based on type 302 */ 303 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 304 /* 305 * process archive members that are not regular files. 306 * throw out padding and any data that might follow the 307 * header (as determined by the format). 308 */ 309 if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) 310 res = lnk_creat(arcn); 311 else 312 res = node_creat(arcn); 313 314 (void)rd_skip(arcn->skip + arcn->pad); 315 if (res < 0) 316 purg_lnk(arcn); 317 318 if (vflag && vfpart) { 319 (void)putc('\n', listf); 320 vfpart = 0; 321 } 322 goto popd; 323 } 324 /* 325 * we have a file with data here. If we can not create it, skip 326 * over the data and purge the name from hard link table 327 */ 328 if ((fd = file_creat(arcn)) < 0) { 329 (void)rd_skip(arcn->skip + arcn->pad); 330 purg_lnk(arcn); 331 goto popd; 332 } 333 /* 334 * extract the file from the archive and skip over padding and 335 * any unprocessed data 336 */ 337 res = (*frmt->rd_data)(arcn, fd, &cnt); 338 file_close(arcn, fd); 339 if (vflag && vfpart) { 340 (void)putc('\n', listf); 341 vfpart = 0; 342 } 343 if (!res) 344 (void)rd_skip(cnt + arcn->pad); 345 346 popd: 347 /* 348 * if required, chdir around. 349 */ 350 if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) 351 if (fchdir(cwdfd) != 0) 352 syswarn(1, errno, 353 "Can't fchdir to starting directory"); 354 } 355 356 /* 357 * all done, restore directory modes and times as required; make sure 358 * all patterns supplied by the user were matched; block off signals 359 * to avoid chance for multiple entry into the cleanup code. 360 */ 361 (void)(*frmt->end_rd)(); 362 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 363 ar_close(); 364 proc_dir(); 365 pat_chk(); 366 } 367 368 /* 369 * wr_archive() 370 * Write an archive. used in both creating a new archive and appends on 371 * previously written archive. 372 */ 373 374 static void 375 wr_archive(ARCHD *arcn, int is_app) 376 { 377 int res; 378 int hlk; 379 int wr_one; 380 off_t cnt; 381 int (*wrf)(ARCHD *); 382 int fd = -1; 383 time_t now; 384 385 /* 386 * if this format supports hard link storage, start up the database 387 * that detects them. 388 */ 389 if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) 390 return; 391 392 /* 393 * if this is not append, and there are no files, we do not write a 394 * trailer 395 */ 396 wr_one = is_app; 397 398 /* 399 * start up the file traversal code and format specific write 400 */ 401 if (ftree_start() < 0) { 402 if (is_app) 403 goto trailer; 404 return; 405 } else if (((*frmt->st_wr)() < 0)) 406 return; 407 408 wrf = frmt->wr; 409 410 /* 411 * When we are doing interactive rename, we store the mapping of names 412 * so we can fix up hard links files later in the archive. 413 */ 414 if (iflag && (name_start() < 0)) 415 return; 416 417 now = time(NULL); 418 419 /* 420 * while there are files to archive, process them one at at time 421 */ 422 while (next_file(arcn) == 0) { 423 /* 424 * check if this file meets user specified options match. 425 */ 426 if (sel_chk(arcn) != 0) 427 continue; 428 fd = -1; 429 if (uflag) { 430 /* 431 * only archive if this file is newer than a file with 432 * the same name that is already stored on the archive 433 */ 434 if ((res = chk_ftime(arcn)) < 0) 435 break; 436 if (res > 0) { 437 ftree_skipped_newer(arcn); 438 continue; 439 } 440 } 441 442 /* 443 * this file is considered selected now. see if this is a hard 444 * link to a file already stored 445 */ 446 ftree_sel(arcn); 447 if (hlk && (chk_lnk(arcn) < 0)) 448 break; 449 450 if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || 451 (arcn->type == PAX_CTG)) { 452 /* 453 * we will have to read this file. by opening it now we 454 * can avoid writing a header to the archive for a file 455 * we were later unable to read (we also purge it from 456 * the link table). 457 */ 458 if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { 459 syswarn(1,errno, "Unable to open %s to read", 460 arcn->org_name); 461 purg_lnk(arcn); 462 continue; 463 } 464 } 465 466 /* 467 * Now modify the name as requested by the user 468 */ 469 if ((res = mod_name(arcn)) < 0) { 470 /* 471 * name modification says to skip this file, close the 472 * file and purge link table entry 473 */ 474 rdfile_close(arcn, &fd); 475 purg_lnk(arcn); 476 break; 477 } 478 479 if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { 480 /* 481 * unable to obtain the crc we need, close the file, 482 * purge link table entry 483 */ 484 rdfile_close(arcn, &fd); 485 purg_lnk(arcn); 486 continue; 487 } 488 489 if (vflag) { 490 if (vflag > 1) 491 ls_list(arcn, now, listf); 492 else { 493 (void)safe_print(arcn->name, listf); 494 vfpart = 1; 495 } 496 } 497 ++flcnt; 498 499 /* 500 * looks safe to store the file, have the format specific 501 * routine write routine store the file header on the archive 502 */ 503 if ((res = (*wrf)(arcn)) < 0) { 504 rdfile_close(arcn, &fd); 505 break; 506 } 507 wr_one = 1; 508 if (res > 0) { 509 /* 510 * format write says no file data needs to be stored 511 * so we are done messing with this file 512 */ 513 if (vflag && vfpart) { 514 (void)putc('\n', listf); 515 vfpart = 0; 516 } 517 rdfile_close(arcn, &fd); 518 continue; 519 } 520 521 /* 522 * Add file data to the archive, quit on write error. if we 523 * cannot write the entire file contents to the archive we 524 * must pad the archive to replace the missing file data 525 * (otherwise during an extract the file header for the file 526 * which FOLLOWS this one will not be where we expect it to 527 * be). 528 */ 529 res = (*frmt->wr_data)(arcn, fd, &cnt); 530 rdfile_close(arcn, &fd); 531 if (vflag && vfpart) { 532 (void)putc('\n', listf); 533 vfpart = 0; 534 } 535 if (res < 0) 536 break; 537 538 /* 539 * pad as required, cnt is number of bytes not written 540 */ 541 if (((cnt > 0) && (wr_skip(cnt) < 0)) || 542 ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) 543 break; 544 } 545 546 trailer: 547 /* 548 * tell format to write trailer; pad to block boundary; reset directory 549 * mode/access times, and check if all patterns supplied by the user 550 * were matched. block off signals to avoid chance for multiple entry 551 * into the cleanup code 552 */ 553 if (wr_one) { 554 (*frmt->end_wr)(); 555 wr_fin(); 556 } 557 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 558 ar_close(); 559 if (tflag) 560 proc_dir(); 561 ftree_chk(); 562 } 563 564 /* 565 * append() 566 * Add file to previously written archive. Archive format specified by the 567 * user must agree with archive. The archive is read first to collect 568 * modification times (if -u) and locate the archive trailer. The archive 569 * is positioned in front of the record with the trailer and wr_archive() 570 * is called to add the new members. 571 * PAX IMPLEMENTATION DETAIL NOTE: 572 * -u is implemented by adding the new members to the end of the archive. 573 * Care is taken so that these do not end up as links to the older 574 * version of the same file already stored in the archive. It is expected 575 * when extraction occurs these newer versions will over-write the older 576 * ones stored "earlier" in the archive (this may be a bad assumption as 577 * it depends on the implementation of the program doing the extraction). 578 * It is really difficult to splice in members without either re-writing 579 * the entire archive (from the point were the old version was), or having 580 * assistance of the format specification in terms of a special update 581 * header that invalidates a previous archive record. The posix spec left 582 * the method used to implement -u unspecified. This pax is able to 583 * over write existing files that it creates. 584 */ 585 586 void 587 append(void) 588 { 589 ARCHD *arcn; 590 int res; 591 ARCHD archd; 592 FSUB *orgfrmt; 593 int udev; 594 off_t tlen; 595 596 arcn = &archd; 597 orgfrmt = frmt; 598 599 /* 600 * Do not allow an append operation if the actual archive is of a 601 * different format than the user specified format. 602 */ 603 if (get_arc() < 0) 604 return; 605 if ((orgfrmt != NULL) && (orgfrmt != frmt)) { 606 paxwarn(1, "Cannot mix current archive format %s with %s", 607 frmt->name, orgfrmt->name); 608 return; 609 } 610 611 /* 612 * pass the format any options and start up format 613 */ 614 if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) 615 return; 616 617 /* 618 * if we only are adding members that are newer, we need to save the 619 * mod times for all files we see. 620 */ 621 if (uflag && (ftime_start() < 0)) 622 return; 623 624 /* 625 * some archive formats encode hard links by recording the device and 626 * file serial number (inode) but copy the file anyway (multiple times) 627 * to the archive. When we append, we run the risk that newly added 628 * files may have the same device and inode numbers as those recorded 629 * on the archive but during a previous run. If this happens, when the 630 * archive is extracted we get INCORRECT hard links. We avoid this by 631 * remapping the device numbers so that newly added files will never 632 * use the same device number as one found on the archive. remapping 633 * allows new members to safely have links among themselves. remapping 634 * also avoids problems with file inode (serial number) truncations 635 * when the inode number is larger than storage space in the archive 636 * header. See the remap routines for more details. 637 */ 638 if ((udev = frmt->udev) && (dev_start() < 0)) 639 return; 640 641 /* 642 * reading the archive may take a long time. If verbose tell the user 643 */ 644 if (vflag) { 645 (void)fprintf(listf, 646 "%s: Reading archive to position at the end...", argv0); 647 vfpart = 1; 648 } 649 650 /* 651 * step through the archive until the format says it is done 652 */ 653 while (next_head(arcn) == 0) { 654 /* 655 * check if this file meets user specified options. 656 */ 657 if (sel_chk(arcn) != 0) { 658 if (rd_skip(arcn->skip + arcn->pad) == 1) 659 break; 660 continue; 661 } 662 663 if (uflag) { 664 /* 665 * see if this is the newest version of this file has 666 * already been seen, if so skip. 667 */ 668 if ((res = chk_ftime(arcn)) < 0) 669 break; 670 if (res > 0) { 671 if (rd_skip(arcn->skip + arcn->pad) == 1) 672 break; 673 continue; 674 } 675 } 676 677 /* 678 * Store this device number. Device numbers seen during the 679 * read phase of append will cause newly appended files with a 680 * device number seen in the old part of the archive to be 681 * remapped to an unused device number. 682 */ 683 if ((udev && (add_dev(arcn) < 0)) || 684 (rd_skip(arcn->skip + arcn->pad) == 1)) 685 break; 686 } 687 688 /* 689 * done, finish up read and get the number of bytes to back up so we 690 * can add new members. The format might have used the hard link table, 691 * purge it. 692 */ 693 tlen = (*frmt->end_rd)(); 694 lnk_end(); 695 696 /* 697 * try to position for write, if this fails quit. if any error occurs, 698 * we will refuse to write 699 */ 700 if (appnd_start(tlen) < 0) 701 return; 702 703 /* 704 * tell the user we are done reading. 705 */ 706 if (vflag && vfpart) { 707 (void)fputs("done.\n", listf); 708 vfpart = 0; 709 } 710 711 /* 712 * go to the writing phase to add the new members 713 */ 714 wr_archive(arcn, 1); 715 } 716 717 /* 718 * archive() 719 * write a new archive 720 */ 721 722 void 723 archive(void) 724 { 725 ARCHD archd; 726 727 /* 728 * if we only are adding members that are newer, we need to save the 729 * mod times for all files; set up for writing; pass the format any 730 * options write the archive 731 */ 732 if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) 733 return; 734 if ((*frmt->options)() < 0) 735 return; 736 737 wr_archive(&archd, 0); 738 } 739 740 /* 741 * copy() 742 * copy files from one part of the file system to another. this does not 743 * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an 744 * archive was written and then extracted in the destination directory 745 * (except the files are forced to be under the destination directory). 746 */ 747 748 void 749 copy(void) 750 { 751 ARCHD *arcn; 752 int res; 753 int fddest; 754 char *dest_pt; 755 int dlen; 756 int drem; 757 int fdsrc = -1; 758 struct stat sb; 759 ARCHD archd; 760 char dirbuf[PAXPATHLEN+1]; 761 762 arcn = &archd; 763 /* 764 * set up the destination dir path and make sure it is a directory. We 765 * make sure we have a trailing / on the destination 766 */ 767 dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf)); 768 if (dlen >= sizeof(dirbuf) || 769 (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) { 770 paxwarn(1, "directory name is too long %s", dirptr); 771 return; 772 } 773 dest_pt = dirbuf + dlen; 774 if (*(dest_pt-1) != '/') { 775 *dest_pt++ = '/'; 776 *dest_pt = '\0'; 777 ++dlen; 778 } 779 drem = PAXPATHLEN - dlen; 780 781 if (stat(dirptr, &sb) < 0) { 782 syswarn(1, errno, "Cannot access destination directory %s", 783 dirptr); 784 return; 785 } 786 if (!S_ISDIR(sb.st_mode)) { 787 paxwarn(1, "Destination is not a directory %s", dirptr); 788 return; 789 } 790 791 /* 792 * start up the hard link table; file traversal routines and the 793 * modification time and access mode database 794 */ 795 if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) 796 return; 797 798 /* 799 * When we are doing interactive rename, we store the mapping of names 800 * so we can fix up hard links files later in the archive. 801 */ 802 if (iflag && (name_start() < 0)) 803 return; 804 805 /* 806 * set up to cp file trees 807 */ 808 cp_start(); 809 810 /* 811 * while there are files to archive, process them 812 */ 813 while (next_file(arcn) == 0) { 814 fdsrc = -1; 815 816 /* 817 * check if this file meets user specified options 818 */ 819 if (sel_chk(arcn) != 0) 820 continue; 821 822 /* 823 * if there is already a file in the destination directory with 824 * the same name and it is newer, skip the one stored on the 825 * archive. 826 * NOTE: this test is done BEFORE name modifications as 827 * specified by pax. this can be confusing to the user who 828 * might expect the test to be done on an existing file AFTER 829 * the name mod. In honesty the pax spec is probably flawed in 830 * this respect 831 */ 832 if (uflag || Dflag) { 833 /* 834 * create the destination name 835 */ 836 if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'), 837 drem + 1) > drem) { 838 paxwarn(1, "Destination pathname too long %s", 839 arcn->name); 840 continue; 841 } 842 843 /* 844 * if existing file is same age or newer skip 845 */ 846 res = lstat(dirbuf, &sb); 847 *dest_pt = '\0'; 848 849 if (res == 0) { 850 ftree_skipped_newer(arcn); 851 if (uflag && Dflag) { 852 if ((arcn->sb.st_mtime<=sb.st_mtime) && 853 (arcn->sb.st_ctime<=sb.st_ctime)) 854 continue; 855 } else if (Dflag) { 856 if (arcn->sb.st_ctime <= sb.st_ctime) 857 continue; 858 } else if (arcn->sb.st_mtime <= sb.st_mtime) 859 continue; 860 } 861 } 862 863 /* 864 * this file is considered selected. See if this is a hard link 865 * to a previous file; modify the name as requested by the 866 * user; set the final destination. 867 */ 868 ftree_sel(arcn); 869 if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0)) 870 break; 871 if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { 872 /* 873 * skip file, purge from link table 874 */ 875 purg_lnk(arcn); 876 continue; 877 } 878 879 /* 880 * Non standard -Y and -Z flag. When the existing file is 881 * same age or newer skip 882 */ 883 if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { 884 if (Yflag && Zflag) { 885 if ((arcn->sb.st_mtime <= sb.st_mtime) && 886 (arcn->sb.st_ctime <= sb.st_ctime)) 887 continue; 888 } else if (Yflag) { 889 if (arcn->sb.st_ctime <= sb.st_ctime) 890 continue; 891 } else if (arcn->sb.st_mtime <= sb.st_mtime) 892 continue; 893 } 894 895 if (vflag) { 896 (void)safe_print(arcn->name, listf); 897 vfpart = 1; 898 } 899 ++flcnt; 900 901 /* 902 * try to create a hard link to the src file if requested 903 * but make sure we are not trying to overwrite ourselves. 904 */ 905 if (lflag) 906 res = cross_lnk(arcn); 907 else 908 res = chk_same(arcn); 909 if (res <= 0) { 910 if (vflag && vfpart) { 911 (void)putc('\n', listf); 912 vfpart = 0; 913 } 914 continue; 915 } 916 917 /* 918 * have to create a new file 919 */ 920 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 921 /* 922 * create a link or special file 923 */ 924 if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) 925 res = lnk_creat(arcn); 926 else 927 res = node_creat(arcn); 928 if (res < 0) 929 purg_lnk(arcn); 930 if (vflag && vfpart) { 931 (void)putc('\n', listf); 932 vfpart = 0; 933 } 934 continue; 935 } 936 937 /* 938 * have to copy a regular file to the destination directory. 939 * first open source file and then create the destination file 940 */ 941 if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { 942 syswarn(1, errno, "Unable to open %s to read", 943 arcn->org_name); 944 purg_lnk(arcn); 945 continue; 946 } 947 if ((fddest = file_creat(arcn)) < 0) { 948 rdfile_close(arcn, &fdsrc); 949 purg_lnk(arcn); 950 continue; 951 } 952 953 /* 954 * copy source file data to the destination file 955 */ 956 cp_file(arcn, fdsrc, fddest); 957 file_close(arcn, fddest); 958 rdfile_close(arcn, &fdsrc); 959 960 if (vflag && vfpart) { 961 (void)putc('\n', listf); 962 vfpart = 0; 963 } 964 } 965 966 /* 967 * restore directory modes and times as required; make sure all 968 * patterns were selected block off signals to avoid chance for 969 * multiple entry into the cleanup code. 970 */ 971 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 972 ar_close(); 973 proc_dir(); 974 ftree_chk(); 975 } 976 977 /* 978 * next_head() 979 * try to find a valid header in the archive. Uses format specific 980 * routines to extract the header and id the trailer. Trailers may be 981 * located within a valid header or in an invalid header (the location 982 * is format specific. The inhead field from the option table tells us 983 * where to look for the trailer). 984 * We keep reading (and resyncing) until we get enough contiguous data 985 * to check for a header. If we cannot find one, we shift by a byte 986 * add a new byte from the archive to the end of the buffer and try again. 987 * If we get a read error, we throw out what we have (as we must have 988 * contiguous data) and start over again. 989 * ASSUMED: headers fit within a BLKMULT header. 990 * Return: 991 * 0 if we got a header, -1 if we are unable to ever find another one 992 * (we reached the end of input, or we reached the limit on retries. see 993 * the specs for rd_wrbuf() for more details) 994 */ 995 996 static int 997 next_head(ARCHD *arcn) 998 { 999 int ret; 1000 char *hdend; 1001 int res; 1002 int shftsz; 1003 int hsz; 1004 int in_resync = 0; /* set when we are in resync mode */ 1005 int cnt = 0; /* counter for trailer function */ 1006 int first = 1; /* on 1st read, EOF isn't premature. */ 1007 1008 /* 1009 * set up initial conditions, we want a whole frmt->hsz block as we 1010 * have no data yet. 1011 */ 1012 res = hsz = frmt->hsz; 1013 hdend = hdbuf; 1014 shftsz = hsz - 1; 1015 for (;;) { 1016 /* 1017 * keep looping until we get a contiguous FULL buffer 1018 * (frmt->hsz is the proper size) 1019 */ 1020 for (;;) { 1021 if ((ret = rd_wrbuf(hdend, res)) == res) 1022 break; 1023 1024 /* 1025 * If we read 0 bytes (EOF) from an archive when we 1026 * expect to find a header, we have stepped upon 1027 * an archive without the customary block of zeroes 1028 * end marker. It's just stupid to error out on 1029 * them, so exit gracefully. 1030 */ 1031 if (first && ret == 0) 1032 return(-1); 1033 first = 0; 1034 1035 /* 1036 * some kind of archive read problem, try to resync the 1037 * storage device, better give the user the bad news. 1038 */ 1039 if ((ret == 0) || (rd_sync() < 0)) { 1040 paxwarn(1,"Premature end of file on archive read"); 1041 return(-1); 1042 } 1043 if (!in_resync) { 1044 if (act == APPND) { 1045 paxwarn(1, 1046 "Archive I/O error, cannot continue"); 1047 return(-1); 1048 } 1049 paxwarn(1,"Archive I/O error. Trying to recover."); 1050 ++in_resync; 1051 } 1052 1053 /* 1054 * oh well, throw it all out and start over 1055 */ 1056 res = hsz; 1057 hdend = hdbuf; 1058 } 1059 1060 /* 1061 * ok we have a contiguous buffer of the right size. Call the 1062 * format read routine. If this was not a valid header and this 1063 * format stores trailers outside of the header, call the 1064 * format specific trailer routine to check for a trailer. We 1065 * have to watch out that we do not mis-identify file data or 1066 * block padding as a header or trailer. Format specific 1067 * trailer functions must NOT check for the trailer while we 1068 * are running in resync mode. Some trailer functions may tell 1069 * us that this block cannot contain a valid header either, so 1070 * we then throw out the entire block and start over. 1071 */ 1072 if ((*frmt->rd)(arcn, hdbuf) == 0) 1073 break; 1074 1075 if (!frmt->inhead) { 1076 /* 1077 * this format has trailers outside of valid headers 1078 */ 1079 if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){ 1080 /* 1081 * valid trailer found, drain input as required 1082 */ 1083 ar_drain(); 1084 return(-1); 1085 } 1086 1087 if (ret == 1) { 1088 /* 1089 * we are in resync and we were told to throw 1090 * the whole block out because none of the 1091 * bytes in this block can be used to form a 1092 * valid header 1093 */ 1094 res = hsz; 1095 hdend = hdbuf; 1096 continue; 1097 } 1098 } 1099 1100 /* 1101 * Brute force section. 1102 * not a valid header. We may be able to find a header yet. So 1103 * we shift over by one byte, and set up to read one byte at a 1104 * time from the archive and place it at the end of the buffer. 1105 * We will keep moving byte at a time until we find a header or 1106 * get a read error and have to start over. 1107 */ 1108 if (!in_resync) { 1109 if (act == APPND) { 1110 paxwarn(1,"Unable to append, archive header flaw"); 1111 return(-1); 1112 } 1113 paxwarn(1,"Invalid header, starting valid header search."); 1114 ++in_resync; 1115 } 1116 memmove(hdbuf, hdbuf+1, shftsz); 1117 res = 1; 1118 hdend = hdbuf + shftsz; 1119 } 1120 1121 /* 1122 * ok got a valid header, check for trailer if format encodes it in the 1123 * the header. NOTE: the parameters are different than trailer routines 1124 * which encode trailers outside of the header! 1125 */ 1126 if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) { 1127 /* 1128 * valid trailer found, drain input as required 1129 */ 1130 ar_drain(); 1131 return(-1); 1132 } 1133 1134 ++flcnt; 1135 return(0); 1136 } 1137 1138 /* 1139 * get_arc() 1140 * Figure out what format an archive is. Handles archive with flaws by 1141 * brute force searches for a legal header in any supported format. The 1142 * format id routines have to be careful to NOT mis-identify a format. 1143 * ASSUMED: headers fit within a BLKMULT header. 1144 * Return: 1145 * 0 if archive found -1 otherwise 1146 */ 1147 1148 static int 1149 get_arc(void) 1150 { 1151 int i; 1152 int hdsz = 0; 1153 int res; 1154 int minhd = BLKMULT; 1155 char *hdend; 1156 int notice = 0; 1157 1158 /* 1159 * find the smallest header size in all archive formats and then set up 1160 * to read the archive. 1161 */ 1162 for (i = 0; ford[i] >= 0; ++i) { 1163 if (fsub[ford[i]].hsz < minhd) 1164 minhd = fsub[ford[i]].hsz; 1165 } 1166 if (rd_start() < 0) 1167 return(-1); 1168 res = BLKMULT; 1169 hdsz = 0; 1170 hdend = hdbuf; 1171 for (;;) { 1172 for (;;) { 1173 /* 1174 * fill the buffer with at least the smallest header 1175 */ 1176 i = rd_wrbuf(hdend, res); 1177 if (i > 0) 1178 hdsz += i; 1179 if (hdsz >= minhd) 1180 break; 1181 1182 /* 1183 * if we cannot recover from a read error quit 1184 */ 1185 if ((i == 0) || (rd_sync() < 0)) 1186 goto out; 1187 1188 /* 1189 * when we get an error none of the data we already 1190 * have can be used to create a legal header (we just 1191 * got an error in the middle), so we throw it all out 1192 * and refill the buffer with fresh data. 1193 */ 1194 res = BLKMULT; 1195 hdsz = 0; 1196 hdend = hdbuf; 1197 if (!notice) { 1198 if (act == APPND) 1199 return(-1); 1200 paxwarn(1,"Cannot identify format. Searching..."); 1201 ++notice; 1202 } 1203 } 1204 1205 /* 1206 * we have at least the size of the smallest header in any 1207 * archive format. Look to see if we have a match. The array 1208 * ford[] is used to specify the header id order to reduce the 1209 * chance of incorrectly id'ing a valid header (some formats 1210 * may be subsets of each other and the order would then be 1211 * important). 1212 */ 1213 for (i = 0; ford[i] >= 0; ++i) { 1214 if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0) 1215 continue; 1216 frmt = &(fsub[ford[i]]); 1217 /* 1218 * yuck, to avoid slow special case code in the extract 1219 * routines, just push this header back as if it was 1220 * not seen. We have left extra space at start of the 1221 * buffer for this purpose. This is a bit ugly, but 1222 * adding all the special case code is far worse. 1223 */ 1224 pback(hdbuf, hdsz); 1225 return(0); 1226 } 1227 1228 /* 1229 * We have a flawed archive, no match. we start searching, but 1230 * we never allow additions to flawed archives 1231 */ 1232 if (!notice) { 1233 if (act == APPND) 1234 return(-1); 1235 paxwarn(1, "Cannot identify format. Searching..."); 1236 ++notice; 1237 } 1238 1239 /* 1240 * brute force search for a header that we can id. 1241 * we shift through byte at a time. this is slow, but we cannot 1242 * determine the nature of the flaw in the archive in a 1243 * portable manner 1244 */ 1245 if (--hdsz > 0) { 1246 memmove(hdbuf, hdbuf+1, hdsz); 1247 res = BLKMULT - hdsz; 1248 hdend = hdbuf + hdsz; 1249 } else { 1250 res = BLKMULT; 1251 hdend = hdbuf; 1252 hdsz = 0; 1253 } 1254 } 1255 1256 out: 1257 /* 1258 * we cannot find a header, bow, apologize and quit 1259 */ 1260 paxwarn(1, "Sorry, unable to determine archive format."); 1261 return(-1); 1262 } 1263