1 /* $OpenBSD: tables.c,v 1.47 2015/03/19 05:14:24 guenther Exp $ */ 2 /* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1992 Keith Muller. 6 * Copyright (c) 1992, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Keith Muller of the University of California, San Diego. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 #include <sys/time.h> 39 #include <sys/stat.h> 40 #include <fcntl.h> 41 #include <limits.h> 42 #include <signal.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <unistd.h> 46 #include <errno.h> 47 #include <stdlib.h> 48 #include "pax.h" 49 #include "tables.h" 50 #include "extern.h" 51 52 /* 53 * Routines for controlling the contents of all the different databases pax 54 * keeps. Tables are dynamically created only when they are needed. The 55 * goal was speed and the ability to work with HUGE archives. The databases 56 * were kept simple, but do have complex rules for when the contents change. 57 * As of this writing, the posix library functions were more complex than 58 * needed for this application (pax databases have very short lifetimes and 59 * do not survive after pax is finished). Pax is required to handle very 60 * large archives. These database routines carefully combine memory usage and 61 * temporary file storage in ways which will not significantly impact runtime 62 * performance while allowing the largest possible archives to be handled. 63 * Trying to force the fit to the posix database routines was not considered 64 * time well spent. 65 */ 66 67 static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ 68 static FTM **ftab = NULL; /* file time table for updating arch */ 69 static NAMT **ntab = NULL; /* interactive rename storage table */ 70 static DEVT **dtab = NULL; /* device/inode mapping tables */ 71 static ATDIR **atab = NULL; /* file tree directory time reset table */ 72 static DIRDATA *dirp = NULL; /* storage for setting created dir time/mode */ 73 static size_t dirsize; /* size of dirp table */ 74 static size_t dircnt = 0; /* entries in dir time/mode storage */ 75 static int ffd = -1; /* tmp file for file time table name storage */ 76 77 /* 78 * hard link table routines 79 * 80 * The hard link table tries to detect hard links to files using the device and 81 * inode values. We do this when writing an archive, so we can tell the format 82 * write routine that this file is a hard link to another file. The format 83 * write routine then can store this file in whatever way it wants (as a hard 84 * link if the format supports that like tar, or ignore this info like cpio). 85 * (Actually a field in the format driver table tells us if the format wants 86 * hard link info. if not, we do not waste time looking for them). We also use 87 * the same table when reading an archive. In that situation, this table is 88 * used by the format read routine to detect hard links from stored dev and 89 * inode numbers (like cpio). This will allow pax to create a link when one 90 * can be detected by the archive format. 91 */ 92 93 /* 94 * lnk_start 95 * Creates the hard link table. 96 * Return: 97 * 0 if created, -1 if failure 98 */ 99 100 int 101 lnk_start(void) 102 { 103 if (ltab != NULL) 104 return(0); 105 if ((ltab = calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { 106 paxwarn(1, "Cannot allocate memory for hard link table"); 107 return(-1); 108 } 109 return(0); 110 } 111 112 /* 113 * chk_lnk() 114 * Looks up entry in hard link hash table. If found, it copies the name 115 * of the file it is linked to (we already saw that file) into ln_name. 116 * lnkcnt is decremented and if goes to 1 the node is deleted from the 117 * database. (We have seen all the links to this file). If not found, 118 * we add the file to the database if it has the potential for having 119 * hard links to other files we may process (it has a link count > 1) 120 * Return: 121 * if found returns 1; if not found returns 0; -1 on error 122 */ 123 124 int 125 chk_lnk(ARCHD *arcn) 126 { 127 HRDLNK *pt; 128 HRDLNK **ppt; 129 u_int indx; 130 131 if (ltab == NULL) 132 return(-1); 133 /* 134 * ignore those nodes that cannot have hard links 135 */ 136 if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) 137 return(0); 138 139 /* 140 * hash inode number and look for this file 141 */ 142 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; 143 if ((pt = ltab[indx]) != NULL) { 144 /* 145 * its hash chain in not empty, walk down looking for it 146 */ 147 ppt = &(ltab[indx]); 148 while (pt != NULL) { 149 if ((pt->ino == arcn->sb.st_ino) && 150 (pt->dev == arcn->sb.st_dev)) 151 break; 152 ppt = &(pt->fow); 153 pt = pt->fow; 154 } 155 156 if (pt != NULL) { 157 /* 158 * found a link. set the node type and copy in the 159 * name of the file it is to link to. we need to 160 * handle hardlinks to regular files differently than 161 * other links. 162 */ 163 arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name, 164 sizeof(arcn->ln_name)); 165 /* XXX truncate? */ 166 if (arcn->nlen >= sizeof(arcn->name)) 167 arcn->nlen = sizeof(arcn->name) - 1; 168 if (arcn->type == PAX_REG) 169 arcn->type = PAX_HRG; 170 else 171 arcn->type = PAX_HLK; 172 173 /* 174 * if we have found all the links to this file, remove 175 * it from the database 176 */ 177 if (--pt->nlink <= 1) { 178 *ppt = pt->fow; 179 free(pt->name); 180 free(pt); 181 } 182 return(1); 183 } 184 } 185 186 /* 187 * we never saw this file before. It has links so we add it to the 188 * front of this hash chain 189 */ 190 if ((pt = malloc(sizeof(HRDLNK))) != NULL) { 191 if ((pt->name = strdup(arcn->name)) != NULL) { 192 pt->dev = arcn->sb.st_dev; 193 pt->ino = arcn->sb.st_ino; 194 pt->nlink = arcn->sb.st_nlink; 195 pt->fow = ltab[indx]; 196 ltab[indx] = pt; 197 return(0); 198 } 199 free(pt); 200 } 201 202 paxwarn(1, "Hard link table out of memory"); 203 return(-1); 204 } 205 206 /* 207 * purg_lnk 208 * remove reference for a file that we may have added to the data base as 209 * a potential source for hard links. We ended up not using the file, so 210 * we do not want to accidently point another file at it later on. 211 */ 212 213 void 214 purg_lnk(ARCHD *arcn) 215 { 216 HRDLNK *pt; 217 HRDLNK **ppt; 218 u_int indx; 219 220 if (ltab == NULL) 221 return; 222 /* 223 * do not bother to look if it could not be in the database 224 */ 225 if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || 226 PAX_IS_HARDLINK(arcn->type)) 227 return; 228 229 /* 230 * find the hash chain for this inode value, if empty return 231 */ 232 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; 233 if ((pt = ltab[indx]) == NULL) 234 return; 235 236 /* 237 * walk down the list looking for the inode/dev pair, unlink and 238 * free if found 239 */ 240 ppt = &(ltab[indx]); 241 while (pt != NULL) { 242 if ((pt->ino == arcn->sb.st_ino) && 243 (pt->dev == arcn->sb.st_dev)) 244 break; 245 ppt = &(pt->fow); 246 pt = pt->fow; 247 } 248 if (pt == NULL) 249 return; 250 251 /* 252 * remove and free it 253 */ 254 *ppt = pt->fow; 255 free(pt->name); 256 free(pt); 257 } 258 259 /* 260 * lnk_end() 261 * pull apart a existing link table so we can reuse it. We do this between 262 * read and write phases of append with update. (The format may have 263 * used the link table, and we need to start with a fresh table for the 264 * write phase 265 */ 266 267 void 268 lnk_end(void) 269 { 270 int i; 271 HRDLNK *pt; 272 HRDLNK *ppt; 273 274 if (ltab == NULL) 275 return; 276 277 for (i = 0; i < L_TAB_SZ; ++i) { 278 if (ltab[i] == NULL) 279 continue; 280 pt = ltab[i]; 281 ltab[i] = NULL; 282 283 /* 284 * free up each entry on this chain 285 */ 286 while (pt != NULL) { 287 ppt = pt; 288 pt = ppt->fow; 289 free(ppt->name); 290 free(ppt); 291 } 292 } 293 } 294 295 /* 296 * modification time table routines 297 * 298 * The modification time table keeps track of last modification times for all 299 * files stored in an archive during a write phase when -u is set. We only 300 * add a file to the archive if it is newer than a file with the same name 301 * already stored on the archive (if there is no other file with the same 302 * name on the archive it is added). This applies to writes and appends. 303 * An append with an -u must read the archive and store the modification time 304 * for every file on that archive before starting the write phase. It is clear 305 * that this is one HUGE database. To save memory space, the actual file names 306 * are stored in a scratch file and indexed by an in-memory hash table. The 307 * hash table is indexed by hashing the file path. The nodes in the table store 308 * the length of the filename and the lseek offset within the scratch file 309 * where the actual name is stored. Since there are never any deletions from 310 * this table, fragmentation of the scratch file is never a issue. Lookups 311 * seem to not exhibit any locality at all (files in the database are rarely 312 * looked up more than once...), so caching is just a waste of memory. The 313 * only limitation is the amount of scratch file space available to store the 314 * path names. 315 */ 316 317 /* 318 * ftime_start() 319 * create the file time hash table and open for read/write the scratch 320 * file. (after created it is unlinked, so when we exit we leave 321 * no witnesses). 322 * Return: 323 * 0 if the table and file was created ok, -1 otherwise 324 */ 325 326 int 327 ftime_start(void) 328 { 329 330 if (ftab != NULL) 331 return(0); 332 if ((ftab = calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { 333 paxwarn(1, "Cannot allocate memory for file time table"); 334 return(-1); 335 } 336 337 /* 338 * get random name and create temporary scratch file, unlink name 339 * so it will get removed on exit 340 */ 341 memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); 342 if ((ffd = mkstemp(tempfile)) < 0) { 343 syswarn(1, errno, "Unable to create temporary file: %s", 344 tempfile); 345 return(-1); 346 } 347 (void)unlink(tempfile); 348 349 return(0); 350 } 351 352 /* 353 * chk_ftime() 354 * looks up entry in file time hash table. If not found, the file is 355 * added to the hash table and the file named stored in the scratch file. 356 * If a file with the same name is found, the file times are compared and 357 * the most recent file time is retained. If the new file was younger (or 358 * was not in the database) the new file is selected for storage. 359 * Return: 360 * 0 if file should be added to the archive, 1 if it should be skipped, 361 * -1 on error 362 */ 363 364 int 365 chk_ftime(ARCHD *arcn) 366 { 367 FTM *pt; 368 int namelen; 369 u_int indx; 370 char ckname[PAXPATHLEN+1]; 371 372 /* 373 * no info, go ahead and add to archive 374 */ 375 if (ftab == NULL) 376 return(0); 377 378 /* 379 * hash the pathname and look up in table 380 */ 381 namelen = arcn->nlen; 382 indx = st_hash(arcn->name, namelen, F_TAB_SZ); 383 if ((pt = ftab[indx]) != NULL) { 384 /* 385 * the hash chain is not empty, walk down looking for match 386 * only read up the path names if the lengths match, speeds 387 * up the search a lot 388 */ 389 while (pt != NULL) { 390 if (pt->namelen == namelen) { 391 /* 392 * potential match, have to read the name 393 * from the scratch file. 394 */ 395 if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { 396 syswarn(1, errno, 397 "Failed ftime table seek"); 398 return(-1); 399 } 400 if (read(ffd, ckname, namelen) != namelen) { 401 syswarn(1, errno, 402 "Failed ftime table read"); 403 return(-1); 404 } 405 406 /* 407 * if the names match, we are done 408 */ 409 if (!strncmp(ckname, arcn->name, namelen)) 410 break; 411 } 412 413 /* 414 * try the next entry on the chain 415 */ 416 pt = pt->fow; 417 } 418 419 if (pt != NULL) { 420 /* 421 * found the file, compare the times, save the newer 422 */ 423 if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)) { 424 /* 425 * file is newer 426 */ 427 pt->mtim = arcn->sb.st_mtim; 428 return(0); 429 } 430 /* 431 * file is older 432 */ 433 return(1); 434 } 435 } 436 437 /* 438 * not in table, add it 439 */ 440 if ((pt = malloc(sizeof(FTM))) != NULL) { 441 /* 442 * add the name at the end of the scratch file, saving the 443 * offset. add the file to the head of the hash chain 444 */ 445 if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) { 446 if (write(ffd, arcn->name, namelen) == namelen) { 447 pt->mtim = arcn->sb.st_mtim; 448 pt->namelen = namelen; 449 pt->fow = ftab[indx]; 450 ftab[indx] = pt; 451 return(0); 452 } 453 syswarn(1, errno, "Failed write to file time table"); 454 } else 455 syswarn(1, errno, "Failed seek on file time table"); 456 } else 457 paxwarn(1, "File time table ran out of memory"); 458 459 if (pt != NULL) 460 free(pt); 461 return(-1); 462 } 463 464 /* 465 * escaping (absolute or w/"..") symlink table routines 466 * 467 * By default, an archive shouldn't be able extract to outside of the 468 * current directory. What should we do if the archive contains a symlink 469 * whose value is either absolute or contains ".." components? What we'll 470 * do is initially create the path as an empty file (to block attempts to 471 * reference _through_ it) and instead record its path and desired 472 * final value and mode. Then once all the other archive 473 * members are created (but before the pass to set timestamps on 474 * directories) we'll process those records, replacing the placeholder with 475 * the correct symlink and setting them to the correct mode, owner, group, 476 * and timestamps. 477 * 478 * Note: we also need to handle hardlinks to symlinks (barf) as well as 479 * hardlinks whose target is replaced by a later entry in the archive (barf^2). 480 * 481 * So we track things by dev+ino of the placeholder file, associating with 482 * that the value and mode of the final symlink and a list of paths that 483 * should all be hardlinks of that. We'll 'store' the symlink's desired 484 * timestamps, owner, and group by setting them on the placeholder file. 485 * 486 * The operations are: 487 * a) create an escaping symlink: create the placeholder file and add an entry 488 * for the new link 489 * b) create a hardlink: do the link. If the target turns out to be a 490 * zero-length file whose dev+ino are in the symlink table, then add this 491 * path to the list of names for that link 492 * c) perform deferred processing: for each entry, check each associated path: 493 * if it's a zero-length file with the correct dev+ino then recreate it as 494 * the specified symlink or hardlink to the first such 495 */ 496 497 struct slpath { 498 char *sp_path; 499 struct slpath *sp_next; 500 }; 501 struct slinode { 502 ino_t sli_ino; 503 char *sli_value; 504 struct slpath sli_paths; 505 struct slinode *sli_fow; /* hash table chain */ 506 dev_t sli_dev; 507 mode_t sli_mode; 508 }; 509 510 static struct slinode **slitab = NULL; 511 512 /* 513 * sltab_start() 514 * create the hash table 515 * Return: 516 * 0 if the table and file was created ok, -1 otherwise 517 */ 518 519 int 520 sltab_start(void) 521 { 522 523 if ((slitab = calloc(SL_TAB_SZ, sizeof *slitab)) == NULL) { 524 syswarn(1, errno, "symlink table"); 525 return(-1); 526 } 527 528 return(0); 529 } 530 531 /* 532 * sltab_add_sym() 533 * Create the placeholder and tracking info for an escaping symlink. 534 * Return: 535 * 0 on success, -1 otherwise 536 */ 537 538 int 539 sltab_add_sym(const char *path0, const char *value0, mode_t mode) 540 { 541 struct stat sb; 542 struct slinode *s; 543 struct slpath *p; 544 char *path, *value; 545 u_int indx; 546 int fd; 547 548 /* create the placeholder */ 549 fd = open(path0, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0600); 550 if (fd == -1) 551 return (-1); 552 if (fstat(fd, &sb) == -1) { 553 unlink(path0); 554 close(fd); 555 return (-1); 556 } 557 close(fd); 558 559 if (havechd && *path0 != '/') { 560 if ((path = realpath(path0, NULL)) == NULL) { 561 syswarn(1, errno, "Cannot canonicalize %s", path0); 562 unlink(path0); 563 return (-1); 564 } 565 } else if ((path = strdup(path0)) == NULL) { 566 syswarn(1, errno, "defered symlink path"); 567 unlink(path0); 568 return (-1); 569 } 570 if ((value = strdup(value0)) == NULL) { 571 syswarn(1, errno, "defered symlink value"); 572 unlink(path); 573 free(path); 574 return (-1); 575 } 576 577 /* now check the hash table for conflicting entry */ 578 indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ; 579 for (s = slitab[indx]; s != NULL; s = s->sli_fow) { 580 if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev) 581 continue; 582 583 /* 584 * One of our placeholders got removed behind our back and 585 * we've reused the inode. Weird, but clean up the mess. 586 */ 587 free(s->sli_value); 588 free(s->sli_paths.sp_path); 589 p = s->sli_paths.sp_next; 590 while (p != NULL) { 591 struct slpath *next_p = p->sp_next; 592 593 free(p->sp_path); 594 free(p); 595 p = next_p; 596 } 597 goto set_value; 598 } 599 600 /* Normal case: create a new node */ 601 if ((s = malloc(sizeof *s)) == NULL) { 602 syswarn(1, errno, "defered symlink"); 603 unlink(path); 604 free(path); 605 free(value); 606 return (-1); 607 } 608 s->sli_ino = sb.st_ino; 609 s->sli_dev = sb.st_dev; 610 s->sli_fow = slitab[indx]; 611 slitab[indx] = s; 612 613 set_value: 614 s->sli_paths.sp_path = path; 615 s->sli_paths.sp_next = NULL; 616 s->sli_value = value; 617 s->sli_mode = mode; 618 return (0); 619 } 620 621 /* 622 * sltab_add_link() 623 * A hardlink was created; if it looks like a placeholder, handle the 624 * tracking. 625 * Return: 626 * 0 if things are ok, -1 if something went wrong 627 */ 628 629 int 630 sltab_add_link(const char *path, const struct stat *sb) 631 { 632 struct slinode *s; 633 struct slpath *p; 634 u_int indx; 635 636 if (!S_ISREG(sb->st_mode) || sb->st_size != 0) 637 return (1); 638 639 /* find the hash table entry for this hardlink */ 640 indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ; 641 for (s = slitab[indx]; s != NULL; s = s->sli_fow) { 642 if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev) 643 continue; 644 645 if ((p = malloc(sizeof *p)) == NULL) { 646 syswarn(1, errno, "deferred symlink hardlink"); 647 return (-1); 648 } 649 if (havechd && *path != '/') { 650 if ((p->sp_path = realpath(path, NULL)) == NULL) { 651 syswarn(1, errno, "Cannot canonicalize %s", 652 path); 653 free(p); 654 return (-1); 655 } 656 } else if ((p->sp_path = strdup(path)) == NULL) { 657 syswarn(1, errno, "defered symlink hardlink path"); 658 free(p); 659 return (-1); 660 } 661 662 /* link it in */ 663 p->sp_next = s->sli_paths.sp_next; 664 s->sli_paths.sp_next = p; 665 return (0); 666 } 667 668 /* not found */ 669 return (1); 670 } 671 672 673 static int 674 sltab_process_one(struct slinode *s, struct slpath *p, const char *first, 675 int in_sig) 676 { 677 struct stat sb; 678 char *path = p->sp_path; 679 mode_t mode; 680 int err; 681 682 /* 683 * is it the expected placeholder? This can fail legimately 684 * if the archive overwrote the link with another, later entry, 685 * so don't warn. 686 */ 687 if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode) || sb.st_size != 0 || 688 sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev) 689 return (0); 690 691 if (unlink(path) && errno != ENOENT) { 692 if (!in_sig) 693 syswarn(1, errno, "deferred symlink removal"); 694 return (0); 695 } 696 697 err = 0; 698 if (first != NULL) { 699 /* add another hardlink to the existing symlink */ 700 if (linkat(AT_FDCWD, first, AT_FDCWD, path, 0) == 0) 701 return (0); 702 703 /* 704 * Couldn't hardlink the symlink for some reason, so we'll 705 * try creating it as its own symlink, but save the error 706 * for reporting if that fails. 707 */ 708 err = errno; 709 } 710 711 if (symlink(s->sli_value, path)) { 712 if (!in_sig) { 713 const char *qualifier = ""; 714 if (err) 715 qualifier = " hardlink"; 716 else 717 err = errno; 718 719 syswarn(1, err, "deferred symlink%s: %s", 720 qualifier, path); 721 } 722 return (0); 723 } 724 725 /* success, so set the id, mode, and times */ 726 mode = s->sli_mode; 727 if (pids) { 728 /* if can't set the ids, force the set[ug]id bits off */ 729 if (set_ids(path, sb.st_uid, sb.st_gid)) 730 mode &= ~(SETBITS); 731 } 732 733 if (pmode) 734 set_pmode(path, mode); 735 736 if (patime || pmtime) 737 set_ftime(path, &sb.st_mtim, &sb.st_atim, 0); 738 739 /* 740 * If we tried to link to first but failed, then this new symlink 741 * might be a better one to try in the future. Guess from the errno. 742 */ 743 if (err == 0 || err == ENOENT || err == EMLINK || err == EOPNOTSUPP) 744 return (1); 745 return (0); 746 } 747 748 /* 749 * sltab_process() 750 * Do all the delayed process for escape symlinks 751 */ 752 753 void 754 sltab_process(int in_sig) 755 { 756 struct slinode *s; 757 struct slpath *p; 758 char *first; 759 u_int indx; 760 761 if (slitab == NULL) 762 return; 763 764 /* walk across the entire hash table */ 765 for (indx = 0; indx < SL_TAB_SZ; indx++) { 766 while ((s = slitab[indx]) != NULL) { 767 /* pop this entry */ 768 slitab[indx] = s->sli_fow; 769 770 first = NULL; 771 p = &s->sli_paths; 772 while (1) { 773 struct slpath *next_p; 774 775 if (sltab_process_one(s, p, first, in_sig)) { 776 if (!in_sig) 777 free(first); 778 first = p->sp_path; 779 } else if (!in_sig) 780 free(p->sp_path); 781 782 if ((next_p = p->sp_next) == NULL) 783 break; 784 *p = *next_p; 785 if (!in_sig) 786 free(next_p); 787 } 788 if (!in_sig) { 789 free(first); 790 free(s->sli_value); 791 free(s); 792 } 793 } 794 } 795 if (!in_sig) 796 free(slitab); 797 slitab = NULL; 798 } 799 800 801 /* 802 * Interactive rename table routines 803 * 804 * The interactive rename table keeps track of the new names that the user 805 * assigns to files from tty input. Since this map is unique for each file 806 * we must store it in case there is a reference to the file later in archive 807 * (a link). Otherwise we will be unable to find the file we know was 808 * extracted. The remapping of these files is stored in a memory based hash 809 * table (it is assumed since input must come from /dev/tty, it is unlikely to 810 * be a very large table). 811 */ 812 813 /* 814 * name_start() 815 * create the interactive rename table 816 * Return: 817 * 0 if successful, -1 otherwise 818 */ 819 820 int 821 name_start(void) 822 { 823 if (ntab != NULL) 824 return(0); 825 if ((ntab = calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { 826 paxwarn(1, "Cannot allocate memory for interactive rename table"); 827 return(-1); 828 } 829 return(0); 830 } 831 832 /* 833 * add_name() 834 * add the new name to old name mapping just created by the user. 835 * If an old name mapping is found (there may be duplicate names on an 836 * archive) only the most recent is kept. 837 * Return: 838 * 0 if added, -1 otherwise 839 */ 840 841 int 842 add_name(char *oname, int onamelen, char *nname) 843 { 844 NAMT *pt; 845 u_int indx; 846 847 if (ntab == NULL) { 848 /* 849 * should never happen 850 */ 851 paxwarn(0, "No interactive rename table, links may fail"); 852 return(0); 853 } 854 855 /* 856 * look to see if we have already mapped this file, if so we 857 * will update it 858 */ 859 indx = st_hash(oname, onamelen, N_TAB_SZ); 860 if ((pt = ntab[indx]) != NULL) { 861 /* 862 * look down the has chain for the file 863 */ 864 while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) 865 pt = pt->fow; 866 867 if (pt != NULL) { 868 /* 869 * found an old mapping, replace it with the new one 870 * the user just input (if it is different) 871 */ 872 if (strcmp(nname, pt->nname) == 0) 873 return(0); 874 875 free(pt->nname); 876 if ((pt->nname = strdup(nname)) == NULL) { 877 paxwarn(1, "Cannot update rename table"); 878 return(-1); 879 } 880 return(0); 881 } 882 } 883 884 /* 885 * this is a new mapping, add it to the table 886 */ 887 if ((pt = malloc(sizeof(NAMT))) != NULL) { 888 if ((pt->oname = strdup(oname)) != NULL) { 889 if ((pt->nname = strdup(nname)) != NULL) { 890 pt->fow = ntab[indx]; 891 ntab[indx] = pt; 892 return(0); 893 } 894 free(pt->oname); 895 } 896 free(pt); 897 } 898 paxwarn(1, "Interactive rename table out of memory"); 899 return(-1); 900 } 901 902 /* 903 * sub_name() 904 * look up a link name to see if it points at a file that has been 905 * remapped by the user. If found, the link is adjusted to contain the 906 * new name (oname is the link to name) 907 */ 908 909 void 910 sub_name(char *oname, int *onamelen, size_t onamesize) 911 { 912 NAMT *pt; 913 u_int indx; 914 915 if (ntab == NULL) 916 return; 917 /* 918 * look the name up in the hash table 919 */ 920 indx = st_hash(oname, *onamelen, N_TAB_SZ); 921 if ((pt = ntab[indx]) == NULL) 922 return; 923 924 while (pt != NULL) { 925 /* 926 * walk down the hash chain looking for a match 927 */ 928 if (strcmp(oname, pt->oname) == 0) { 929 /* 930 * found it, replace it with the new name 931 * and return (we know that oname has enough space) 932 */ 933 *onamelen = strlcpy(oname, pt->nname, onamesize); 934 if (*onamelen >= onamesize) 935 *onamelen = onamesize - 1; /* XXX truncate? */ 936 return; 937 } 938 pt = pt->fow; 939 } 940 941 /* 942 * no match, just return 943 */ 944 } 945 946 #ifndef NOCPIO 947 /* 948 * device/inode mapping table routines 949 * (used with formats that store device and inodes fields) 950 * 951 * device/inode mapping tables remap the device field in a archive header. The 952 * device/inode fields are used to determine when files are hard links to each 953 * other. However these values have very little meaning outside of that. This 954 * database is used to solve one of two different problems. 955 * 956 * 1) when files are appended to an archive, while the new files may have hard 957 * links to each other, you cannot determine if they have hard links to any 958 * file already stored on the archive from a prior run of pax. We must assume 959 * that these inode/device pairs are unique only within a SINGLE run of pax 960 * (which adds a set of files to an archive). So we have to make sure the 961 * inode/dev pairs we add each time are always unique. We do this by observing 962 * while the inode field is very dense, the use of the dev field is fairly 963 * sparse. Within each run of pax, we remap any device number of a new archive 964 * member that has a device number used in a prior run and already stored in a 965 * file on the archive. During the read phase of the append, we store the 966 * device numbers used and mark them to not be used by any file during the 967 * write phase. If during write we go to use one of those old device numbers, 968 * we remap it to a new value. 969 * 970 * 2) Often the fields in the archive header used to store these values are 971 * too small to store the entire value. The result is an inode or device value 972 * which can be truncated. This really can foul up an archive. With truncation 973 * we end up creating links between files that are really not links (after 974 * truncation the inodes are the same value). We address that by detecting 975 * truncation and forcing a remap of the device field to split truncated 976 * inodes away from each other. Each truncation creates a pattern of bits that 977 * are removed. We use this pattern of truncated bits to partition the inodes 978 * on a single device to many different devices (each one represented by the 979 * truncated bit pattern). All inodes on the same device that have the same 980 * truncation pattern are mapped to the same new device. Two inodes that 981 * truncate to the same value clearly will always have different truncation 982 * bit patterns, so they will be split from away each other. When we spot 983 * device truncation we remap the device number to a non truncated value. 984 * (for more info see table.h for the data structures involved). 985 */ 986 987 static DEVT *chk_dev(dev_t, int); 988 989 /* 990 * dev_start() 991 * create the device mapping table 992 * Return: 993 * 0 if successful, -1 otherwise 994 */ 995 996 int 997 dev_start(void) 998 { 999 if (dtab != NULL) 1000 return(0); 1001 if ((dtab = calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { 1002 paxwarn(1, "Cannot allocate memory for device mapping table"); 1003 return(-1); 1004 } 1005 return(0); 1006 } 1007 1008 /* 1009 * add_dev() 1010 * add a device number to the table. this will force the device to be 1011 * remapped to a new value if it be used during a write phase. This 1012 * function is called during the read phase of an append to prohibit the 1013 * use of any device number already in the archive. 1014 * Return: 1015 * 0 if added ok, -1 otherwise 1016 */ 1017 1018 int 1019 add_dev(ARCHD *arcn) 1020 { 1021 if (chk_dev(arcn->sb.st_dev, 1) == NULL) 1022 return(-1); 1023 return(0); 1024 } 1025 1026 /* 1027 * chk_dev() 1028 * check for a device value in the device table. If not found and the add 1029 * flag is set, it is added. This does NOT assign any mapping values, just 1030 * adds the device number as one that need to be remapped. If this device 1031 * is already mapped, just return with a pointer to that entry. 1032 * Return: 1033 * pointer to the entry for this device in the device map table. Null 1034 * if the add flag is not set and the device is not in the table (it is 1035 * not been seen yet). If add is set and the device cannot be added, null 1036 * is returned (indicates an error). 1037 */ 1038 1039 static DEVT * 1040 chk_dev(dev_t dev, int add) 1041 { 1042 DEVT *pt; 1043 u_int indx; 1044 1045 if (dtab == NULL) 1046 return(NULL); 1047 /* 1048 * look to see if this device is already in the table 1049 */ 1050 indx = ((unsigned)dev) % D_TAB_SZ; 1051 if ((pt = dtab[indx]) != NULL) { 1052 while ((pt != NULL) && (pt->dev != dev)) 1053 pt = pt->fow; 1054 1055 /* 1056 * found it, return a pointer to it 1057 */ 1058 if (pt != NULL) 1059 return(pt); 1060 } 1061 1062 /* 1063 * not in table, we add it only if told to as this may just be a check 1064 * to see if a device number is being used. 1065 */ 1066 if (add == 0) 1067 return(NULL); 1068 1069 /* 1070 * allocate a node for this device and add it to the front of the hash 1071 * chain. Note we do not assign remaps values here, so the pt->list 1072 * list must be NULL. 1073 */ 1074 if ((pt = malloc(sizeof(DEVT))) == NULL) { 1075 paxwarn(1, "Device map table out of memory"); 1076 return(NULL); 1077 } 1078 pt->dev = dev; 1079 pt->list = NULL; 1080 pt->fow = dtab[indx]; 1081 dtab[indx] = pt; 1082 return(pt); 1083 } 1084 /* 1085 * map_dev() 1086 * given an inode and device storage mask (the mask has a 1 for each bit 1087 * the archive format is able to store in a header), we check for inode 1088 * and device truncation and remap the device as required. Device mapping 1089 * can also occur when during the read phase of append a device number was 1090 * seen (and was marked as do not use during the write phase). WE ASSUME 1091 * that unsigned longs are the same size or bigger than the fields used 1092 * for ino_t and dev_t. If not the types will have to be changed. 1093 * Return: 1094 * 0 if all ok, -1 otherwise. 1095 */ 1096 1097 int 1098 map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) 1099 { 1100 DEVT *pt; 1101 DLIST *dpt; 1102 static dev_t lastdev = 0; /* next device number to try */ 1103 int trc_ino = 0; 1104 int trc_dev = 0; 1105 ino_t trunc_bits = 0; 1106 ino_t nino; 1107 1108 if (dtab == NULL) 1109 return(0); 1110 /* 1111 * check for device and inode truncation, and extract the truncated 1112 * bit pattern. 1113 */ 1114 if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) 1115 ++trc_dev; 1116 if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { 1117 ++trc_ino; 1118 trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); 1119 } 1120 1121 /* 1122 * see if this device is already being mapped, look up the device 1123 * then find the truncation bit pattern which applies 1124 */ 1125 if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { 1126 /* 1127 * this device is already marked to be remapped 1128 */ 1129 for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) 1130 if (dpt->trunc_bits == trunc_bits) 1131 break; 1132 1133 if (dpt != NULL) { 1134 /* 1135 * we are being remapped for this device and pattern 1136 * change the device number to be stored and return 1137 */ 1138 arcn->sb.st_dev = dpt->dev; 1139 arcn->sb.st_ino = nino; 1140 return(0); 1141 } 1142 } else { 1143 /* 1144 * this device is not being remapped YET. if we do not have any 1145 * form of truncation, we do not need a remap 1146 */ 1147 if (!trc_ino && !trc_dev) 1148 return(0); 1149 1150 /* 1151 * we have truncation, have to add this as a device to remap 1152 */ 1153 if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) 1154 goto bad; 1155 1156 /* 1157 * if we just have a truncated inode, we have to make sure that 1158 * all future inodes that do not truncate (they have the 1159 * truncation pattern of all 0's) continue to map to the same 1160 * device number. We probably have already written inodes with 1161 * this device number to the archive with the truncation 1162 * pattern of all 0's. So we add the mapping for all 0's to the 1163 * same device number. 1164 */ 1165 if (!trc_dev && (trunc_bits != 0)) { 1166 if ((dpt = malloc(sizeof(DLIST))) == NULL) 1167 goto bad; 1168 dpt->trunc_bits = 0; 1169 dpt->dev = arcn->sb.st_dev; 1170 dpt->fow = pt->list; 1171 pt->list = dpt; 1172 } 1173 } 1174 1175 /* 1176 * look for a device number not being used. We must watch for wrap 1177 * around on lastdev (so we do not get stuck looking forever!) 1178 */ 1179 while (++lastdev > 0) { 1180 if (chk_dev(lastdev, 0) != NULL) 1181 continue; 1182 /* 1183 * found an unused value. If we have reached truncation point 1184 * for this format we are hosed, so we give up. Otherwise we 1185 * mark it as being used. 1186 */ 1187 if (((lastdev & ((dev_t)dev_mask)) != lastdev) || 1188 (chk_dev(lastdev, 1) == NULL)) 1189 goto bad; 1190 break; 1191 } 1192 1193 if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL)) 1194 goto bad; 1195 1196 /* 1197 * got a new device number, store it under this truncation pattern. 1198 * change the device number this file is being stored with. 1199 */ 1200 dpt->trunc_bits = trunc_bits; 1201 dpt->dev = lastdev; 1202 dpt->fow = pt->list; 1203 pt->list = dpt; 1204 arcn->sb.st_dev = lastdev; 1205 arcn->sb.st_ino = nino; 1206 return(0); 1207 1208 bad: 1209 paxwarn(1, "Unable to fix truncated inode/device field when storing %s", 1210 arcn->name); 1211 paxwarn(0, "Archive may create improper hard links when extracted"); 1212 return(0); 1213 } 1214 #endif /* NOCPIO */ 1215 1216 /* 1217 * directory access/mod time reset table routines (for directories READ by pax) 1218 * 1219 * The pax -t flag requires that access times of archive files be the same 1220 * before being read by pax. For regular files, access time is restored after 1221 * the file has been copied. This database provides the same functionality for 1222 * directories read during file tree traversal. Restoring directory access time 1223 * is more complex than files since directories may be read several times until 1224 * all the descendants in their subtree are visited by fts. Directory access 1225 * and modification times are stored during the fts pre-order visit (done 1226 * before any descendants in the subtree are visited) and restored after the 1227 * fts post-order visit (after all the descendants have been visited). In the 1228 * case of premature exit from a subtree (like from the effects of -n), any 1229 * directory entries left in this database are reset during final cleanup 1230 * operations of pax. Entries are hashed by inode number for fast lookup. 1231 */ 1232 1233 /* 1234 * atdir_start() 1235 * create the directory access time database for directories READ by pax. 1236 * Return: 1237 * 0 is created ok, -1 otherwise. 1238 */ 1239 1240 int 1241 atdir_start(void) 1242 { 1243 if (atab != NULL) 1244 return(0); 1245 if ((atab = calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { 1246 paxwarn(1,"Cannot allocate space for directory access time table"); 1247 return(-1); 1248 } 1249 return(0); 1250 } 1251 1252 1253 /* 1254 * atdir_end() 1255 * walk through the directory access time table and reset the access time 1256 * of any directory who still has an entry left in the database. These 1257 * entries are for directories READ by pax 1258 */ 1259 1260 void 1261 atdir_end(void) 1262 { 1263 ATDIR *pt; 1264 int i; 1265 1266 if (atab == NULL) 1267 return; 1268 /* 1269 * for each non-empty hash table entry reset all the directories 1270 * chained there. 1271 */ 1272 for (i = 0; i < A_TAB_SZ; ++i) { 1273 if ((pt = atab[i]) == NULL) 1274 continue; 1275 /* 1276 * remember to force the times, set_ftime() looks at pmtime 1277 * and patime, which only applies to things CREATED by pax, 1278 * not read by pax. Read time reset is controlled by -t. 1279 */ 1280 for (; pt != NULL; pt = pt->fow) 1281 set_attr(&pt->ft, 1, 0, 0, 0); 1282 } 1283 } 1284 1285 /* 1286 * add_atdir() 1287 * add a directory to the directory access time table. Table is hashed 1288 * and chained by inode number. This is for directories READ by pax 1289 */ 1290 1291 void 1292 add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp, 1293 const struct timespec *atimp) 1294 { 1295 ATDIR *pt; 1296 sigset_t allsigs, savedsigs; 1297 u_int indx; 1298 1299 if (atab == NULL) 1300 return; 1301 1302 /* 1303 * make sure this directory is not already in the table, if so just 1304 * return (the older entry always has the correct time). The only 1305 * way this will happen is when the same subtree can be traversed by 1306 * different args to pax and the -n option is aborting fts out of a 1307 * subtree before all the post-order visits have been made. 1308 */ 1309 indx = ((unsigned)ino) % A_TAB_SZ; 1310 if ((pt = atab[indx]) != NULL) { 1311 while (pt != NULL) { 1312 if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) 1313 break; 1314 pt = pt->fow; 1315 } 1316 1317 /* 1318 * oops, already there. Leave it alone. 1319 */ 1320 if (pt != NULL) 1321 return; 1322 } 1323 1324 /* 1325 * add it to the front of the hash chain 1326 */ 1327 sigfillset(&allsigs); 1328 sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); 1329 if ((pt = malloc(sizeof *pt)) != NULL) { 1330 if ((pt->ft.ft_name = strdup(fname)) != NULL) { 1331 pt->ft.ft_dev = dev; 1332 pt->ft.ft_ino = ino; 1333 pt->ft.ft_mtim = *mtimp; 1334 pt->ft.ft_atim = *atimp; 1335 pt->fow = atab[indx]; 1336 atab[indx] = pt; 1337 sigprocmask(SIG_SETMASK, &savedsigs, NULL); 1338 return; 1339 } 1340 free(pt); 1341 } 1342 1343 sigprocmask(SIG_SETMASK, &savedsigs, NULL); 1344 paxwarn(1, "Directory access time reset table ran out of memory"); 1345 } 1346 1347 /* 1348 * get_atdir() 1349 * look up a directory by inode and device number to obtain the access 1350 * and modification time you want to set to. If found, the modification 1351 * and access time parameters are set and the entry is removed from the 1352 * table (as it is no longer needed). These are for directories READ by 1353 * pax 1354 * Return: 1355 * 0 if found, -1 if not found. 1356 */ 1357 1358 int 1359 do_atdir(const char *name, dev_t dev, ino_t ino) 1360 { 1361 ATDIR *pt; 1362 ATDIR **ppt; 1363 sigset_t allsigs, savedsigs; 1364 u_int indx; 1365 1366 if (atab == NULL) 1367 return(-1); 1368 /* 1369 * hash by inode and search the chain for an inode and device match 1370 */ 1371 indx = ((unsigned)ino) % A_TAB_SZ; 1372 if ((pt = atab[indx]) == NULL) 1373 return(-1); 1374 1375 ppt = &(atab[indx]); 1376 while (pt != NULL) { 1377 if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) 1378 break; 1379 /* 1380 * no match, go to next one 1381 */ 1382 ppt = &(pt->fow); 1383 pt = pt->fow; 1384 } 1385 1386 /* 1387 * return if we did not find it. 1388 */ 1389 if (pt == NULL || pt->ft.ft_name == NULL || 1390 strcmp(name, pt->ft.ft_name) == 0) 1391 return(-1); 1392 1393 /* 1394 * found it. set the times and remove the entry from the table. 1395 */ 1396 set_attr(&pt->ft, 1, 0, 0, 0); 1397 sigfillset(&allsigs); 1398 sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); 1399 *ppt = pt->fow; 1400 sigprocmask(SIG_SETMASK, &savedsigs, NULL); 1401 free(pt->ft.ft_name); 1402 free(pt); 1403 return(0); 1404 } 1405 1406 /* 1407 * directory access mode and time storage routines (for directories CREATED 1408 * by pax). 1409 * 1410 * Pax requires that extracted directories, by default, have their access/mod 1411 * times and permissions set to the values specified in the archive. During the 1412 * actions of extracting (and creating the destination subtree during -rw copy) 1413 * directories extracted may be modified after being created. Even worse is 1414 * that these directories may have been created with file permissions which 1415 * prohibits any descendants of these directories from being extracted. When 1416 * directories are created by pax, access rights may be added to permit the 1417 * creation of files in their subtree. Every time pax creates a directory, the 1418 * times and file permissions specified by the archive are stored. After all 1419 * files have been extracted (or copied), these directories have their times 1420 * and file modes reset to the stored values. The directory info is restored in 1421 * reverse order as entries were added from root to leaf: to restore atime 1422 * properly, we must go backwards. 1423 */ 1424 1425 /* 1426 * dir_start() 1427 * set up the directory time and file mode storage for directories CREATED 1428 * by pax. 1429 * Return: 1430 * 0 if ok, -1 otherwise 1431 */ 1432 1433 int 1434 dir_start(void) 1435 { 1436 if (dirp != NULL) 1437 return(0); 1438 1439 dirsize = DIRP_SIZE; 1440 if ((dirp = reallocarray(NULL, dirsize, sizeof(DIRDATA))) == NULL) { 1441 paxwarn(1, "Unable to allocate memory for directory times"); 1442 return(-1); 1443 } 1444 return(0); 1445 } 1446 1447 /* 1448 * add_dir() 1449 * add the mode and times for a newly CREATED directory 1450 * name is name of the directory, psb the stat buffer with the data in it, 1451 * frc_mode is a flag that says whether to force the setting of the mode 1452 * (ignoring the user set values for preserving file mode). Frc_mode is 1453 * for the case where we created a file and found that the resulting 1454 * directory was not writeable and the user asked for file modes to NOT 1455 * be preserved. (we have to preserve what was created by default, so we 1456 * have to force the setting at the end. this is stated explicitly in the 1457 * pax spec) 1458 */ 1459 1460 void 1461 add_dir(char *name, struct stat *psb, int frc_mode) 1462 { 1463 DIRDATA *dblk; 1464 sigset_t allsigs, savedsigs; 1465 char realname[PATH_MAX], *rp; 1466 1467 if (dirp == NULL) 1468 return; 1469 1470 if (havechd && *name != '/') { 1471 if ((rp = realpath(name, realname)) == NULL) { 1472 paxwarn(1, "Cannot canonicalize %s", name); 1473 return; 1474 } 1475 name = rp; 1476 } 1477 if (dircnt == dirsize) { 1478 dblk = reallocarray(dirp, dirsize, 2 * sizeof(DIRDATA)); 1479 if (dblk == NULL) { 1480 paxwarn(1, "Unable to store mode and times for created" 1481 " directory: %s", name); 1482 return; 1483 } 1484 sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); 1485 dirp = dblk; 1486 dirsize *= 2; 1487 sigprocmask(SIG_SETMASK, &savedsigs, NULL); 1488 } 1489 dblk = &dirp[dircnt]; 1490 if ((dblk->ft.ft_name = strdup(name)) == NULL) { 1491 paxwarn(1, "Unable to store mode and times for created" 1492 " directory: %s", name); 1493 return; 1494 } 1495 dblk->ft.ft_mtim = psb->st_mtim; 1496 dblk->ft.ft_atim = psb->st_atim; 1497 dblk->ft.ft_ino = psb->st_ino; 1498 dblk->ft.ft_dev = psb->st_dev; 1499 dblk->mode = psb->st_mode & ABITS; 1500 dblk->frc_mode = frc_mode; 1501 sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); 1502 ++dircnt; 1503 sigprocmask(SIG_SETMASK, &savedsigs, NULL); 1504 } 1505 1506 /* 1507 * delete_dir() 1508 * When we rmdir a directory, we may want to make sure we don't 1509 * later warn about being unable to set its mode and times. 1510 */ 1511 1512 void 1513 delete_dir(dev_t dev, ino_t ino) 1514 { 1515 DIRDATA *dblk; 1516 char *name; 1517 size_t i; 1518 1519 if (dirp == NULL) 1520 return; 1521 for (i = 0; i < dircnt; i++) { 1522 dblk = &dirp[i]; 1523 1524 if (dblk->ft.ft_name == NULL) 1525 continue; 1526 if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) { 1527 name = dblk->ft.ft_name; 1528 dblk->ft.ft_name = NULL; 1529 free(name); 1530 break; 1531 } 1532 } 1533 } 1534 1535 /* 1536 * proc_dir(int in_sig) 1537 * process all file modes and times stored for directories CREATED 1538 * by pax. If in_sig is set, we're in a signal handler and can't 1539 * free stuff. 1540 */ 1541 1542 void 1543 proc_dir(int in_sig) 1544 { 1545 DIRDATA *dblk; 1546 size_t cnt; 1547 1548 if (dirp == NULL) 1549 return; 1550 /* 1551 * read backwards through the file and process each directory 1552 */ 1553 cnt = dircnt; 1554 while (cnt-- > 0) { 1555 dblk = &dirp[cnt]; 1556 /* 1557 * If we remove a directory we created, we replace the 1558 * ft_name with NULL. Ignore those. 1559 */ 1560 if (dblk->ft.ft_name == NULL) 1561 continue; 1562 1563 /* 1564 * frc_mode set, make sure we set the file modes even if 1565 * the user didn't ask for it (see file_subs.c for more info) 1566 */ 1567 set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode, 1568 in_sig); 1569 if (!in_sig) 1570 free(dblk->ft.ft_name); 1571 } 1572 1573 if (!in_sig) 1574 free(dirp); 1575 dirp = NULL; 1576 dircnt = 0; 1577 } 1578 1579 /* 1580 * database independent routines 1581 */ 1582 1583 /* 1584 * st_hash() 1585 * hashes filenames to a u_int for hashing into a table. Looks at the tail 1586 * end of file, as this provides far better distribution than any other 1587 * part of the name. For performance reasons we only care about the last 1588 * MAXKEYLEN chars (should be at LEAST large enough to pick off the file 1589 * name). Was tested on 500,000 name file tree traversal from the root 1590 * and gave almost a perfectly uniform distribution of keys when used with 1591 * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) 1592 * chars at a time and pads with 0 for last addition. 1593 * Return: 1594 * the hash value of the string MOD (%) the table size. 1595 */ 1596 1597 u_int 1598 st_hash(const char *name, int len, int tabsz) 1599 { 1600 const char *pt; 1601 char *dest; 1602 const char *end; 1603 int i; 1604 u_int key = 0; 1605 int steps; 1606 int res; 1607 u_int val; 1608 1609 /* 1610 * only look at the tail up to MAXKEYLEN, we do not need to waste 1611 * time here (remember these are pathnames, the tail is what will 1612 * spread out the keys) 1613 */ 1614 if (len > MAXKEYLEN) { 1615 pt = &(name[len - MAXKEYLEN]); 1616 len = MAXKEYLEN; 1617 } else 1618 pt = name; 1619 1620 /* 1621 * calculate the number of u_int size steps in the string and if 1622 * there is a runt to deal with 1623 */ 1624 steps = len/sizeof(u_int); 1625 res = len % sizeof(u_int); 1626 1627 /* 1628 * add up the value of the string in unsigned integer sized pieces 1629 * too bad we cannot have unsigned int aligned strings, then we 1630 * could avoid the expensive copy. 1631 */ 1632 for (i = 0; i < steps; ++i) { 1633 end = pt + sizeof(u_int); 1634 dest = (char *)&val; 1635 while (pt < end) 1636 *dest++ = *pt++; 1637 key += val; 1638 } 1639 1640 /* 1641 * add in the runt padded with zero to the right 1642 */ 1643 if (res) { 1644 val = 0; 1645 end = pt + res; 1646 dest = (char *)&val; 1647 while (pt < end) 1648 *dest++ = *pt++; 1649 key += val; 1650 } 1651 1652 /* 1653 * return the result mod the table size 1654 */ 1655 return(key % tabsz); 1656 } 1657