1 /*- 2 * Copyright (c) 1992 Keith Muller. 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Keith Muller of the University of California, San Diego. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)pat_rep.c 8.2 (Berkeley) 4/18/94 38 * $FreeBSD: src/bin/pax/pat_rep.c,v 1.15.2.1 2001/08/01 05:03:11 obrien Exp $ 39 * $DragonFly: src/bin/pax/pat_rep.c,v 1.8 2006/09/27 21:58:08 pavalos Exp $ 40 */ 41 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <stdio.h> 45 #include <string.h> 46 #include <unistd.h> 47 #include <stdlib.h> 48 #include <errno.h> 49 #include <regex.h> 50 #include "pax.h" 51 #include "pat_rep.h" 52 #include "extern.h" 53 54 /* 55 * routines to handle pattern matching, name modification (regular expression 56 * substitution and interactive renames), and destination name modification for 57 * copy (-rw). Both file name and link names are adjusted as required in these 58 * routines. 59 */ 60 61 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ 62 static PATTERN *pathead = NULL; /* file pattern match list head */ 63 static PATTERN *pattail = NULL; /* file pattern match list tail */ 64 static REPLACE *rephead = NULL; /* replacement string list head */ 65 static REPLACE *reptail = NULL; /* replacement string list tail */ 66 67 static int rep_name (char *, int *, int); 68 static int tty_rename (ARCHD *); 69 static int fix_path (char *, int *, char *, int); 70 static int fn_match (char *, char *, char **); 71 static char * range_match (char *, int); 72 static int resub (regex_t *, regmatch_t *, char *, char *, char *); 73 74 /* 75 * rep_add() 76 * parses the -s replacement string; compiles the regular expression 77 * and stores the compiled value and it's replacement string together in 78 * replacement string list. Input to this function is of the form: 79 * /old/new/pg 80 * The first char in the string specifies the delimiter used by this 81 * replacement string. "Old" is a regular expression in "ed" format which 82 * is compiled by regcomp() and is applied to filenames. "new" is the 83 * substitution string; p and g are options flags for printing and global 84 * replacement (over the single filename) 85 * Return: 86 * 0 if a proper replacement string and regular expression was added to 87 * the list of replacement patterns; -1 otherwise. 88 */ 89 90 int 91 rep_add(char *str) 92 { 93 char *pt1; 94 char *pt2; 95 REPLACE *rep; 96 int res; 97 char rebuf[BUFSIZ]; 98 99 /* 100 * throw out the bad parameters 101 */ 102 if ((str == NULL) || (*str == '\0')) { 103 paxwarn(1, "Empty replacement string"); 104 return(-1); 105 } 106 107 /* 108 * first character in the string specifies what the delimiter is for 109 * this expression 110 */ 111 if ((pt1 = strchr(str+1, *str)) == NULL) { 112 paxwarn(1, "Invalid replacement string %s", str); 113 return(-1); 114 } 115 116 /* 117 * allocate space for the node that handles this replacement pattern 118 * and split out the regular expression and try to compile it 119 */ 120 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { 121 paxwarn(1, "Unable to allocate memory for replacement string"); 122 return(-1); 123 } 124 125 *pt1 = '\0'; 126 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { 127 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); 128 paxwarn(1, "%s while compiling regular expression %s", rebuf, str); 129 free((char *)rep); 130 return(-1); 131 } 132 133 /* 134 * put the delimiter back in case we need an error message and 135 * locate the delimiter at the end of the replacement string 136 * we then point the node at the new substitution string 137 */ 138 *pt1++ = *str; 139 if ((pt2 = strchr(pt1, *str)) == NULL) { 140 regfree(&(rep->rcmp)); 141 free((char *)rep); 142 paxwarn(1, "Invalid replacement string %s", str); 143 return(-1); 144 } 145 146 *pt2 = '\0'; 147 rep->nstr = pt1; 148 pt1 = pt2++; 149 rep->flgs = 0; 150 151 /* 152 * set the options if any 153 */ 154 while (*pt2 != '\0') { 155 switch(*pt2) { 156 case 'g': 157 case 'G': 158 rep->flgs |= GLOB; 159 break; 160 case 'p': 161 case 'P': 162 rep->flgs |= PRNT; 163 break; 164 default: 165 regfree(&(rep->rcmp)); 166 free((char *)rep); 167 *pt1 = *str; 168 paxwarn(1, "Invalid replacement string option %s", str); 169 return(-1); 170 } 171 ++pt2; 172 } 173 174 /* 175 * all done, link it in at the end 176 */ 177 rep->fow = NULL; 178 if (rephead == NULL) { 179 reptail = rephead = rep; 180 return(0); 181 } 182 reptail->fow = rep; 183 reptail = rep; 184 return(0); 185 } 186 187 /* 188 * pat_add() 189 * add a pattern match to the pattern match list. Pattern matches are used 190 * to select which archive members are extracted. (They appear as 191 * arguments to pax in the list and read modes). If no patterns are 192 * supplied to pax, all members in the archive will be selected (and the 193 * pattern match list is empty). 194 * Return: 195 * 0 if the pattern was added to the list, -1 otherwise 196 */ 197 198 int 199 pat_add(char *str, char *chdname) 200 { 201 PATTERN *pt; 202 203 /* 204 * throw out the junk 205 */ 206 if ((str == NULL) || (*str == '\0')) { 207 paxwarn(1, "Empty pattern string"); 208 return(-1); 209 } 210 211 /* 212 * allocate space for the pattern and store the pattern. the pattern is 213 * part of argv so do not bother to copy it, just point at it. Add the 214 * node to the end of the pattern list 215 */ 216 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { 217 paxwarn(1, "Unable to allocate memory for pattern string"); 218 return(-1); 219 } 220 221 pt->pstr = str; 222 pt->pend = NULL; 223 pt->plen = strlen(str); 224 pt->fow = NULL; 225 pt->flgs = 0; 226 pt->chdname = chdname; 227 228 if (pathead == NULL) { 229 pattail = pathead = pt; 230 return(0); 231 } 232 pattail->fow = pt; 233 pattail = pt; 234 return(0); 235 } 236 237 /* 238 * pat_chk() 239 * complain if any the user supplied pattern did not result in a match to 240 * a selected archive member. 241 */ 242 243 void 244 pat_chk(void) 245 { 246 PATTERN *pt; 247 int wban = 0; 248 249 /* 250 * walk down the list checking the flags to make sure MTCH was set, 251 * if not complain 252 */ 253 for (pt = pathead; pt != NULL; pt = pt->fow) { 254 if (pt->flgs & MTCH) 255 continue; 256 if (!wban) { 257 paxwarn(1, "WARNING! These patterns were not matched:"); 258 ++wban; 259 } 260 fprintf(stderr, "%s\n", pt->pstr); 261 } 262 } 263 264 /* 265 * pat_sel() 266 * the archive member which matches a pattern was selected. Mark the 267 * pattern as having selected an archive member. arcn->pat points at the 268 * pattern that was matched. arcn->pat is set in pat_match() 269 * 270 * NOTE: When the -c option is used, we are called when there was no match 271 * by pat_match() (that means we did match before the inverted sense of 272 * the logic). Now this seems really strange at first, but with -c we 273 * need to keep track of those patterns that cause an archive member to NOT 274 * be selected (it found an archive member with a specified pattern) 275 * Return: 276 * 0 if the pattern pointed at by arcn->pat was tagged as creating a 277 * match, -1 otherwise. 278 */ 279 280 int 281 pat_sel(ARCHD *arcn) 282 { 283 PATTERN *pt; 284 PATTERN **ppt; 285 int len; 286 287 /* 288 * if no patterns just return 289 */ 290 if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) 291 return(0); 292 293 /* 294 * when we are NOT limited to a single match per pattern mark the 295 * pattern and return 296 */ 297 if (!nflag) { 298 pt->flgs |= MTCH; 299 return(0); 300 } 301 302 /* 303 * we reach this point only when we allow a single selected match per 304 * pattern, if the pattern matches a directory and we do not have -d 305 * (dflag) we are done with this pattern. We may also be handed a file 306 * in the subtree of a directory. in that case when we are operating 307 * with -d, this pattern was already selected and we are done 308 */ 309 if (pt->flgs & DIR_MTCH) 310 return(0); 311 312 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { 313 /* 314 * ok we matched a directory and we are allowing 315 * subtree matches but because of the -n only its children will 316 * match. This is tagged as a DIR_MTCH type. 317 * WATCH IT, the code assumes that pt->pend points 318 * into arcn->name and arcn->name has not been modified. 319 * If not we will have a big mess. Yup this is another kludge 320 */ 321 322 /* 323 * if this was a prefix match, remove trailing part of path 324 * so we can copy it. Future matches will be exact prefix match 325 */ 326 if (pt->pend != NULL) 327 *pt->pend = '\0'; 328 329 if ((pt->pstr = strdup(arcn->name)) == NULL) { 330 paxwarn(1, "Pattern select out of memory"); 331 if (pt->pend != NULL) 332 *pt->pend = '/'; 333 pt->pend = NULL; 334 return(-1); 335 } 336 337 /* 338 * put the trailing / back in the source string 339 */ 340 if (pt->pend != NULL) { 341 *pt->pend = '/'; 342 pt->pend = NULL; 343 } 344 pt->plen = strlen(pt->pstr); 345 346 /* 347 * strip off any trailing /, this should really never happen 348 */ 349 len = pt->plen - 1; 350 if (*(pt->pstr + len) == '/') { 351 *(pt->pstr + len) = '\0'; 352 pt->plen = len; 353 } 354 pt->flgs = DIR_MTCH | MTCH; 355 arcn->pat = pt; 356 return(0); 357 } 358 359 /* 360 * we are then done with this pattern, so we delete it from the list 361 * because it can never be used for another match. 362 * Seems kind of strange to do for a -c, but the pax spec is really 363 * vague on the interaction of -c, -n and -d. We assume that when -c 364 * and the pattern rejects a member (i.e. it matched it) it is done. 365 * In effect we place the order of the flags as having -c last. 366 */ 367 pt = pathead; 368 ppt = &pathead; 369 while ((pt != NULL) && (pt != arcn->pat)) { 370 ppt = &(pt->fow); 371 pt = pt->fow; 372 } 373 374 if (pt == NULL) { 375 /* 376 * should never happen.... 377 */ 378 paxwarn(1, "Pattern list inconsistent"); 379 return(-1); 380 } 381 *ppt = pt->fow; 382 free((char *)pt); 383 arcn->pat = NULL; 384 return(0); 385 } 386 387 /* 388 * pat_match() 389 * see if this archive member matches any supplied pattern, if a match 390 * is found, arcn->pat is set to point at the potential pattern. Later if 391 * this archive member is "selected" we process and mark the pattern as 392 * one which matched a selected archive member (see pat_sel()) 393 * Return: 394 * 0 if this archive member should be processed, 1 if it should be 395 * skipped and -1 if we are done with all patterns (and pax should quit 396 * looking for more members) 397 */ 398 399 int 400 pat_match(ARCHD *arcn) 401 { 402 PATTERN *pt; 403 404 arcn->pat = NULL; 405 406 /* 407 * if there are no more patterns and we have -n (and not -c) we are 408 * done. otherwise with no patterns to match, matches all 409 */ 410 if (pathead == NULL) { 411 if (nflag && !cflag) 412 return(-1); 413 return(0); 414 } 415 416 /* 417 * have to search down the list one at a time looking for a match. 418 */ 419 pt = pathead; 420 while (pt != NULL) { 421 /* 422 * check for a file name match unless we have DIR_MTCH set in 423 * this pattern then we want a prefix match 424 */ 425 if (pt->flgs & DIR_MTCH) { 426 /* 427 * this pattern was matched before to a directory 428 * as we must have -n set for this (but not -d). We can 429 * only match CHILDREN of that directory so we must use 430 * an exact prefix match (no wildcards). 431 */ 432 if ((arcn->name[pt->plen] == '/') && 433 (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) 434 break; 435 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) 436 break; 437 pt = pt->fow; 438 } 439 440 /* 441 * return the result, remember that cflag (-c) inverts the sense of a 442 * match 443 */ 444 if (pt == NULL) 445 return(cflag ? 0 : 1); 446 447 /* 448 * we had a match, now when we invert the sense (-c) we reject this 449 * member. However we have to tag the pattern a being successful, (in a 450 * match, not in selecting a archive member) so we call pat_sel() here. 451 */ 452 arcn->pat = pt; 453 if (!cflag) 454 return(0); 455 456 if (pat_sel(arcn) < 0) 457 return(-1); 458 arcn->pat = NULL; 459 return(1); 460 } 461 462 /* 463 * fn_match() 464 * Return: 465 * 0 if this archive member should be processed, 1 if it should be 466 * skipped and -1 if we are done with all patterns (and pax should quit 467 * looking for more members) 468 * Note: *pend may be changed to show where the prefix ends. 469 */ 470 471 static int 472 fn_match(char *pattern, char *string, char **pend) 473 { 474 char c; 475 char test; 476 477 *pend = NULL; 478 for (;;) { 479 switch (c = *pattern++) { 480 case '\0': 481 /* 482 * Ok we found an exact match 483 */ 484 if (*string == '\0') 485 return(0); 486 487 /* 488 * Check if it is a prefix match 489 */ 490 if ((dflag == 1) || (*string != '/')) 491 return(-1); 492 493 /* 494 * It is a prefix match, remember where the trailing 495 * / is located 496 */ 497 *pend = string; 498 return(0); 499 case '?': 500 if ((test = *string++) == '\0') 501 return (-1); 502 break; 503 case '*': 504 c = *pattern; 505 /* 506 * Collapse multiple *'s. 507 */ 508 while (c == '*') 509 c = *++pattern; 510 511 /* 512 * Optimized hack for pattern with a * at the end 513 */ 514 if (c == '\0') 515 return (0); 516 517 /* 518 * General case, use recursion. 519 */ 520 while ((test = *string) != '\0') { 521 if (!fn_match(pattern, string, pend)) 522 return (0); 523 ++string; 524 } 525 return (-1); 526 case '[': 527 /* 528 * range match 529 */ 530 if (((test = *string++) == '\0') || 531 ((pattern = range_match(pattern, test)) == NULL)) 532 return (-1); 533 break; 534 case '\\': 535 default: 536 if (c != *string++) 537 return (-1); 538 break; 539 } 540 } 541 /* NOTREACHED */ 542 } 543 544 static char * 545 range_match(char *pattern, int test) 546 { 547 char c; 548 char c2; 549 int negate; 550 int ok = 0; 551 552 if ((negate = (*pattern == '!')) != 0) 553 ++pattern; 554 555 while ((c = *pattern++) != ']') { 556 /* 557 * Illegal pattern 558 */ 559 if (c == '\0') 560 return (NULL); 561 562 if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && 563 (c2 != ']')) { 564 if ((c <= test) && (test <= c2)) 565 ok = 1; 566 pattern += 2; 567 } else if (c == test) 568 ok = 1; 569 } 570 return (ok == negate ? NULL : pattern); 571 } 572 573 /* 574 * mod_name() 575 * modify a selected file name. first attempt to apply replacement string 576 * expressions, then apply interactive file rename. We apply replacement 577 * string expressions to both filenames and file links (if we didn't the 578 * links would point to the wrong place, and we could never be able to 579 * move an archive that has a file link in it). When we rename files 580 * interactively, we store that mapping (old name to user input name) so 581 * if we spot any file links to the old file name in the future, we will 582 * know exactly how to fix the file link. 583 * Return: 584 * 0 continue to process file, 1 skip this file, -1 pax is finished 585 */ 586 587 int 588 mod_name(ARCHD *arcn) 589 { 590 int res = 0; 591 592 /* 593 * Strip off leading '/' if appropriate. 594 * Currently, this option is only set for the tar format. 595 */ 596 if (rmleadslash && arcn->name[0] == '/') { 597 if (arcn->name[1] == '\0') { 598 arcn->name[0] = '.'; 599 } else { 600 memmove(arcn->name, &arcn->name[1], 601 strlen(arcn->name)); 602 arcn->nlen--; 603 } 604 if (rmleadslash < 2) { 605 rmleadslash = 2; 606 paxwarn(0, "Removing leading / from absolute path names in the archive"); 607 } 608 } 609 if (rmleadslash && arcn->ln_name[0] == '/' && 610 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { 611 if (arcn->ln_name[1] == '\0') { 612 arcn->ln_name[0] = '.'; 613 } else { 614 memmove(arcn->ln_name, &arcn->ln_name[1], 615 strlen(arcn->ln_name)); 616 arcn->ln_nlen--; 617 } 618 if (rmleadslash < 2) { 619 rmleadslash = 2; 620 paxwarn(0, "Removing leading / from absolute path names in the archive"); 621 } 622 } 623 624 /* 625 * IMPORTANT: We have a problem. what do we do with symlinks? 626 * Modifying a hard link name makes sense, as we know the file it 627 * points at should have been seen already in the archive (and if it 628 * wasn't seen because of a read error or a bad archive, we lose 629 * anyway). But there are no such requirements for symlinks. On one 630 * hand the symlink that refers to a file in the archive will have to 631 * be modified to so it will still work at its new location in the 632 * file system. On the other hand a symlink that points elsewhere (and 633 * should continue to do so) should not be modified. There is clearly 634 * no perfect solution here. So we handle them like hardlinks. Clearly 635 * a replacement made by the interactive rename mapping is very likely 636 * to be correct since it applies to a single file and is an exact 637 * match. The regular expression replacements are a little harder to 638 * justify though. We claim that the symlink name is only likely 639 * to be replaced when it points within the file tree being moved and 640 * in that case it should be modified. what we really need to do is to 641 * call an oracle here. :) 642 */ 643 if (rephead != NULL) { 644 /* 645 * we have replacement strings, modify the name and the link 646 * name if any. 647 */ 648 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) 649 return(res); 650 651 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 652 (arcn->type == PAX_HRG)) && 653 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) 654 return(res); 655 } 656 657 if (iflag) { 658 /* 659 * perform interactive file rename, then map the link if any 660 */ 661 if ((res = tty_rename(arcn)) != 0) 662 return(res); 663 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 664 (arcn->type == PAX_HRG)) 665 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); 666 } 667 return(res); 668 } 669 670 /* 671 * tty_rename() 672 * Prompt the user for a replacement file name. A "." keeps the old name, 673 * a empty line skips the file, and an EOF on reading the tty, will cause 674 * pax to stop processing and exit. Otherwise the file name input, replaces 675 * the old one. 676 * Return: 677 * 0 process this file, 1 skip this file, -1 we need to exit pax 678 */ 679 680 static int 681 tty_rename(ARCHD *arcn) 682 { 683 char tmpname[PAXPATHLEN+2]; 684 int res; 685 686 /* 687 * prompt user for the replacement name for a file, keep trying until 688 * we get some reasonable input. Archives may have more than one file 689 * on them with the same name (from updates etc). We print verbose info 690 * on the file so the user knows what is up. 691 */ 692 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); 693 694 for (;;) { 695 ls_tty(arcn); 696 tty_prnt("Input new name, or a \".\" to keep the old name, "); 697 tty_prnt("or a \"return\" to skip this file.\n"); 698 tty_prnt("Input > "); 699 if (tty_read(tmpname, sizeof(tmpname)) < 0) 700 return(-1); 701 if (strcmp(tmpname, "..") == 0) { 702 tty_prnt("Try again, illegal file name: ..\n"); 703 continue; 704 } 705 if (strlen(tmpname) > PAXPATHLEN) { 706 tty_prnt("Try again, file name too long\n"); 707 continue; 708 } 709 break; 710 } 711 712 /* 713 * empty file name, skips this file. a "." leaves it alone 714 */ 715 if (tmpname[0] == '\0') { 716 tty_prnt("Skipping file.\n"); 717 return(1); 718 } 719 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { 720 tty_prnt("Processing continues, name unchanged.\n"); 721 return(0); 722 } 723 724 /* 725 * ok the name changed. We may run into links that point at this 726 * file later. we have to remember where the user sent the file 727 * in order to repair any links. 728 */ 729 tty_prnt("Processing continues, name changed to: %s\n", tmpname); 730 res = add_name(arcn->name, arcn->nlen, tmpname); 731 arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1); 732 arcn->name[arcn->nlen] = '\0'; 733 if (res < 0) 734 return(-1); 735 return(0); 736 } 737 738 /* 739 * set_dest() 740 * fix up the file name and the link name (if any) so this file will land 741 * in the destination directory (used during copy() -rw). 742 * Return: 743 * 0 if ok, -1 if failure (name too long) 744 */ 745 746 int 747 set_dest(ARCHD *arcn, char *dest_dir, int dir_len) 748 { 749 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) 750 return(-1); 751 752 /* 753 * It is really hard to deal with symlinks here, we cannot be sure 754 * if the name they point was moved (or will be moved). It is best to 755 * leave them alone. 756 */ 757 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) 758 return(0); 759 760 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) 761 return(-1); 762 return(0); 763 } 764 765 /* 766 * fix_path 767 * concatenate dir_name and or_name and store the result in or_name (if 768 * it fits). This is one ugly function. 769 * Return: 770 * 0 if ok, -1 if the final name is too long 771 */ 772 773 static int 774 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) 775 { 776 char *src; 777 char *dest; 778 char *start; 779 int len; 780 781 /* 782 * we shift the or_name to the right enough to tack in the dir_name 783 * at the front. We make sure we have enough space for it all before 784 * we start. since dest always ends in a slash, we skip of or_name 785 * if it also starts with one. 786 */ 787 start = or_name; 788 src = start + *or_len; 789 dest = src + dir_len; 790 if (*start == '/') { 791 ++start; 792 --dest; 793 } 794 if ((len = dest - or_name) > PAXPATHLEN) { 795 paxwarn(1, "File name %s/%s, too long", dir_name, start); 796 return(-1); 797 } 798 *or_len = len; 799 800 /* 801 * enough space, shift 802 */ 803 while (src >= start) 804 *dest-- = *src--; 805 src = dir_name + dir_len - 1; 806 807 /* 808 * splice in the destination directory name 809 */ 810 while (src >= dir_name) 811 *dest-- = *src--; 812 813 *(or_name + len) = '\0'; 814 return(0); 815 } 816 817 /* 818 * rep_name() 819 * walk down the list of replacement strings applying each one in order. 820 * when we find one with a successful substitution, we modify the name 821 * as specified. if required, we print the results. if the resulting name 822 * is empty, we will skip this archive member. We use the regexp(3) 823 * routines (regexp() ought to win a prize as having the most cryptic 824 * library function manual page). 825 * --Parameters-- 826 * name is the file name we are going to apply the regular expressions to 827 * (and may be modified) 828 * nlen is the length of this name (and is modified to hold the length of 829 * the final string). 830 * prnt is a flag that says whether to print the final result. 831 * Return: 832 * 0 if substitution was successful, 1 if we are to skip the file (the name 833 * ended up empty) 834 */ 835 836 static int 837 rep_name(char *name, int *nlen, int prnt) 838 { 839 REPLACE *pt; 840 char *inpt; 841 char *outpt; 842 char *endpt; 843 char *rpt; 844 int found = 0; 845 int res; 846 regmatch_t pm[MAXSUBEXP]; 847 char nname[PAXPATHLEN+1]; /* final result of all replacements */ 848 char buf1[PAXPATHLEN+1]; /* where we work on the name */ 849 850 /* 851 * copy the name into buf1, where we will work on it. We need to keep 852 * the orig string around so we can print out the result of the final 853 * replacement. We build up the final result in nname. inpt points at 854 * the string we apply the regular expression to. prnt is used to 855 * suppress printing when we handle replacements on the link field 856 * (the user already saw that substitution go by) 857 */ 858 pt = rephead; 859 strcpy(buf1, name); 860 inpt = buf1; 861 outpt = nname; 862 endpt = outpt + PAXPATHLEN; 863 864 /* 865 * try each replacement string in order 866 */ 867 while (pt != NULL) { 868 do { 869 /* 870 * check for a successful substitution, if not go to 871 * the next pattern, or cleanup if we were global 872 */ 873 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) 874 break; 875 876 /* 877 * ok we found one. We have three parts, the prefix 878 * which did not match, the section that did and the 879 * tail (that also did not match). Copy the prefix to 880 * the final output buffer (watching to make sure we 881 * do not create a string too long). 882 */ 883 found = 1; 884 rpt = inpt + pm[0].rm_so; 885 886 while ((inpt < rpt) && (outpt < endpt)) 887 *outpt++ = *inpt++; 888 if (outpt == endpt) 889 break; 890 891 /* 892 * for the second part (which matched the regular 893 * expression) apply the substitution using the 894 * replacement string and place it the prefix in the 895 * final output. If we have problems, skip it. 896 */ 897 if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) 898 < 0) { 899 if (prnt) 900 paxwarn(1, "Replacement name error %s", 901 name); 902 return(1); 903 } 904 outpt += res; 905 906 /* 907 * we set up to look again starting at the first 908 * character in the tail (of the input string right 909 * after the last character matched by the regular 910 * expression (inpt always points at the first char in 911 * the string to process). If we are not doing a global 912 * substitution, we will use inpt to copy the tail to 913 * the final result. Make sure we do not overrun the 914 * output buffer 915 */ 916 inpt += pm[0].rm_eo - pm[0].rm_so; 917 918 if ((outpt == endpt) || (*inpt == '\0')) 919 break; 920 921 /* 922 * if the user wants global we keep trying to 923 * substitute until it fails, then we are done. 924 */ 925 } while (pt->flgs & GLOB); 926 927 if (found) 928 break; 929 930 /* 931 * a successful substitution did NOT occur, try the next one 932 */ 933 pt = pt->fow; 934 } 935 936 if (found) { 937 /* 938 * we had a substitution, copy the last tail piece (if there is 939 * room) to the final result 940 */ 941 while ((outpt < endpt) && (*inpt != '\0')) 942 *outpt++ = *inpt++; 943 944 *outpt = '\0'; 945 if ((outpt == endpt) && (*inpt != '\0')) { 946 if (prnt) 947 paxwarn(1,"Replacement name too long %s >> %s", 948 name, nname); 949 return(1); 950 } 951 952 /* 953 * inform the user of the result if wanted 954 */ 955 if (prnt && (pt->flgs & PRNT)) { 956 if (*nname == '\0') 957 fprintf(stderr,"%s >> <empty string>\n", 958 name); 959 else 960 fprintf(stderr,"%s >> %s\n", name, nname); 961 } 962 963 /* 964 * if empty inform the caller this file is to be skipped 965 * otherwise copy the new name over the orig name and return 966 */ 967 if (*nname == '\0') 968 return(1); 969 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); 970 name[PAXPATHLEN] = '\0'; 971 } 972 return(0); 973 } 974 975 /* 976 * resub() 977 * apply the replacement to the matched expression. expand out the old 978 * style ed(1) subexpression expansion. 979 * Return: 980 * -1 if error, or the number of characters added to the destination. 981 */ 982 983 static int 984 resub(regex_t *rp, regmatch_t *pm, char *src, char *dest, 985 char *destend) 986 { 987 char *spt; 988 char *dpt; 989 char c; 990 regmatch_t *pmpt; 991 int len; 992 int subexcnt; 993 994 spt = src; 995 dpt = dest; 996 subexcnt = rp->re_nsub; 997 while ((dpt < destend) && ((c = *spt++) != '\0')) { 998 /* 999 * see if we just have an ordinary replacement character 1000 * or we refer to a subexpression. 1001 */ 1002 if (c == '&') { 1003 pmpt = pm; 1004 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { 1005 /* 1006 * make sure there is a subexpression as specified 1007 */ 1008 if ((len = *spt++ - '0') > subexcnt) 1009 return(-1); 1010 pmpt = pm + len; 1011 } else { 1012 /* 1013 * Ordinary character, just copy it 1014 */ 1015 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1016 c = *spt++; 1017 *dpt++ = c; 1018 continue; 1019 } 1020 1021 /* 1022 * continue if the subexpression is bogus 1023 */ 1024 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || 1025 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) 1026 continue; 1027 1028 /* 1029 * copy the subexpression to the destination. 1030 * fail if we run out of space or the match string is damaged 1031 */ 1032 if (len > (destend - dpt)) 1033 len = destend - dpt; 1034 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) 1035 return(-1); 1036 dpt += len; 1037 } 1038 return(dpt - dest); 1039 } 1040