xref: /freebsd/bin/pax/pat_rep.c (revision e043f372)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <regex.h>
42 #include "pax.h"
43 #include "pat_rep.h"
44 #include "extern.h"
45 
46 /*
47  * routines to handle pattern matching, name modification (regular expression
48  * substitution and interactive renames), and destination name modification for
49  * copy (-rw). Both file name and link names are adjusted as required in these
50  * routines.
51  */
52 
53 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
54 static PATTERN *pathead = NULL;		/* file pattern match list head */
55 static PATTERN *pattail = NULL;		/* file pattern match list tail */
56 static REPLACE *rephead = NULL;		/* replacement string list head */
57 static REPLACE *reptail = NULL;		/* replacement string list tail */
58 
59 static int rep_name(char *, int *, int);
60 static int tty_rename(ARCHD *);
61 static int fix_path(char *, int *, char *, int);
62 static int fn_match(char *, char *, char **);
63 static char * range_match(char *, int);
64 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
65 
66 /*
67  * rep_add()
68  *	parses the -s replacement string; compiles the regular expression
69  *	and stores the compiled value and it's replacement string together in
70  *	replacement string list. Input to this function is of the form:
71  *		/old/new/pg
72  *	The first char in the string specifies the delimiter used by this
73  *	replacement string. "Old" is a regular expression in "ed" format which
74  *	is compiled by regcomp() and is applied to filenames. "new" is the
75  *	substitution string; p and g are options flags for printing and global
76  *	replacement (over the single filename)
77  * Return:
78  *	0 if a proper replacement string and regular expression was added to
79  *	the list of replacement patterns; -1 otherwise.
80  */
81 
82 int
rep_add(char * str)83 rep_add(char *str)
84 {
85 	char *pt1;
86 	char *pt2;
87 	REPLACE *rep;
88 	int res;
89 	char rebuf[BUFSIZ];
90 
91 	/*
92 	 * throw out the bad parameters
93 	 */
94 	if ((str == NULL) || (*str == '\0')) {
95 		paxwarn(1, "Empty replacement string");
96 		return(-1);
97 	}
98 
99 	/*
100 	 * first character in the string specifies what the delimiter is for
101 	 * this expression
102 	 */
103 	if ((pt1 = strchr(str+1, *str)) == NULL) {
104 		paxwarn(1, "Invalid replacement string %s", str);
105 		return(-1);
106 	}
107 
108 	/*
109 	 * allocate space for the node that handles this replacement pattern
110 	 * and split out the regular expression and try to compile it
111 	 */
112 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
113 		paxwarn(1, "Unable to allocate memory for replacement string");
114 		return(-1);
115 	}
116 
117 	*pt1 = '\0';
118 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
119 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
120 		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
121 		free(rep);
122 		return(-1);
123 	}
124 
125 	/*
126 	 * put the delimiter back in case we need an error message and
127 	 * locate the delimiter at the end of the replacement string
128 	 * we then point the node at the new substitution string
129 	 */
130 	*pt1++ = *str;
131 	if ((pt2 = strchr(pt1, *str)) == NULL) {
132 		regfree(&rep->rcmp);
133 		free(rep);
134 		paxwarn(1, "Invalid replacement string %s", str);
135 		return(-1);
136 	}
137 
138 	*pt2 = '\0';
139 	rep->nstr = pt1;
140 	pt1 = pt2++;
141 	rep->flgs = 0;
142 
143 	/*
144 	 * set the options if any
145 	 */
146 	while (*pt2 != '\0') {
147 		switch(*pt2) {
148 		case 'g':
149 		case 'G':
150 			rep->flgs  |= GLOB;
151 			break;
152 		case 'p':
153 		case 'P':
154 			rep->flgs  |= PRNT;
155 			break;
156 		default:
157 			regfree(&rep->rcmp);
158 			free(rep);
159 			*pt1 = *str;
160 			paxwarn(1, "Invalid replacement string option %s", str);
161 			return(-1);
162 		}
163 		++pt2;
164 	}
165 
166 	/*
167 	 * all done, link it in at the end
168 	 */
169 	rep->fow = NULL;
170 	if (rephead == NULL) {
171 		reptail = rephead = rep;
172 		return(0);
173 	}
174 	reptail->fow = rep;
175 	reptail = rep;
176 	return(0);
177 }
178 
179 /*
180  * pat_add()
181  *	add a pattern match to the pattern match list. Pattern matches are used
182  *	to select which archive members are extracted. (They appear as
183  *	arguments to pax in the list and read modes). If no patterns are
184  *	supplied to pax, all members in the archive will be selected (and the
185  *	pattern match list is empty).
186  * Return:
187  *	0 if the pattern was added to the list, -1 otherwise
188  */
189 
190 int
pat_add(char * str,char * chdnam)191 pat_add(char *str, char *chdnam)
192 {
193 	PATTERN *pt;
194 
195 	/*
196 	 * throw out the junk
197 	 */
198 	if ((str == NULL) || (*str == '\0')) {
199 		paxwarn(1, "Empty pattern string");
200 		return(-1);
201 	}
202 
203 	/*
204 	 * allocate space for the pattern and store the pattern. the pattern is
205 	 * part of argv so do not bother to copy it, just point at it. Add the
206 	 * node to the end of the pattern list
207 	 */
208 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
209 		paxwarn(1, "Unable to allocate memory for pattern string");
210 		return(-1);
211 	}
212 
213 	pt->pstr = str;
214 	pt->pend = NULL;
215 	pt->plen = strlen(str);
216 	pt->fow = NULL;
217 	pt->flgs = 0;
218 	pt->chdname = chdnam;
219 
220 	if (pathead == NULL) {
221 		pattail = pathead = pt;
222 		return(0);
223 	}
224 	pattail->fow = pt;
225 	pattail = pt;
226 	return(0);
227 }
228 
229 /*
230  * pat_chk()
231  *	complain if any the user supplied pattern did not result in a match to
232  *	a selected archive member.
233  */
234 
235 void
pat_chk(void)236 pat_chk(void)
237 {
238 	PATTERN *pt;
239 	int wban = 0;
240 
241 	/*
242 	 * walk down the list checking the flags to make sure MTCH was set,
243 	 * if not complain
244 	 */
245 	for (pt = pathead; pt != NULL; pt = pt->fow) {
246 		if (pt->flgs & MTCH)
247 			continue;
248 		if (!wban) {
249 			paxwarn(1, "WARNING! These patterns were not matched:");
250 			++wban;
251 		}
252 		(void)fprintf(stderr, "%s\n", pt->pstr);
253 	}
254 }
255 
256 /*
257  * pat_sel()
258  *	the archive member which matches a pattern was selected. Mark the
259  *	pattern as having selected an archive member. arcn->pat points at the
260  *	pattern that was matched. arcn->pat is set in pat_match()
261  *
262  *	NOTE: When the -c option is used, we are called when there was no match
263  *	by pat_match() (that means we did match before the inverted sense of
264  *	the logic). Now this seems really strange at first, but with -c  we
265  *	need to keep track of those patterns that cause an archive member to NOT
266  *	be selected (it found an archive member with a specified pattern)
267  * Return:
268  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
269  *	match, -1 otherwise.
270  */
271 
272 int
pat_sel(ARCHD * arcn)273 pat_sel(ARCHD *arcn)
274 {
275 	PATTERN *pt;
276 	PATTERN **ppt;
277 	int len;
278 
279 	/*
280 	 * if no patterns just return
281 	 */
282 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
283 		return(0);
284 
285 	/*
286 	 * when we are NOT limited to a single match per pattern mark the
287 	 * pattern and return
288 	 */
289 	if (!nflag) {
290 		pt->flgs |= MTCH;
291 		return(0);
292 	}
293 
294 	/*
295 	 * we reach this point only when we allow a single selected match per
296 	 * pattern, if the pattern matches a directory and we do not have -d
297 	 * (dflag) we are done with this pattern. We may also be handed a file
298 	 * in the subtree of a directory. in that case when we are operating
299 	 * with -d, this pattern was already selected and we are done
300 	 */
301 	if (pt->flgs & DIR_MTCH)
302 		return(0);
303 
304 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
305 		/*
306 		 * ok we matched a directory and we are allowing
307 		 * subtree matches but because of the -n only its children will
308 		 * match. This is tagged as a DIR_MTCH type.
309 		 * WATCH IT, the code assumes that pt->pend points
310 		 * into arcn->name and arcn->name has not been modified.
311 		 * If not we will have a big mess. Yup this is another kludge
312 		 */
313 
314 		/*
315 		 * if this was a prefix match, remove trailing part of path
316 		 * so we can copy it. Future matches will be exact prefix match
317 		 */
318 		if (pt->pend != NULL)
319 			*pt->pend = '\0';
320 
321 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
322 			paxwarn(1, "Pattern select out of memory");
323 			if (pt->pend != NULL)
324 				*pt->pend = '/';
325 			pt->pend = NULL;
326 			return(-1);
327 		}
328 
329 		/*
330 		 * put the trailing / back in the source string
331 		 */
332 		if (pt->pend != NULL) {
333 			*pt->pend = '/';
334 			pt->pend = NULL;
335 		}
336 		pt->plen = strlen(pt->pstr);
337 
338 		/*
339 		 * strip off any trailing /, this should really never happen
340 		 */
341 		len = pt->plen - 1;
342 		if (*(pt->pstr + len) == '/') {
343 			*(pt->pstr + len) = '\0';
344 			pt->plen = len;
345 		}
346 		pt->flgs = DIR_MTCH | MTCH;
347 		arcn->pat = pt;
348 		return(0);
349 	}
350 
351 	/*
352 	 * we are then done with this pattern, so we delete it from the list
353 	 * because it can never be used for another match.
354 	 * Seems kind of strange to do for a -c, but the pax spec is really
355 	 * vague on the interaction of -c -n and -d. We assume that when -c
356 	 * and the pattern rejects a member (i.e. it matched it) it is done.
357 	 * In effect we place the order of the flags as having -c last.
358 	 */
359 	pt = pathead;
360 	ppt = &pathead;
361 	while ((pt != NULL) && (pt != arcn->pat)) {
362 		ppt = &(pt->fow);
363 		pt = pt->fow;
364 	}
365 
366 	if (pt == NULL) {
367 		/*
368 		 * should never happen....
369 		 */
370 		paxwarn(1, "Pattern list inconsistent");
371 		return(-1);
372 	}
373 	*ppt = pt->fow;
374 	free(pt);
375 	arcn->pat = NULL;
376 	return(0);
377 }
378 
379 /*
380  * pat_match()
381  *	see if this archive member matches any supplied pattern, if a match
382  *	is found, arcn->pat is set to point at the potential pattern. Later if
383  *	this archive member is "selected" we process and mark the pattern as
384  *	one which matched a selected archive member (see pat_sel())
385  * Return:
386  *	0 if this archive member should be processed, 1 if it should be
387  *	skipped and -1 if we are done with all patterns (and pax should quit
388  *	looking for more members)
389  */
390 
391 int
pat_match(ARCHD * arcn)392 pat_match(ARCHD *arcn)
393 {
394 	PATTERN *pt;
395 
396 	arcn->pat = NULL;
397 
398 	/*
399 	 * if there are no more patterns and we have -n (and not -c) we are
400 	 * done. otherwise with no patterns to match, matches all
401 	 */
402 	if (pathead == NULL) {
403 		if (nflag && !cflag)
404 			return(-1);
405 		return(0);
406 	}
407 
408 	/*
409 	 * have to search down the list one at a time looking for a match.
410 	 */
411 	pt = pathead;
412 	while (pt != NULL) {
413 		/*
414 		 * check for a file name match unless we have DIR_MTCH set in
415 		 * this pattern then we want a prefix match
416 		 */
417 		if (pt->flgs & DIR_MTCH) {
418 			/*
419 			 * this pattern was matched before to a directory
420 			 * as we must have -n set for this (but not -d). We can
421 			 * only match CHILDREN of that directory so we must use
422 			 * an exact prefix match (no wildcards).
423 			 */
424 			if ((arcn->name[pt->plen] == '/') &&
425 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
426 				break;
427 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
428 			break;
429 		pt = pt->fow;
430 	}
431 
432 	/*
433 	 * return the result, remember that cflag (-c) inverts the sense of a
434 	 * match
435 	 */
436 	if (pt == NULL)
437 		return(cflag ? 0 : 1);
438 
439 	/*
440 	 * We had a match, now when we invert the sense (-c) we reject this
441 	 * member. However we have to tag the pattern a being successful, (in a
442 	 * match, not in selecting an archive member) so we call pat_sel() here.
443 	 */
444 	arcn->pat = pt;
445 	if (!cflag)
446 		return(0);
447 
448 	if (pat_sel(arcn) < 0)
449 		return(-1);
450 	arcn->pat = NULL;
451 	return(1);
452 }
453 
454 /*
455  * fn_match()
456  * Return:
457  *	0 if this archive member should be processed, 1 if it should be
458  *	skipped and -1 if we are done with all patterns (and pax should quit
459  *	looking for more members)
460  *	Note: *pend may be changed to show where the prefix ends.
461  */
462 
463 static int
fn_match(char * pattern,char * string,char ** pend)464 fn_match(char *pattern, char *string, char **pend)
465 {
466 	char c;
467 	char test;
468 
469 	*pend = NULL;
470 	for (;;) {
471 		switch (c = *pattern++) {
472 		case '\0':
473 			/*
474 			 * Ok we found an exact match
475 			 */
476 			if (*string == '\0')
477 				return(0);
478 
479 			/*
480 			 * Check if it is a prefix match
481 			 */
482 			if ((dflag == 1) || (*string != '/'))
483 				return(-1);
484 
485 			/*
486 			 * It is a prefix match, remember where the trailing
487 			 * / is located
488 			 */
489 			*pend = string;
490 			return(0);
491 		case '?':
492 			if ((test = *string++) == '\0')
493 				return (-1);
494 			break;
495 		case '*':
496 			c = *pattern;
497 			/*
498 			 * Collapse multiple *'s.
499 			 */
500 			while (c == '*')
501 				c = *++pattern;
502 
503 			/*
504 			 * Optimized hack for pattern with a * at the end
505 			 */
506 			if (c == '\0')
507 				return (0);
508 
509 			/*
510 			 * General case, use recursion.
511 			 */
512 			while ((test = *string) != '\0') {
513 				if (!fn_match(pattern, string, pend))
514 					return (0);
515 				++string;
516 			}
517 			return (-1);
518 		case '[':
519 			/*
520 			 * range match
521 			 */
522 			if (((test = *string++) == '\0') ||
523 			    ((pattern = range_match(pattern, test)) == NULL))
524 				return (-1);
525 			break;
526 		case '\\':
527 		default:
528 			if (c != *string++)
529 				return (-1);
530 			break;
531 		}
532 	}
533 	/* NOTREACHED */
534 }
535 
/*
 * range_match()
 *	Test one character against a bracket expression. pattern points just
 *	past the opening '['. A leading '!' inverts the result. Inside the
 *	brackets "a-z" is an inclusive range, provided the character after the
 *	'-' is neither ']' nor the end of the pattern; anything else is
 *	compared literally.
 * Return:
 *	pointer to the character following the closing ']' if test is
 *	accepted, NULL if it is rejected or the expression has no closing ']'.
 */
static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without seeing ']': illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if ((*pattern == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
		} else if (lo == test) {
			hit = 1;
		}
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
564 
565 /*
566  * mod_name()
567  *	modify a selected file name. first attempt to apply replacement string
568  *	expressions, then apply interactive file rename. We apply replacement
569  *	string expressions to both filenames and file links (if we didn't the
570  *	links would point to the wrong place, and we could never be able to
571  *	move an archive that has a file link in it). When we rename files
572  *	interactively, we store that mapping (old name to user input name) so
573  *	if we spot any file links to the old file name in the future, we will
574  *	know exactly how to fix the file link.
575  * Return:
576  *	0 continue to  process file, 1 skip this file, -1 pax is finished
577  */
578 
579 int
mod_name(ARCHD * arcn)580 mod_name(ARCHD *arcn)
581 {
582 	int res = 0;
583 
584 	/*
585 	 * Strip off leading '/' if appropriate.
586 	 * Currently, this option is only set for the tar format.
587 	 */
588 	if (rmleadslash && arcn->name[0] == '/') {
589 		if (arcn->name[1] == '\0') {
590 			arcn->name[0] = '.';
591 		} else {
592 			(void)memmove(arcn->name, &arcn->name[1],
593 			    strlen(arcn->name));
594 			arcn->nlen--;
595 		}
596 		if (rmleadslash < 2) {
597 			rmleadslash = 2;
598 			paxwarn(0, "Removing leading / from absolute path names in the archive");
599 		}
600 	}
601 	if (rmleadslash && arcn->ln_name[0] == '/' &&
602 	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
603 		if (arcn->ln_name[1] == '\0') {
604 			arcn->ln_name[0] = '.';
605 		} else {
606 			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
607 			    strlen(arcn->ln_name));
608 			arcn->ln_nlen--;
609 		}
610 		if (rmleadslash < 2) {
611 			rmleadslash = 2;
612 			paxwarn(0, "Removing leading / from absolute path names in the archive");
613 		}
614 	}
615 
616 	/*
617 	 * IMPORTANT: We have a problem. what do we do with symlinks?
618 	 * Modifying a hard link name makes sense, as we know the file it
619 	 * points at should have been seen already in the archive (and if it
620 	 * wasn't seen because of a read error or a bad archive, we lose
621 	 * anyway). But there are no such requirements for symlinks. On one
622 	 * hand the symlink that refers to a file in the archive will have to
623 	 * be modified to so it will still work at its new location in the
624 	 * file system. On the other hand a symlink that points elsewhere (and
625 	 * should continue to do so) should not be modified. There is clearly
626 	 * no perfect solution here. So we handle them like hardlinks. Clearly
627 	 * a replacement made by the interactive rename mapping is very likely
628 	 * to be correct since it applies to a single file and is an exact
629 	 * match. The regular expression replacements are a little harder to
630 	 * justify though. We claim that the symlink name is only likely
631 	 * to be replaced when it points within the file tree being moved and
632 	 * in that case it should be modified. what we really need to do is to
633 	 * call an oracle here. :)
634 	 */
635 	if (rephead != NULL) {
636 		/*
637 		 * we have replacement strings, modify the name and the link
638 		 * name if any.
639 		 */
640 		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
641 			return(res);
642 
643 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
644 		    (arcn->type == PAX_HRG)) &&
645 		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
646 			return(res);
647 	}
648 
649 	if (iflag) {
650 		/*
651 		 * perform interactive file rename, then map the link if any
652 		 */
653 		if ((res = tty_rename(arcn)) != 0)
654 			return(res);
655 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
656 		    (arcn->type == PAX_HRG))
657 			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
658 	}
659 	return(res);
660 }
661 
662 /*
663  * tty_rename()
664  *	Prompt the user for a replacement file name. A "." keeps the old name,
665  *	a empty line skips the file, and an EOF on reading the tty, will cause
666  *	pax to stop processing and exit. Otherwise the file name input, replaces
667  *	the old one.
668  * Return:
669  *	0 process this file, 1 skip this file, -1 we need to exit pax
670  */
671 
672 static int
tty_rename(ARCHD * arcn)673 tty_rename(ARCHD *arcn)
674 {
675 	char tmpname[PAXPATHLEN+2];
676 	int res;
677 
678 	/*
679 	 * prompt user for the replacement name for a file, keep trying until
680 	 * we get some reasonable input. Archives may have more than one file
681 	 * on them with the same name (from updates etc). We print verbose info
682 	 * on the file so the user knows what is up.
683 	 */
684 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
685 
686 	for (;;) {
687 		ls_tty(arcn);
688 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
689 		tty_prnt("or a \"return\" to skip this file.\n");
690 		tty_prnt("Input > ");
691 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
692 			return(-1);
693 		if (strcmp(tmpname, "..") == 0) {
694 			tty_prnt("Try again, illegal file name: ..\n");
695 			continue;
696 		}
697 		if (strlen(tmpname) > PAXPATHLEN) {
698 			tty_prnt("Try again, file name too long\n");
699 			continue;
700 		}
701 		break;
702 	}
703 
704 	/*
705 	 * empty file name, skips this file. a "." leaves it alone
706 	 */
707 	if (tmpname[0] == '\0') {
708 		tty_prnt("Skipping file.\n");
709 		return(1);
710 	}
711 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
712 		tty_prnt("Processing continues, name unchanged.\n");
713 		return(0);
714 	}
715 
716 	/*
717 	 * ok the name changed. We may run into links that point at this
718 	 * file later. we have to remember where the user sent the file
719 	 * in order to repair any links.
720 	 */
721 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
722 	res = add_name(arcn->name, arcn->nlen, tmpname);
723 	arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
724 	arcn->name[arcn->nlen] = '\0';
725 	if (res < 0)
726 		return(-1);
727 	return(0);
728 }
729 
730 /*
731  * set_dest()
732  *	fix up the file name and the link name (if any) so this file will land
733  *	in the destination directory (used during copy() -rw).
734  * Return:
735  *	0 if ok, -1 if failure (name too long)
736  */
737 
738 int
set_dest(ARCHD * arcn,char * dest_dir,int dir_len)739 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
740 {
741 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
742 		return(-1);
743 
744 	/*
745 	 * It is really hard to deal with symlinks here, we cannot be sure
746 	 * if the name they point was moved (or will be moved). It is best to
747 	 * leave them alone.
748 	 */
749 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
750 		return(0);
751 
752 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
753 		return(-1);
754 	return(0);
755 }
756 
757 /*
758  * fix_path
759  *	concatenate dir_name and or_name and store the result in or_name (if
760  *	it fits). This is one ugly function.
761  * Return:
762  *	0 if ok, -1 if the final name is too long
763  */
764 
765 static int
fix_path(char * or_name,int * or_len,char * dir_name,int dir_len)766 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
767 {
768 	char *src;
769 	char *dest;
770 	char *start;
771 	int len;
772 
773 	/*
774 	 * we shift the or_name to the right enough to tack in the dir_name
775 	 * at the front. We make sure we have enough space for it all before
776 	 * we start. since dest always ends in a slash, we skip of or_name
777 	 * if it also starts with one.
778 	 */
779 	start = or_name;
780 	src = start + *or_len;
781 	dest = src + dir_len;
782 	if (*start == '/') {
783 		++start;
784 		--dest;
785 	}
786 	if ((len = dest - or_name) > PAXPATHLEN) {
787 		paxwarn(1, "File name %s/%s, too long", dir_name, start);
788 		return(-1);
789 	}
790 	*or_len = len;
791 
792 	/*
793 	 * enough space, shift
794 	 */
795 	while (src >= start)
796 		*dest-- = *src--;
797 	src = dir_name + dir_len - 1;
798 
799 	/*
800 	 * splice in the destination directory name
801 	 */
802 	while (src >= dir_name)
803 		*dest-- = *src--;
804 
805 	*(or_name + len) = '\0';
806 	return(0);
807 }
808 
809 /*
810  * rep_name()
811  *	walk down the list of replacement strings applying each one in order.
812  *	when we find one with a successful substitution, we modify the name
813  *	as specified. if required, we print the results. if the resulting name
814  *	is empty, we will skip this archive member. We use the regexp(3)
815  *	routines (regexp() ought to win a prize as having the most cryptic
816  *	library function manual page).
817  *	--Parameters--
818  *	name is the file name we are going to apply the regular expressions to
819  *	(and may be modified)
820  *	nlen is the length of this name (and is modified to hold the length of
821  *	the final string).
822  *	prnt is a flag that says whether to print the final result.
823  * Return:
824  *	0 if substitution was successful, 1 if we are to skip the file (the name
825  *	ended up empty)
826  */
827 
828 static int
rep_name(char * name,int * nlen,int prnt)829 rep_name(char *name, int *nlen, int prnt)
830 {
831 	REPLACE *pt;
832 	char *inpt;
833 	char *outpt;
834 	char *endpt;
835 	char *rpt;
836 	int found = 0;
837 	int res;
838 	regmatch_t pm[MAXSUBEXP];
839 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
840 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
841 
842 	/*
843 	 * copy the name into buf1, where we will work on it. We need to keep
844 	 * the orig string around so we can print out the result of the final
845 	 * replacement. We build up the final result in nname. inpt points at
846 	 * the string we apply the regular expression to. prnt is used to
847 	 * suppress printing when we handle replacements on the link field
848 	 * (the user already saw that substitution go by)
849 	 */
850 	pt = rephead;
851 	(void)strlcpy(buf1, name, sizeof(buf1));
852 	inpt = buf1;
853 	outpt = nname;
854 	endpt = outpt + PAXPATHLEN;
855 
856 	/*
857 	 * try each replacement string in order
858 	 */
859 	while (pt != NULL) {
860 		do {
861 			/*
862 			 * check for a successful substitution, if not go to
863 			 * the next pattern, or cleanup if we were global
864 			 */
865 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
866 				break;
867 
868 			/*
869 			 * ok we found one. We have three parts, the prefix
870 			 * which did not match, the section that did and the
871 			 * tail (that also did not match). Copy the prefix to
872 			 * the final output buffer (watching to make sure we
873 			 * do not create a string too long).
874 			 */
875 			found = 1;
876 			rpt = inpt + pm[0].rm_so;
877 
878 			while ((inpt < rpt) && (outpt < endpt))
879 				*outpt++ = *inpt++;
880 			if (outpt == endpt)
881 				break;
882 
883 			/*
884 			 * for the second part (which matched the regular
885 			 * expression) apply the substitution using the
886 			 * replacement string and place it the prefix in the
887 			 * final output. If we have problems, skip it.
888 			 */
889 			if ((res = resub(&(pt->rcmp),pm,inpt,pt->nstr,outpt,endpt))
890 			    < 0) {
891 				if (prnt)
892 					paxwarn(1, "Replacement name error %s",
893 					    name);
894 				return(1);
895 			}
896 			outpt += res;
897 
898 			/*
899 			 * we set up to look again starting at the first
900 			 * character in the tail (of the input string right
901 			 * after the last character matched by the regular
902 			 * expression (inpt always points at the first char in
903 			 * the string to process). If we are not doing a global
904 			 * substitution, we will use inpt to copy the tail to
905 			 * the final result. Make sure we do not overrun the
906 			 * output buffer
907 			 */
908 			inpt += pm[0].rm_eo - pm[0].rm_so;
909 
910 			if ((outpt == endpt) || (*inpt == '\0'))
911 				break;
912 
913 			/*
914 			 * if the user wants global we keep trying to
915 			 * substitute until it fails, then we are done.
916 			 */
917 		} while (pt->flgs & GLOB);
918 
919 		if (found)
920 			break;
921 
922 		/*
923 		 * a successful substitution did NOT occur, try the next one
924 		 */
925 		pt = pt->fow;
926 	}
927 
928 	if (found) {
929 		/*
930 		 * we had a substitution, copy the last tail piece (if there is
931 		 * room) to the final result
932 		 */
933 		while ((outpt < endpt) && (*inpt != '\0'))
934 			*outpt++ = *inpt++;
935 
936 		*outpt = '\0';
937 		if ((outpt == endpt) && (*inpt != '\0')) {
938 			if (prnt)
939 				paxwarn(1,"Replacement name too long %s >> %s",
940 				    name, nname);
941 			return(1);
942 		}
943 
944 		/*
945 		 * inform the user of the result if wanted
946 		 */
947 		if (prnt && (pt->flgs & PRNT)) {
948 			if (*nname == '\0')
949 				(void)fprintf(stderr,"%s >> <empty string>\n",
950 				    name);
951 			else
952 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
953 		}
954 
955 		/*
956 		 * if empty inform the caller this file is to be skipped
957 		 * otherwise copy the new name over the orig name and return
958 		 */
959 		if (*nname == '\0')
960 			return(1);
961 		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
962 		name[PAXPATHLEN] = '\0';
963 	}
964 	return(0);
965 }
966 
967 
/*
 * resub()
 *	Expand the replacement string src into dest for one match, using the
 *	old style ed(1) notation: '&' stands for the whole match, "\0".."\9"
 *	for the corresponding pm[] entry, and "\\" / "\&" for a literal
 *	backslash / ampersand. Everything else is copied as is. Output stops
 *	once destend is reached.
 *	The offsets in pm[] are added to orig, so orig has to be the start of
 *	the string those offsets refer to.
 * Return:
 *	-1 if error (reference to a subexpression beyond rp->re_nsub, or the
 *	matched text could not be copied in full), or the number of characters
 *	added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *orig, char *src, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	regmatch_t *sub;
	int nsub = rp->re_nsub;
	int n;
	char ch;

	while ((out < destend) && ((ch = *in++) != '\0')) {
		/*
		 * work out whether this is a reference to matched text
		 * (sub gets set) or an ordinary, possibly escaped, character
		 */
		sub = NULL;
		if (ch == '&') {
			sub = pm;
		} else if ((ch == '\\') && (*in >= '0') && (*in <= '9')) {
			/*
			 * the expression must actually have that many
			 * subexpressions
			 */
			n = *in++ - '0';
			if (n > nsub)
				return(-1);
			sub = pm + n;
		} else if ((ch == '\\') && ((*in == '\\') || (*in == '&'))) {
			ch = *in++;
		}

		if (sub == NULL) {
			/*
			 * ordinary character, just copy it
			 */
			*out++ = ch;
			continue;
		}

		/*
		 * silently ignore a subexpression that did not take part in
		 * the match or matched nothing
		 */
		if ((sub->rm_so < 0) || (sub->rm_eo < 0))
			continue;
		n = sub->rm_eo - sub->rm_so;
		if (n <= 0)
			continue;

		/*
		 * copy the matched text, clipped to the room left in dest;
		 * fail if the match string turns out shorter than expected
		 */
		if (n > (destend - out))
			n = destend - out;
		if (l_strncpy(out, orig + sub->rm_so, n) != n)
			return(-1);
		out += n;
	}
	return(out - dest);
}
1033