1 /* @(#)subst.c	1.27 20/06/11 Copyright 1986,2003-2020 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)subst.c	1.27 20/06/11 Copyright 1986,2003-2020 J. Schilling";
6 #endif
7 /*
8  *	Substitution commands
9  *
10  *	Copyright (c) 1986,2003-2020 J. Schilling
11  */
12 /*
13  * The contents of this file are subject to the terms of the
14  * Common Development and Distribution License, Version 1.0 only
15  * (the "License").  You may not use this file except in compliance
16  * with the License.
17  *
18  * See the file CDDL.Schily.txt in this distribution for details.
19  * A copy of the CDDL is also available via the Internet at
20  * http://www.opensource.org/licenses/cddl1.txt
21  *
22  * When distributing Covered Code, include this CDDL HEADER in each
23  * file and include the License file CDDL.Schily.txt from this distribution.
24  */
25 
26 #include <schily/stdio.h>
27 #include <schily/stdlib.h>
28 #include <schily/standard.h>
29 #include <schily/patmatch.h>
30 #include <schily/string.h>
31 #include <schily/utypes.h>
32 #define	GT_COMERR		/* #define comerr gtcomerr */
33 #define	GT_ERROR		/* #define error gterror   */
34 #include <schily/schily.h>
35 
36 #include <schily/patmatch.h>
37 #ifdef	HAVE_REGEX_H
38 #include <regex.h>
39 #endif
40 
41 #include "star.h"
42 #include "starsubs.h"
43 #include "pathname.h"
44 
45 EXPORT	int	paxpsubst	__PR((char *cmd, BOOL *arg));
46 EXPORT	int	parsesubst	__PR((char *cmd, BOOL *arg));
47 LOCAL	int	_parsesubst	__PR((char *cmd, BOOL *arg, BOOL paxmode));
48 EXPORT	BOOL	subst		__PR((FINFO *info));
49 LOCAL	char	*substitute	__PR((char *from, long fromlen, int idx, char *to, long tolen));
50 LOCAL	BOOL	simpleto	__PR((char *s, long len));
51 #ifdef	HAVE_REGEX_H
52 LOCAL	int	catsub		__PR((char *here, char *old, long oldlen,
53 					char *to, long tolen, char *limit, regmatch_t *));
54 #else
55 LOCAL	int	catsub		__PR((char *here, char *old, long oldlen,
56 					char *to, long tolen, char *limit));
57 #endif
58 EXPORT	BOOL	ia_change	__PR((TCB *ptb, FINFO *info));
59 LOCAL	BOOL	pax_change	__PR((TCB *ptb, FINFO *info));
60 LOCAL	void	s_enomem	__PR((void));
61 EXPORT	int	fpgetstr	__PR((FILE *, pathstore_t *));
62 
/*
 * Tables describing the substitute commands parsed so far.
 * _parsesubst() fills slot 'npat' of every array and then increments npat,
 * so index i in each array belongs to the i-th command that was added.
 */
#define	NPAT	100
LOCAL	int	npat;		/* Number of defined patterns */
LOCAL	Uchar	*pat[NPAT];	/* Saved list of defined 'from' patterns */
LOCAL	int	patlen[NPAT];	/* Length of the 'from' pattern */
LOCAL	int	maxplen;	/* Maximum length of 'from' pattern */
LOCAL	char	*substpat[NPAT]; /* Saved list of defined 'to' patterns */
LOCAL	int	substlen[NPAT];	/* Length of the 'to' pattern */
LOCAL	int	*aux[NPAT];	/* Aux array from patcompile() or a regex_t */
LOCAL	int	alt[NPAT];	/* List of results from patcompile() */
LOCAL	int	*state;		/* State array used by patmatch() */
LOCAL	Int32_t	substcnt[NPAT];	/* Subst. count or MAXINT32 for 'g', < 0: 'p' */
LOCAL	char	isreg[NPAT];	/* Whether we use sed(1) or change(1) style */
75 
76 extern	FILE	*tty;
77 extern	FILE	*vpr;
78 extern	int	verbose;
79 extern	BOOL	xflag;
80 extern	BOOL	nflag;
81 extern	BOOL	debug;
82 extern	BOOL	paxinteract;
83 
84 /*
85  * This is the command line parser for tar/pax substitution commands.
86  * Syntax is: -s '/old/new/v'
87  * Supporting sed(1) like substitutions.
88  */
89 EXPORT int
paxpsubst(cmd,arg)90 paxpsubst(cmd, arg)
91 	char	*cmd;		/* The subst command string		*/
92 	BOOL	*arg;		/* Set to TRUE if we have a valid stubst */
93 {
94 #ifdef	HAVE_REGEX_H
95 	return (_parsesubst(cmd, arg, TRUE));
96 #else
97 	return (_parsesubst(cmd, arg, FALSE));
98 #endif
99 }
100 
101 /*
102  * This is the command line parser for tar/pax substitution commands.
103  * Syntax is: -s '/old/new/v'
104  * Supporting change(1) like substitutions.
105  */
106 EXPORT int
parsesubst(cmd,arg)107 parsesubst(cmd, arg)
108 	char	*cmd;		/* The subst command string		*/
109 	BOOL	*arg;		/* Set to TRUE if we have a valid stubst */
110 {
111 	return (_parsesubst(cmd, arg, FALSE));
112 }
113 
114 /*
115  * This is the command line parser for tar/pax substitution commands.
116  * Syntax is: -s '/old/new/v'
117  * Supporting both variants of the substitutions.
118  */
119 LOCAL int
_parsesubst(cmd,arg,paxmode)120 _parsesubst(cmd, arg, paxmode)
121 	char	*cmd;		/* The subst command string		*/
122 	BOOL	*arg;		/* Set to TRUE if we have a valid stubst */
123 	BOOL	paxmode;	/* Whether to use sed(1) instead of change(1) */
124 {
125 	register char	*from;
126 	register char	*to;
127 	register char	*cp;
128 	register char	*endp;
129 	register char	c = '/';
130 	register char	dc;		/* Delimiting character */
131 		long	fromlen;
132 		long	tolen;
133 		int	cmdlen;
134 		char	*subopts = NULL;
135 		BOOL	printsubst = FALSE;
136 		Int32_t	count = 1;
137 
138 	if (debug) {
139 		error("Add subst pattern: '%s'.\n", cmd);
140 	}
141 
142 	cmdlen = strlen(cmd);
143 	from = cmd;
144 	endp = &cmd[cmdlen];
145 
146 	dc = c = *from;
147 	to = ++from;
148 	while (to < endp) {
149 		c = *to;
150 		if (c == '\\')
151 			to += 2;
152 		else if (c != dc)
153 			to++;
154 		else
155 			break;
156 	}
157 	if (to >= endp || c != dc)
158 		comerrno(EX_BAD, "Missing '%c' delimiter after 'from' substitute string.\n", dc);
159 
160 	fromlen = to-from;
161 	*to++ = '\0';
162 	cp = to;
163 	while (cp < endp) {
164 		c = *cp;
165 		if (c == '\\')
166 			cp += 2;
167 		else if (c != dc)
168 			cp++;
169 		else
170 			break;
171 	}
172 	if (to >= endp || c != dc)
173 		comerrno(EX_BAD, "Missing '%c' delimiter after 'to' substitute string.\n", dc);
174 
175 	tolen = cp-to;
176 	*cp = '\0';
177 	if (++cp < endp)
178 		subopts = cp;
179 
180 	while (cp < endp) {
181 		c = *cp++;
182 		if (c == 'p') {
183 			printsubst = TRUE;
184 		} else if (c == 'g') {
185 			count = MAXINT32;
186 		} else {
187 			comerrno(EX_BAD, "Bad substitute option '%c'.\n", c);
188 		}
189 	}
190 
191 	if (debug) {
192 		error("Resulting subst:   '%s'%s'(%ld,%ld) opts '%s' simpleto: %d\n",
193 			from, to, fromlen, tolen,
194 			subopts, simpleto(to, tolen));
195 	}
196 
197 	if (npat >= NPAT)
198 		comerrno(EX_BAD, "Too many substitute patterns (max is %d).\n", NPAT);
199 
200 	pat[npat] = (Uchar *)___savestr(from);
201 	patlen[npat] = fromlen;
202 	substpat[npat] = ___savestr(to);
203 	substlen[npat] = tolen;
204 
205 
206 	if (fromlen > maxplen)
207 		maxplen = fromlen;
208 
209 	if (paxmode) {
210 #ifdef	HAVE_REGEX_H
211 		int	ret;
212 
213 		aux[npat] = ___malloc(sizeof (regex_t),
214 					"compiled subst pattern");
215 		ret = regcomp((regex_t *) aux[npat], (char *)pat[npat], 0);
216 		if (ret != 0) {
217 			char	eb[1024];
218 
219 			regerror(ret, (regex_t *) aux[npat], eb, sizeof (eb));
220 			comerrno(EX_BAD, "Bad pattern: '%s'. %s\n",
221 				pat[npat], eb);
222 			return (-2);
223 		}
224 #endif
225 	} else {
226 		aux[npat] = ___malloc(fromlen*sizeof (int),
227 					"compiled subst pattern");
228 		if ((alt[npat] = patcompile(pat[npat], patlen[npat],
229 						aux[npat])) == 0) {
230 			comerrno(EX_BAD, "Bad pattern: '%s'.\n", pat[npat]);
231 			return (-2);
232 		}
233 	}
234 	isreg[npat] = paxmode;
235 
236 	if (printsubst)
237 		count *= -1;
238 	substcnt[npat] = count;
239 	*arg = TRUE;
240 	npat++;
241 	return (1);
242 }
243 
244 
245 EXPORT BOOL
subst(info)246 subst(info)
247 	FINFO	*info;
248 {
249 	char	*to = NULL;
250 	register int	i;
251 
252 	if (!state) {
253 		state = ___malloc((maxplen+1)*sizeof (int), "pattern state");
254 	}
255 
256 	info->f_namelen = strlen(info->f_name);
257 	/*
258 	 * Loop over all match & Subst Patterns.
259 	 * Stop after the first match has been seen.
260 	 */
261 	for (i = 0; i < npat; i++) {
262 		to = substitute(info->f_name, info->f_namelen, i, substpat[i], substlen[i]);
263 		if (to)
264 			break;
265 	}
266 	if (to) {
267 		if (substcnt[i] < 0)
268 			error("%s >> %s\n", info->f_name, to);
269 		info->f_namelen = strlen(to);
270 		info->f_name = to;
271 		return (TRUE);
272 	}
273 
274 	return (FALSE);
275 }
276 
277 
278 LOCAL	pathstore_t	new;
279 /*
280  * This is the 'real' substitution routine.
281  * It gets called with pre-parsed strings.
282  *
283  * Returns NULL on no-match and on error.
284  */
285 LOCAL char *
substitute(from,fromlen,idx,to,tolen)286 substitute(from, fromlen, idx, to, tolen)
287 	char	*from;			/* The original string to modify */
288 	long	fromlen;		/* strlen(from)			*/
289 	int	idx;			/* The index in the pat[] array	*/
290 	char	*to;			/* The substitution		*/
291 	long	tolen;			/* strlen(to)			*/
292 {
293 	char	old[PATH_MAX+1];
294 	char	*oldp = old;
295 	long	oldlen = 0;
296 	BOOL	tosimple;
297 	Int32_t n = substcnt[idx];
298 	char	*end;
299 	char	*string;
300 	size_t	soff;
301 	int	slen;
302 	BOOL	didmatch = FALSE;
303 	BOOL	paxmode;
304 #ifdef	HAVE_REGEX_H
305 	regmatch_t	mat[10];
306 	regmatch_t	*matp;
307 	regex_t		*re = (regex_t *) aux[idx];
308 #endif
309 #define	limit	(new.ps_path + new.ps_size)
310 
311 	if (fromlen == 0)
312 		return (NULL);
313 	if (new.ps_size == 0 && init_pspace(PS_EXIT, &new) < 0)
314 		return (NULL);
315 
316 	paxmode = isreg[idx];
317 
318 	tosimple = simpleto(to, tolen);
319 
320 	string = from;
321 	slen = strlen(string);
322 	end = string;
323 	/*
324 	 * We simply ignore the 'p'rint statement here as the printing happens
325 	 * in the subst() function.
326 	 */
327 	if (n < 0)
328 		n *= -1;
329 	while (n-- > 0) {
330 
331 		/*
332 		 * Search the next occurence of the pattern in the 'from' string.
333 		 */
334 		while (*string != '\0') {
335 			/*
336 			 * Loop over the from string for a possible match
337 			 */
338 #ifdef	HAVE_REGEX_H
339 			matp = NULL;
340 			if (paxmode) {
341 				if (regexec(re, string, 10, mat, 0) != 0) {
342 					string++;
343 					slen--;
344 					continue;
345 				}
346 				end = string + mat[0].rm_eo;
347 				matp = mat;
348 			} else
349 #endif
350 			if ((end = (char *)patmatch(pat[idx], aux[idx],
351 			    (Uchar *)string, 0, slen, alt[idx],
352 			    state)) == NULL) {
353 
354 				string++;
355 				slen--;
356 				continue;
357 			}
358 
359 			if (!didmatch) {
360 				/*
361 				 * We had a first match. Copy the 'from' string
362 				 * into our result storage.
363 				 */
364 				didmatch = TRUE;
365 				strcpy_pspace(PS_EXIT, &new, from);
366 
367 				/*
368 				 * Let 'string' and 'end' have the same offset
369 				 * in 'new' as they had in 'from' before.
370 				 */
371 				string = new.ps_path + (string - from);
372 				end = new.ps_path + (end - from);
373 
374 				if (!tosimple) {
375 					/*
376 					 * We need to remember the old 'from'
377 					 * string before, since the replacement
378 					 * refers to the old 'from' string.
379 					 */
380 					oldlen = end - string;
381 					if (strlcpy(old, string, oldlen+1) >=
382 									oldlen) {
383 						oldp = strndup(string, oldlen);
384 						if (oldp == NULL) {
385 							s_enomem();
386 							return (NULL);
387 						}
388 					} else {
389 						oldp = old;
390 					}
391 				}
392 
393 			}
394 			break;
395 		}
396 		if (*string == '\0')
397 			break;
398 
399 		/*
400 		 * Now delete the old string in the buffer
401 		 * and insert substitution
402 		 */
403 		if (tosimple) {
404 			char	xold[PATH_MAX+1];
405 			char	*xoldp;
406 
407 			/*
408 			 * Remember the old string after the matching part.
409 			 */
410 			if (strlcpy(xold, end, sizeof (xold)) >=
411 							sizeof (xold)) {
412 				xoldp = strdup(end);
413 				if (xoldp == NULL) {
414 					s_enomem();
415 					return (NULL);
416 				}
417 			} else {
418 				xoldp = xold;
419 			}
420 
421 
422 			if ((string+tolen) >= limit) {
423 				soff = string - new.ps_path;
424 				if (incr_pspace(PS_STDERR, &new,
425 					    1 + (string+tolen) - limit) < 0) {
426 					s_enomem();
427 					if (xoldp != xold)
428 						free(xoldp);
429 					goto over;
430 				}
431 				string = new.ps_path + soff;
432 			}
433 			strlcpy((char *)string, (char *)to, tolen+1);	/* insert */
434 
435 			/*
436 			 * Append non-maching old tail.
437 			 */
438 			if ((&string[tolen] + strlen(xoldp)) >= limit) {
439 				soff = string - new.ps_path;
440 				if (incr_pspace(PS_STDERR, &new,
441 					    1 + (string+tolen) - limit) < 0) {
442 					s_enomem();
443 					if (xoldp != xold)
444 						free(xoldp);
445 					goto over;
446 				}
447 				string = new.ps_path + soff;
448 			}
449 			strcpy((char *)&string[tolen], xoldp);
450 			if (xoldp != xold)
451 				free(xoldp);
452 		} else {
453 			soff = string - new.ps_path;
454 #ifdef	HAVE_REGEX_H
455 			tolen = catsub(string, oldp, oldlen, to, tolen, limit,
456 					matp);
457 #else
458 			tolen = catsub(string, oldp, oldlen, to, tolen, limit);
459 #endif
460 			string = new.ps_path + soff;
461 			if (oldp != old)
462 				free(oldp);
463 			if (tolen < 0) {
464 				if (new.ps_path)
465 					new.ps_path[0] = '\0';
466 				return (new.ps_path);
467 			}
468 		}
469 		string = &string[tolen];
470 		slen = strlen(string);
471 	}
472 	if (didmatch)
473 		return (new.ps_path);
474 	return (NULL);
475 over:
476 	errmsgno(EX_BAD, "Substitution path overflow.\n");
477 	if (new.ps_path)
478 		new.ps_path[0] = '\0';
479 	return (new.ps_path);
480 }
481 #undef	limit
482 
483 /*
484  * Check is this is a 'simple' 'to'-substitution string
485  * that does not require to be expanded via 'catsub()'.
486  */
487 LOCAL BOOL
simpleto(s,len)488 simpleto(s, len)
489 	register char	*s;
490 	register long	len;
491 {
492 	register char	c;
493 
494 	if (len <= 0)
495 		return (TRUE);
496 	while (--len >= 0) {
497 		c = *s++;
498 		if (c == '\\' || c == '&')
499 			return (FALSE);
500 	}
501 	return (TRUE);
502 }
503 
504 /*
505  * Insert the substitution string.
506  * The '&' character in the to string is substituted with the old from string.
507  */
508 LOCAL int
509 #ifdef	HAVE_REGEX_H
catsub(here,old,oldlen,to,tolen,limit,mat)510 catsub(here, old, oldlen, to, tolen, limit, mat)
511 #else
512 catsub(here, old, oldlen, to, tolen, limit)
513 #endif
514 	register char	*here;
515 	register char	*old;
516 	register long	oldlen;
517 	register char	*to;
518 	register long	tolen;
519 	register char	*limit;
520 #ifdef	HAVE_REGEX_H
521 	regmatch_t	*mat;
522 #endif
523 {
524 	char	xold[PATH_MAX+1];
525 	char	*xoldp;
526 	char	*p = here;
527 	size_t	len;
528 	size_t	hoff;
529 
530 	if (tolen <= 0)
531 		return (0);
532 
533 	/*
534 	 * Remember the old string after the matching part.
535 	 */
536 	if (strlcpy(xold, &here[oldlen], sizeof (xold)) >= sizeof (xold)) {
537 		xoldp = strdup(&here[oldlen]);
538 		if (xoldp == NULL) {
539 			s_enomem();
540 			return (-1);
541 		}
542 	} else {
543 		xoldp = xold;
544 	}
545 
546 	while (--tolen >= 0) {
547 		if (here >= limit) {
548 			hoff = here - new.ps_path;
549 			if (incr_pspace(PS_STDERR, &new,
550 				    1 + here - limit) < 0) {
551 				s_enomem();
552 				goto over;
553 			}
554 			here = new.ps_path + hoff;
555 		}
556 #ifdef	HAVE_REGEX_H
557 		if (*to == '\\' && mat && to[1] >= '1' && to[1] <= '9') {
558 			int	i = to[1] - '0';
559 			size_t	olen;
560 
561 			to += 2;
562 			tolen--;
563 			if (mat[i].rm_so == -1)
564 				continue;
565 
566 			olen = mat[i].rm_eo - mat[i].rm_so;
567 			if ((here+olen) >= limit) {
568 				hoff = here - new.ps_path;
569 				if (incr_pspace(PS_STDERR, &new,
570 					    1 + (here+olen) - limit) < 0) {
571 					s_enomem();
572 					goto over;
573 				}
574 				here = new.ps_path + hoff;
575 			}
576 			strlcpy(here, old+mat[i].rm_so, olen+1);
577 			here += olen;
578 			continue;
579 		} else
580 #endif
581 		if (*to == '\\') {
582 			if (--tolen >= 0)
583 				*here++ = *++to;
584 		} else if (*to == '&') {
585 			if ((here+oldlen) >= limit) {
586 				hoff = here - new.ps_path;
587 				if (incr_pspace(PS_STDERR, &new,
588 					    1 + (here+oldlen) - limit) < 0) {
589 					s_enomem();
590 					goto over;
591 				}
592 				here = new.ps_path + hoff;
593 			}
594 			strlcpy(here, old, oldlen+1);
595 			here += oldlen;
596 		} else {
597 			*here++ = *to;
598 		}
599 		to++;
600 	}
601 	len = strlen(xoldp);
602 	if ((here+len) >= limit) {
603 		hoff = here - new.ps_path;
604 		if (incr_pspace(PS_STDERR, &new,
605 			    1 + (here+len) - limit) < 0) {
606 			s_enomem();
607 			goto over;
608 		}
609 		here = new.ps_path + hoff;
610 	}
611 	strcpy(here, xoldp);
612 	if (xoldp != xold)
613 		free(xoldp);
614 	return (here - p);
615 over:
616 	errmsgno(EX_BAD, "Substitution path overflow.\n");
617 	if (xoldp != xold)
618 		free(xoldp);
619 	return (-1);
620 }
621 
622 /* ARGSUSED */
623 EXPORT BOOL
ia_change(ptb,info)624 ia_change(ptb, info)
625 	TCB	*ptb;
626 	FINFO	*info;
627 {
628 	FINFO	cinfo;
629 	char	ans;
630 	char	abuf[3];
631 	int	len;
632 
633 	if (paxinteract)
634 		return (pax_change(ptb, info));
635 
636 	if (verbose)
637 		list_file(info);
638 	else
639 		vprint(info);
640 	if (nflag)
641 		return (FALSE);
642 	fgtprintf(vpr, "get/put ? Y(es)/N(o)/C(hange name) :"); fflush(vpr);
643 	abuf[0] = '\0';
644 	len = fgetstr(tty, abuf, sizeof (abuf));
645 	if (len > 0 && abuf[len-1] != '\n') {
646 		while (getc(tty) != '\n') {
647 			if (feof(tty) || ferror(tty))
648 				break;
649 		}
650 	}
651 	if ((ans = toupper(abuf[0])) == 'Y')
652 		return (TRUE);
653 	else if (ans == 'C') {
654 		for (;;) {
655 			fgtprintf(vpr, "Enter new name:");
656 			fflush(vpr);
657 			len = fpgetstr(tty, &new);
658 			if (len < 0)
659 				comexit(-2);
660 			else if (len > 0)
661 				break;
662 		}
663 		info->f_name = new.ps_path;
664 		if (xflag) {
665 			if (newer(info, &cinfo))
666 				return (FALSE);
667 			if (is_symlink(info) && same_symlink(info))
668 				return (FALSE);
669 		}
670 		return (TRUE);
671 	}
672 	return (FALSE);
673 }
674 
675 /* ARGSUSED */
676 LOCAL BOOL
pax_change(ptb,info)677 pax_change(ptb, info)
678 	TCB	*ptb;
679 	FINFO	*info;
680 {
681 	FINFO	cinfo;
682 	int	len;
683 
684 	if (verbose)
685 		list_file(info);
686 	else
687 		vprint(info);
688 	if (nflag)
689 		return (FALSE);
690 
691 	for (;;) {
692 		fgtprintf(vpr, "%s change?", info->f_name);
693 		fflush(vpr);
694 		len = fpgetstr(tty, &new);
695 		if (len < 0)
696 			comexit(-2);
697 		else
698 			break;
699 	}
700 	if (new.ps_path[0] == '\0')		/* Skip file */
701 		return (FALSE);
702 	if (new.ps_path[0] == '.' &&
703 	    new.ps_path[1] == '\0')		/* Leave name as is */
704 		return (TRUE);
705 
706 	info->f_name = new.ps_path;
707 	if (xflag && newer(info, &cinfo))
708 		return (FALSE);
709 	return (TRUE);
710 }
711 
712 LOCAL void
s_enomem()713 s_enomem()
714 {
715 	errmsgno(EX_BAD, "No memory for substitution.\n");
716 	xstats.s_substerrs++;
717 }
718 
719 /*
720  * Read a line of unspecified and arbitrary length from FILE *
721  * and place the result in a pathstore_t object.
722  */
723 EXPORT int
fpgetstr(f,p)724 fpgetstr(f, p)
725 	register	FILE	*f;
726 		pathstore_t	*p;
727 {
728 	int	ret = getdelim(&p->ps_path, &p->ps_size, '\n', f);
729 
730 	if (ret <= 0)
731 		return (ret);
732 
733 	if (p->ps_path[ret-1] == '\n')
734 		p->ps_path[--ret] = '\0';
735 	return (ret);
736 }
737