1 /* @(#)subst.c 1.27 20/06/11 Copyright 1986,2003-2020 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)subst.c 1.27 20/06/11 Copyright 1986,2003-2020 J. Schilling";
6 #endif
7 /*
8 * Substitution commands
9 *
10 * Copyright (c) 1986,2003-2020 J. Schilling
11 */
12 /*
13 * The contents of this file are subject to the terms of the
14 * Common Development and Distribution License, Version 1.0 only
15 * (the "License"). You may not use this file except in compliance
16 * with the License.
17 *
18 * See the file CDDL.Schily.txt in this distribution for details.
19 * A copy of the CDDL is also available via the Internet at
20 * http://www.opensource.org/licenses/cddl1.txt
21 *
22 * When distributing Covered Code, include this CDDL HEADER in each
23 * file and include the License file CDDL.Schily.txt from this distribution.
24 */
25
26 #include <schily/stdio.h>
27 #include <schily/stdlib.h>
28 #include <schily/standard.h>
29 #include <schily/patmatch.h>
30 #include <schily/string.h>
31 #include <schily/utypes.h>
32 #define GT_COMERR /* #define comerr gtcomerr */
33 #define GT_ERROR /* #define error gterror */
34 #include <schily/schily.h>
35
36 #include <schily/patmatch.h>
37 #ifdef HAVE_REGEX_H
38 #include <regex.h>
39 #endif
40
41 #include "star.h"
42 #include "starsubs.h"
43 #include "pathname.h"
44
45 EXPORT int paxpsubst __PR((char *cmd, BOOL *arg));
46 EXPORT int parsesubst __PR((char *cmd, BOOL *arg));
47 LOCAL int _parsesubst __PR((char *cmd, BOOL *arg, BOOL paxmode));
48 EXPORT BOOL subst __PR((FINFO *info));
49 LOCAL char *substitute __PR((char *from, long fromlen, int idx, char *to, long tolen));
50 LOCAL BOOL simpleto __PR((char *s, long len));
51 #ifdef HAVE_REGEX_H
52 LOCAL int catsub __PR((char *here, char *old, long oldlen,
53 char *to, long tolen, char *limit, regmatch_t *));
54 #else
55 LOCAL int catsub __PR((char *here, char *old, long oldlen,
56 char *to, long tolen, char *limit));
57 #endif
58 EXPORT BOOL ia_change __PR((TCB *ptb, FINFO *info));
59 LOCAL BOOL pax_change __PR((TCB *ptb, FINFO *info));
60 LOCAL void s_enomem __PR((void));
61 EXPORT int fpgetstr __PR((FILE *, pathstore_t *));
62
/*
 * Tables holding the parsed substitution commands.
 * Entry 'i' of every array below describes the i-th command that was
 * accepted by _parsesubst(); 'npat' is the number of valid entries.
 */
#define	NPAT	100
LOCAL	int	npat;		/* Number of defined patterns */
LOCAL	Uchar	*pat[NPAT];	/* Saved list of defined 'from' patterns */
LOCAL	int	patlen[NPAT];	/* Length of the 'from' pattern */
LOCAL	int	maxplen;	/* Maximum length of 'from' pattern */
LOCAL	char	*substpat[NPAT]; /* Saved list of defined 'to' patterns */
LOCAL	int	substlen[NPAT];	/* Length of the 'to' pattern */
LOCAL	int	*aux[NPAT];	/* Aux array (compiled pattern), holds a regex_t in sed(1) mode */
LOCAL	int	alt[NPAT];	/* List of results from patcompile() */
LOCAL	int	*state;		/* State array used by patmatch(), allocated in subst() */
LOCAL	Int32_t	substcnt[NPAT];	/* Subst. count (1 or MAXINT32 for 'g'), negated if 'p' was given */
LOCAL	char	isreg[NPAT];	/* Nonzero: sed(1) style (regex), zero: change(1) style */
75
76 extern FILE *tty;
77 extern FILE *vpr;
78 extern int verbose;
79 extern BOOL xflag;
80 extern BOOL nflag;
81 extern BOOL debug;
82 extern BOOL paxinteract;
83
84 /*
85 * This is the command line parser for tar/pax substitution commands.
86 * Syntax is: -s '/old/new/v'
87 * Supporting sed(1) like substitutions.
88 */
89 EXPORT int
paxpsubst(cmd,arg)90 paxpsubst(cmd, arg)
91 char *cmd; /* The subst command string */
92 BOOL *arg; /* Set to TRUE if we have a valid stubst */
93 {
94 #ifdef HAVE_REGEX_H
95 return (_parsesubst(cmd, arg, TRUE));
96 #else
97 return (_parsesubst(cmd, arg, FALSE));
98 #endif
99 }
100
101 /*
102 * This is the command line parser for tar/pax substitution commands.
103 * Syntax is: -s '/old/new/v'
104 * Supporting change(1) like substitutions.
105 */
106 EXPORT int
parsesubst(cmd,arg)107 parsesubst(cmd, arg)
108 char *cmd; /* The subst command string */
109 BOOL *arg; /* Set to TRUE if we have a valid stubst */
110 {
111 return (_parsesubst(cmd, arg, FALSE));
112 }
113
114 /*
115 * This is the command line parser for tar/pax substitution commands.
116 * Syntax is: -s '/old/new/v'
117 * Supporting both variants of the substitutions.
118 */
119 LOCAL int
_parsesubst(cmd,arg,paxmode)120 _parsesubst(cmd, arg, paxmode)
121 char *cmd; /* The subst command string */
122 BOOL *arg; /* Set to TRUE if we have a valid stubst */
123 BOOL paxmode; /* Whether to use sed(1) instead of change(1) */
124 {
125 register char *from;
126 register char *to;
127 register char *cp;
128 register char *endp;
129 register char c = '/';
130 register char dc; /* Delimiting character */
131 long fromlen;
132 long tolen;
133 int cmdlen;
134 char *subopts = NULL;
135 BOOL printsubst = FALSE;
136 Int32_t count = 1;
137
138 if (debug) {
139 error("Add subst pattern: '%s'.\n", cmd);
140 }
141
142 cmdlen = strlen(cmd);
143 from = cmd;
144 endp = &cmd[cmdlen];
145
146 dc = c = *from;
147 to = ++from;
148 while (to < endp) {
149 c = *to;
150 if (c == '\\')
151 to += 2;
152 else if (c != dc)
153 to++;
154 else
155 break;
156 }
157 if (to >= endp || c != dc)
158 comerrno(EX_BAD, "Missing '%c' delimiter after 'from' substitute string.\n", dc);
159
160 fromlen = to-from;
161 *to++ = '\0';
162 cp = to;
163 while (cp < endp) {
164 c = *cp;
165 if (c == '\\')
166 cp += 2;
167 else if (c != dc)
168 cp++;
169 else
170 break;
171 }
172 if (to >= endp || c != dc)
173 comerrno(EX_BAD, "Missing '%c' delimiter after 'to' substitute string.\n", dc);
174
175 tolen = cp-to;
176 *cp = '\0';
177 if (++cp < endp)
178 subopts = cp;
179
180 while (cp < endp) {
181 c = *cp++;
182 if (c == 'p') {
183 printsubst = TRUE;
184 } else if (c == 'g') {
185 count = MAXINT32;
186 } else {
187 comerrno(EX_BAD, "Bad substitute option '%c'.\n", c);
188 }
189 }
190
191 if (debug) {
192 error("Resulting subst: '%s'%s'(%ld,%ld) opts '%s' simpleto: %d\n",
193 from, to, fromlen, tolen,
194 subopts, simpleto(to, tolen));
195 }
196
197 if (npat >= NPAT)
198 comerrno(EX_BAD, "Too many substitute patterns (max is %d).\n", NPAT);
199
200 pat[npat] = (Uchar *)___savestr(from);
201 patlen[npat] = fromlen;
202 substpat[npat] = ___savestr(to);
203 substlen[npat] = tolen;
204
205
206 if (fromlen > maxplen)
207 maxplen = fromlen;
208
209 if (paxmode) {
210 #ifdef HAVE_REGEX_H
211 int ret;
212
213 aux[npat] = ___malloc(sizeof (regex_t),
214 "compiled subst pattern");
215 ret = regcomp((regex_t *) aux[npat], (char *)pat[npat], 0);
216 if (ret != 0) {
217 char eb[1024];
218
219 regerror(ret, (regex_t *) aux[npat], eb, sizeof (eb));
220 comerrno(EX_BAD, "Bad pattern: '%s'. %s\n",
221 pat[npat], eb);
222 return (-2);
223 }
224 #endif
225 } else {
226 aux[npat] = ___malloc(fromlen*sizeof (int),
227 "compiled subst pattern");
228 if ((alt[npat] = patcompile(pat[npat], patlen[npat],
229 aux[npat])) == 0) {
230 comerrno(EX_BAD, "Bad pattern: '%s'.\n", pat[npat]);
231 return (-2);
232 }
233 }
234 isreg[npat] = paxmode;
235
236 if (printsubst)
237 count *= -1;
238 substcnt[npat] = count;
239 *arg = TRUE;
240 npat++;
241 return (1);
242 }
243
244
245 EXPORT BOOL
subst(info)246 subst(info)
247 FINFO *info;
248 {
249 char *to = NULL;
250 register int i;
251
252 if (!state) {
253 state = ___malloc((maxplen+1)*sizeof (int), "pattern state");
254 }
255
256 info->f_namelen = strlen(info->f_name);
257 /*
258 * Loop over all match & Subst Patterns.
259 * Stop after the first match has been seen.
260 */
261 for (i = 0; i < npat; i++) {
262 to = substitute(info->f_name, info->f_namelen, i, substpat[i], substlen[i]);
263 if (to)
264 break;
265 }
266 if (to) {
267 if (substcnt[i] < 0)
268 error("%s >> %s\n", info->f_name, to);
269 info->f_namelen = strlen(to);
270 info->f_name = to;
271 return (TRUE);
272 }
273
274 return (FALSE);
275 }
276
277
278 LOCAL pathstore_t new;
279 /*
280 * This is the 'real' substitution routine.
281 * It gets called with pre-parsed strings.
282 *
283 * Returns NULL on no-match and on error.
284 */
285 LOCAL char *
substitute(from,fromlen,idx,to,tolen)286 substitute(from, fromlen, idx, to, tolen)
287 char *from; /* The original string to modify */
288 long fromlen; /* strlen(from) */
289 int idx; /* The index in the pat[] array */
290 char *to; /* The substitution */
291 long tolen; /* strlen(to) */
292 {
293 char old[PATH_MAX+1];
294 char *oldp = old;
295 long oldlen = 0;
296 BOOL tosimple;
297 Int32_t n = substcnt[idx];
298 char *end;
299 char *string;
300 size_t soff;
301 int slen;
302 BOOL didmatch = FALSE;
303 BOOL paxmode;
304 #ifdef HAVE_REGEX_H
305 regmatch_t mat[10];
306 regmatch_t *matp;
307 regex_t *re = (regex_t *) aux[idx];
308 #endif
309 #define limit (new.ps_path + new.ps_size)
310
311 if (fromlen == 0)
312 return (NULL);
313 if (new.ps_size == 0 && init_pspace(PS_EXIT, &new) < 0)
314 return (NULL);
315
316 paxmode = isreg[idx];
317
318 tosimple = simpleto(to, tolen);
319
320 string = from;
321 slen = strlen(string);
322 end = string;
323 /*
324 * We simply ignore the 'p'rint statement here as the printing happens
325 * in the subst() function.
326 */
327 if (n < 0)
328 n *= -1;
329 while (n-- > 0) {
330
331 /*
332 * Search the next occurence of the pattern in the 'from' string.
333 */
334 while (*string != '\0') {
335 /*
336 * Loop over the from string for a possible match
337 */
338 #ifdef HAVE_REGEX_H
339 matp = NULL;
340 if (paxmode) {
341 if (regexec(re, string, 10, mat, 0) != 0) {
342 string++;
343 slen--;
344 continue;
345 }
346 end = string + mat[0].rm_eo;
347 matp = mat;
348 } else
349 #endif
350 if ((end = (char *)patmatch(pat[idx], aux[idx],
351 (Uchar *)string, 0, slen, alt[idx],
352 state)) == NULL) {
353
354 string++;
355 slen--;
356 continue;
357 }
358
359 if (!didmatch) {
360 /*
361 * We had a first match. Copy the 'from' string
362 * into our result storage.
363 */
364 didmatch = TRUE;
365 strcpy_pspace(PS_EXIT, &new, from);
366
367 /*
368 * Let 'string' and 'end' have the same offset
369 * in 'new' as they had in 'from' before.
370 */
371 string = new.ps_path + (string - from);
372 end = new.ps_path + (end - from);
373
374 if (!tosimple) {
375 /*
376 * We need to remember the old 'from'
377 * string before, since the replacement
378 * refers to the old 'from' string.
379 */
380 oldlen = end - string;
381 if (strlcpy(old, string, oldlen+1) >=
382 oldlen) {
383 oldp = strndup(string, oldlen);
384 if (oldp == NULL) {
385 s_enomem();
386 return (NULL);
387 }
388 } else {
389 oldp = old;
390 }
391 }
392
393 }
394 break;
395 }
396 if (*string == '\0')
397 break;
398
399 /*
400 * Now delete the old string in the buffer
401 * and insert substitution
402 */
403 if (tosimple) {
404 char xold[PATH_MAX+1];
405 char *xoldp;
406
407 /*
408 * Remember the old string after the matching part.
409 */
410 if (strlcpy(xold, end, sizeof (xold)) >=
411 sizeof (xold)) {
412 xoldp = strdup(end);
413 if (xoldp == NULL) {
414 s_enomem();
415 return (NULL);
416 }
417 } else {
418 xoldp = xold;
419 }
420
421
422 if ((string+tolen) >= limit) {
423 soff = string - new.ps_path;
424 if (incr_pspace(PS_STDERR, &new,
425 1 + (string+tolen) - limit) < 0) {
426 s_enomem();
427 if (xoldp != xold)
428 free(xoldp);
429 goto over;
430 }
431 string = new.ps_path + soff;
432 }
433 strlcpy((char *)string, (char *)to, tolen+1); /* insert */
434
435 /*
436 * Append non-maching old tail.
437 */
438 if ((&string[tolen] + strlen(xoldp)) >= limit) {
439 soff = string - new.ps_path;
440 if (incr_pspace(PS_STDERR, &new,
441 1 + (string+tolen) - limit) < 0) {
442 s_enomem();
443 if (xoldp != xold)
444 free(xoldp);
445 goto over;
446 }
447 string = new.ps_path + soff;
448 }
449 strcpy((char *)&string[tolen], xoldp);
450 if (xoldp != xold)
451 free(xoldp);
452 } else {
453 soff = string - new.ps_path;
454 #ifdef HAVE_REGEX_H
455 tolen = catsub(string, oldp, oldlen, to, tolen, limit,
456 matp);
457 #else
458 tolen = catsub(string, oldp, oldlen, to, tolen, limit);
459 #endif
460 string = new.ps_path + soff;
461 if (oldp != old)
462 free(oldp);
463 if (tolen < 0) {
464 if (new.ps_path)
465 new.ps_path[0] = '\0';
466 return (new.ps_path);
467 }
468 }
469 string = &string[tolen];
470 slen = strlen(string);
471 }
472 if (didmatch)
473 return (new.ps_path);
474 return (NULL);
475 over:
476 errmsgno(EX_BAD, "Substitution path overflow.\n");
477 if (new.ps_path)
478 new.ps_path[0] = '\0';
479 return (new.ps_path);
480 }
481 #undef limit
482
483 /*
484 * Check is this is a 'simple' 'to'-substitution string
485 * that does not require to be expanded via 'catsub()'.
486 */
487 LOCAL BOOL
simpleto(s,len)488 simpleto(s, len)
489 register char *s;
490 register long len;
491 {
492 register char c;
493
494 if (len <= 0)
495 return (TRUE);
496 while (--len >= 0) {
497 c = *s++;
498 if (c == '\\' || c == '&')
499 return (FALSE);
500 }
501 return (TRUE);
502 }
503
504 /*
505 * Insert the substitution string.
506 * The '&' character in the to string is substituted with the old from string.
507 */
508 LOCAL int
509 #ifdef HAVE_REGEX_H
catsub(here,old,oldlen,to,tolen,limit,mat)510 catsub(here, old, oldlen, to, tolen, limit, mat)
511 #else
512 catsub(here, old, oldlen, to, tolen, limit)
513 #endif
514 register char *here;
515 register char *old;
516 register long oldlen;
517 register char *to;
518 register long tolen;
519 register char *limit;
520 #ifdef HAVE_REGEX_H
521 regmatch_t *mat;
522 #endif
523 {
524 char xold[PATH_MAX+1];
525 char *xoldp;
526 char *p = here;
527 size_t len;
528 size_t hoff;
529
530 if (tolen <= 0)
531 return (0);
532
533 /*
534 * Remember the old string after the matching part.
535 */
536 if (strlcpy(xold, &here[oldlen], sizeof (xold)) >= sizeof (xold)) {
537 xoldp = strdup(&here[oldlen]);
538 if (xoldp == NULL) {
539 s_enomem();
540 return (-1);
541 }
542 } else {
543 xoldp = xold;
544 }
545
546 while (--tolen >= 0) {
547 if (here >= limit) {
548 hoff = here - new.ps_path;
549 if (incr_pspace(PS_STDERR, &new,
550 1 + here - limit) < 0) {
551 s_enomem();
552 goto over;
553 }
554 here = new.ps_path + hoff;
555 }
556 #ifdef HAVE_REGEX_H
557 if (*to == '\\' && mat && to[1] >= '1' && to[1] <= '9') {
558 int i = to[1] - '0';
559 size_t olen;
560
561 to += 2;
562 tolen--;
563 if (mat[i].rm_so == -1)
564 continue;
565
566 olen = mat[i].rm_eo - mat[i].rm_so;
567 if ((here+olen) >= limit) {
568 hoff = here - new.ps_path;
569 if (incr_pspace(PS_STDERR, &new,
570 1 + (here+olen) - limit) < 0) {
571 s_enomem();
572 goto over;
573 }
574 here = new.ps_path + hoff;
575 }
576 strlcpy(here, old+mat[i].rm_so, olen+1);
577 here += olen;
578 continue;
579 } else
580 #endif
581 if (*to == '\\') {
582 if (--tolen >= 0)
583 *here++ = *++to;
584 } else if (*to == '&') {
585 if ((here+oldlen) >= limit) {
586 hoff = here - new.ps_path;
587 if (incr_pspace(PS_STDERR, &new,
588 1 + (here+oldlen) - limit) < 0) {
589 s_enomem();
590 goto over;
591 }
592 here = new.ps_path + hoff;
593 }
594 strlcpy(here, old, oldlen+1);
595 here += oldlen;
596 } else {
597 *here++ = *to;
598 }
599 to++;
600 }
601 len = strlen(xoldp);
602 if ((here+len) >= limit) {
603 hoff = here - new.ps_path;
604 if (incr_pspace(PS_STDERR, &new,
605 1 + (here+len) - limit) < 0) {
606 s_enomem();
607 goto over;
608 }
609 here = new.ps_path + hoff;
610 }
611 strcpy(here, xoldp);
612 if (xoldp != xold)
613 free(xoldp);
614 return (here - p);
615 over:
616 errmsgno(EX_BAD, "Substitution path overflow.\n");
617 if (xoldp != xold)
618 free(xoldp);
619 return (-1);
620 }
621
622 /* ARGSUSED */
623 EXPORT BOOL
ia_change(ptb,info)624 ia_change(ptb, info)
625 TCB *ptb;
626 FINFO *info;
627 {
628 FINFO cinfo;
629 char ans;
630 char abuf[3];
631 int len;
632
633 if (paxinteract)
634 return (pax_change(ptb, info));
635
636 if (verbose)
637 list_file(info);
638 else
639 vprint(info);
640 if (nflag)
641 return (FALSE);
642 fgtprintf(vpr, "get/put ? Y(es)/N(o)/C(hange name) :"); fflush(vpr);
643 abuf[0] = '\0';
644 len = fgetstr(tty, abuf, sizeof (abuf));
645 if (len > 0 && abuf[len-1] != '\n') {
646 while (getc(tty) != '\n') {
647 if (feof(tty) || ferror(tty))
648 break;
649 }
650 }
651 if ((ans = toupper(abuf[0])) == 'Y')
652 return (TRUE);
653 else if (ans == 'C') {
654 for (;;) {
655 fgtprintf(vpr, "Enter new name:");
656 fflush(vpr);
657 len = fpgetstr(tty, &new);
658 if (len < 0)
659 comexit(-2);
660 else if (len > 0)
661 break;
662 }
663 info->f_name = new.ps_path;
664 if (xflag) {
665 if (newer(info, &cinfo))
666 return (FALSE);
667 if (is_symlink(info) && same_symlink(info))
668 return (FALSE);
669 }
670 return (TRUE);
671 }
672 return (FALSE);
673 }
674
675 /* ARGSUSED */
676 LOCAL BOOL
pax_change(ptb,info)677 pax_change(ptb, info)
678 TCB *ptb;
679 FINFO *info;
680 {
681 FINFO cinfo;
682 int len;
683
684 if (verbose)
685 list_file(info);
686 else
687 vprint(info);
688 if (nflag)
689 return (FALSE);
690
691 for (;;) {
692 fgtprintf(vpr, "%s change?", info->f_name);
693 fflush(vpr);
694 len = fpgetstr(tty, &new);
695 if (len < 0)
696 comexit(-2);
697 else
698 break;
699 }
700 if (new.ps_path[0] == '\0') /* Skip file */
701 return (FALSE);
702 if (new.ps_path[0] == '.' &&
703 new.ps_path[1] == '\0') /* Leave name as is */
704 return (TRUE);
705
706 info->f_name = new.ps_path;
707 if (xflag && newer(info, &cinfo))
708 return (FALSE);
709 return (TRUE);
710 }
711
712 LOCAL void
s_enomem()713 s_enomem()
714 {
715 errmsgno(EX_BAD, "No memory for substitution.\n");
716 xstats.s_substerrs++;
717 }
718
719 /*
720 * Read a line of unspecified and arbitrary length from FILE *
721 * and place the result in a pathstore_t object.
722 */
723 EXPORT int
fpgetstr(f,p)724 fpgetstr(f, p)
725 register FILE *f;
726 pathstore_t *p;
727 {
728 int ret = getdelim(&p->ps_path, &p->ps_size, '\n', f);
729
730 if (ret <= 0)
731 return (ret);
732
733 if (p->ps_path[ret-1] == '\n')
734 p->ps_path[--ret] = '\0';
735 return (ret);
736 }
737