1 /* This file is part of GNU tar.
2    Copyright 2006-2021 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify it
5    under the terms of the GNU General Public License as published by the
6    Free Software Foundation; either version 3, or (at your option) any later
7    version.
8 
9    This program is distributed in the hope that it will be useful, but
10    WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
12    Public License for more details.
13 
14    You should have received a copy of the GNU General Public License along
15    with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16 
17 #include <system.h>
18 #include <regex.h>
19 #include "common.h"
20 
/* How many matches of the regular expression a transform replaces.  */
enum transform_type
  {
    transform_first = 0,  /* Stop after the first replacement */
    transform_global = 1  /* Keep replacing through the whole name */
  };
26 
/* Kinds of segments a compiled replacement expression consists of.  */
enum replace_segm_type
  {
    segm_literal = 0,   /* Text copied to the output as is */
    segm_backref = 1,   /* Reference to a matched (sub)expression */
    segm_case_ctl = 2   /* Case conversion control (GNU extension) */
  };
33 
/* Case conversion operations available in a replacement expression.  */
enum case_ctl_type
  {
    ctl_stop = 0,         /* No case conversion / end the current one */
    ctl_upcase_next = 1,  /* Uppercase the next character only */
    ctl_locase_next = 2,  /* Lowercase the next character only */
    ctl_upcase = 3,       /* Uppercase everything until ctl_stop */
    ctl_locase = 4        /* Lowercase everything until ctl_stop */
  };
42 
43 struct replace_segm
44 {
45   struct replace_segm *next;
46   enum replace_segm_type type;
47   union
48   {
49     struct
50     {
51       char *ptr;
52       size_t size;
53     } literal;                /* type == segm_literal */
54     size_t ref;               /* type == segm_backref */
55     enum case_ctl_type ctl;   /* type == segm_case_ctl */
56   } v;
57 };
58 
59 struct transform
60 {
61   struct transform *next;
62   enum transform_type transform_type;
63   int flags;
64   unsigned match_number;
65   regex_t regex;
66   /* Compiled replacement expression */
67   struct replace_segm *repl_head, *repl_tail;
68   size_t segm_count; /* Number of elements in the above list */
69 };
70 
71 
72 
73 static int transform_flags = XFORM_ALL;
74 static struct transform *transform_head, *transform_tail;
75 
76 static struct transform *
new_transform(void)77 new_transform (void)
78 {
79   struct transform *p = xzalloc (sizeof *p);
80   if (transform_tail)
81     transform_tail->next = p;
82   else
83     transform_head = p;
84   transform_tail = p;
85   return p;
86 }
87 
88 static struct replace_segm *
add_segment(struct transform * tf)89 add_segment (struct transform *tf)
90 {
91   struct replace_segm *segm = xmalloc (sizeof *segm);
92   segm->next = NULL;
93   if (tf->repl_tail)
94     tf->repl_tail->next = segm;
95   else
96     tf->repl_head = segm;
97   tf->repl_tail = segm;
98   tf->segm_count++;
99   return segm;
100 }
101 
102 static void
add_literal_segment(struct transform * tf,const char * str,const char * end)103 add_literal_segment (struct transform *tf, const char *str, const char *end)
104 {
105   size_t len = end - str;
106   if (len)
107     {
108       struct replace_segm *segm = add_segment (tf);
109       segm->type = segm_literal;
110       segm->v.literal.ptr = xmalloc (len + 1);
111       memcpy (segm->v.literal.ptr, str, len);
112       segm->v.literal.ptr[len] = 0;
113       segm->v.literal.size = len;
114     }
115 }
116 
117 static void
add_char_segment(struct transform * tf,int chr)118 add_char_segment (struct transform *tf, int chr)
119 {
120   struct replace_segm *segm = add_segment (tf);
121   segm->type = segm_literal;
122   segm->v.literal.ptr = xmalloc (2);
123   segm->v.literal.ptr[0] = chr;
124   segm->v.literal.ptr[1] = 0;
125   segm->v.literal.size = 1;
126 }
127 
128 static void
add_backref_segment(struct transform * tf,size_t ref)129 add_backref_segment (struct transform *tf, size_t ref)
130 {
131   struct replace_segm *segm = add_segment (tf);
132   segm->type = segm_backref;
133   segm->v.ref = ref;
134 }
135 
136 static int
parse_xform_flags(int * pflags,int c)137 parse_xform_flags (int *pflags, int c)
138 {
139   switch (c)
140     {
141     case 'r':
142       *pflags |= XFORM_REGFILE;
143       break;
144 
145     case 'R':
146       *pflags &= ~XFORM_REGFILE;
147       break;
148 
149     case 'h':
150       *pflags |= XFORM_LINK;
151       break;
152 
153     case 'H':
154       *pflags &= ~XFORM_LINK;
155       break;
156 
157     case 's':
158       *pflags |= XFORM_SYMLINK;
159       break;
160 
161     case 'S':
162       *pflags &= ~XFORM_SYMLINK;
163       break;
164 
165     default:
166       return 1;
167     }
168   return 0;
169 }
170 
171 static void
add_case_ctl_segment(struct transform * tf,enum case_ctl_type ctl)172 add_case_ctl_segment (struct transform *tf, enum case_ctl_type ctl)
173 {
174   struct replace_segm *segm = add_segment (tf);
175   segm->type = segm_case_ctl;
176   segm->v.ctl = ctl;
177 }
178 
179 static const char *
parse_transform_expr(const char * expr)180 parse_transform_expr (const char *expr)
181 {
182   int delim;
183   int i, j, rc;
184   char *str, *beg, *cur;
185   const char *p;
186   int cflags = 0;
187   struct transform *tf = new_transform ();
188 
189   if (expr[0] != 's')
190     {
191       if (strncmp (expr, "flags=", 6) == 0)
192 	{
193 	  transform_flags = 0;
194 	  for (expr += 6; *expr; expr++)
195 	    {
196 	      if (*expr == ';')
197 		{
198 		  expr++;
199 		  break;
200 		}
201 	      if (parse_xform_flags (&transform_flags, *expr))
202 		USAGE_ERROR ((0, 0, _("Unknown transform flag: %c"),
203 			      *expr));
204 	    }
205 	  return expr;
206 	}
207       USAGE_ERROR ((0, 0, _("Invalid transform expression")));
208     }
209 
210   delim = expr[1];
211   if (!delim)
212     USAGE_ERROR ((0, 0, _("Invalid transform expression")));
213 
214   /* Scan regular expression */
215   for (i = 2; expr[i] && expr[i] != delim; i++)
216     if (expr[i] == '\\' && expr[i+1])
217       i++;
218 
219   if (expr[i] != delim)
220     USAGE_ERROR ((0, 0, _("Invalid transform expression")));
221 
222   /* Scan replacement expression */
223   for (j = i + 1; expr[j] && expr[j] != delim; j++)
224     if (expr[j] == '\\' && expr[j+1])
225       j++;
226 
227   if (expr[j] != delim)
228     USAGE_ERROR ((0, 0, _("Invalid transform expression")));
229 
230   /* Check flags */
231   tf->transform_type = transform_first;
232   tf->flags = transform_flags;
233   for (p = expr + j + 1; *p && *p != ';'; p++)
234     switch (*p)
235       {
236       case 'g':
237 	tf->transform_type = transform_global;
238 	break;
239 
240       case 'i':
241 	cflags |= REG_ICASE;
242 	break;
243 
244       case 'x':
245 	cflags |= REG_EXTENDED;
246 	break;
247 
248       case '0': case '1': case '2': case '3': case '4':
249       case '5': case '6': case '7': case '8': case '9':
250 	tf->match_number = strtoul (p, (char**) &p, 0);
251 	p--;
252 	break;
253 
254       default:
255 	if (parse_xform_flags (&tf->flags, *p))
256 	  USAGE_ERROR ((0, 0, _("Unknown flag in transform expression: %c"),
257 			*p));
258       }
259 
260   if (*p == ';')
261     p++;
262 
263   /* Extract and compile regex */
264   str = xmalloc (i - 1);
265   memcpy (str, expr + 2, i - 2);
266   str[i - 2] = 0;
267 
268   rc = regcomp (&tf->regex, str, cflags);
269 
270   if (rc)
271     {
272       char errbuf[512];
273       regerror (rc, &tf->regex, errbuf, sizeof (errbuf));
274       USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
275     }
276 
277   if (str[0] == '^' || (i > 2 && str[i - 3] == '$'))
278     tf->transform_type = transform_first;
279 
280   free (str);
281 
282   /* Extract and compile replacement expr */
283   i++;
284   str = xmalloc (j - i + 1);
285   memcpy (str, expr + i, j - i);
286   str[j - i] = 0;
287 
288   for (cur = beg = str; *cur;)
289     {
290       if (*cur == '\\')
291 	{
292 	  size_t n;
293 
294 	  add_literal_segment (tf, beg, cur);
295 	  switch (*++cur)
296 	    {
297 	    case '0': case '1': case '2': case '3': case '4':
298 	    case '5': case '6': case '7': case '8': case '9':
299 	      n = strtoul (cur, &cur, 10);
300 	      if (n > tf->regex.re_nsub)
301 		USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
302 	      add_backref_segment (tf, n);
303 	      break;
304 
305 	    case '\\':
306 	      add_char_segment (tf, '\\');
307 	      cur++;
308 	      break;
309 
310 	    case 'a':
311 	      add_char_segment (tf, '\a');
312 	      cur++;
313 	      break;
314 
315 	    case 'b':
316 	      add_char_segment (tf, '\b');
317 	      cur++;
318 	      break;
319 
320 	    case 'f':
321 	      add_char_segment (tf, '\f');
322 	      cur++;
323 	      break;
324 
325 	    case 'n':
326 	      add_char_segment (tf, '\n');
327 	      cur++;
328 	      break;
329 
330 	    case 'r':
331 	      add_char_segment (tf, '\r');
332 	      cur++;
333 	      break;
334 
335 	    case 't':
336 	      add_char_segment (tf, '\t');
337 	      cur++;
338 	      break;
339 
340 	    case 'v':
341 	      add_char_segment (tf, '\v');
342 	      cur++;
343 	      break;
344 
345 	    case '&':
346 	      add_char_segment (tf, '&');
347 	      cur++;
348 	      break;
349 
350 	    case 'L':
351 	      /* Turn the replacement to lowercase until a '\U' or '\E'
352 		 is found, */
353 	      add_case_ctl_segment (tf, ctl_locase);
354 	      cur++;
355 	      break;
356 
357 	    case 'l':
358 	      /* Turn the next character to lowercase, */
359 	      add_case_ctl_segment (tf, ctl_locase_next);
360 	      cur++;
361 	      break;
362 
363 	    case 'U':
364 	      /* Turn the replacement to uppercase until a '\L' or '\E'
365 		 is found, */
366 	      add_case_ctl_segment (tf, ctl_upcase);
367 	      cur++;
368 	      break;
369 
370 	    case 'u':
371 	      /* Turn the next character to uppercase, */
372 	      add_case_ctl_segment (tf, ctl_upcase_next);
373 	      cur++;
374 	      break;
375 
376 	    case 'E':
377 	      /* Stop case conversion started by '\L' or '\U'. */
378 	      add_case_ctl_segment (tf, ctl_stop);
379 	      cur++;
380 	      break;
381 
382 	    default:
383 	      if (*cur == delim)
384 		add_char_segment (tf, delim);
385 	      else
386 		{
387 		  char buf[2];
388 		  buf[0] = '\\';
389 		  buf[1] = *cur;
390 		  add_literal_segment (tf, buf, buf + 2);
391 		}
392 	      cur++;
393 	      break;
394 	    }
395 	  beg = cur;
396 	}
397       else if (*cur == '&')
398 	{
399 	  add_literal_segment (tf, beg, cur);
400 	  add_backref_segment (tf, 0);
401 	  beg = ++cur;
402 	}
403       else
404 	cur++;
405     }
406   add_literal_segment (tf, beg, cur);
407   free(str);
408 
409   return p;
410 }
411 
/* Parse EXPR, which may hold several transformation expressions one
   after another, and register each of them.  */
void
set_transform_expr (const char *expr)
{
  const char *rest;

  /* Each call consumes one expression and returns the unparsed rest */
  for (rest = expr; *rest != '\0'; )
    rest = parse_transform_expr (rest);
}
418 
419 /* Run case conversion specified by CASE_CTL on array PTR of SIZE
420    characters. Returns pointer to statically allocated storage. */
421 static char *
run_case_conv(enum case_ctl_type case_ctl,char * ptr,size_t size)422 run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
423 {
424   static char *case_ctl_buffer;
425   static size_t case_ctl_bufsize;
426   char *p;
427 
428   if (case_ctl_bufsize < size)
429     {
430       case_ctl_bufsize = size;
431       case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
432     }
433   memcpy (case_ctl_buffer, ptr, size);
434   switch (case_ctl)
435     {
436     case ctl_upcase_next:
437       case_ctl_buffer[0] = toupper ((unsigned char) case_ctl_buffer[0]);
438       break;
439 
440     case ctl_locase_next:
441       case_ctl_buffer[0] = tolower ((unsigned char) case_ctl_buffer[0]);
442       break;
443 
444     case ctl_upcase:
445       for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
446 	*p = toupper ((unsigned char) *p);
447       break;
448 
449     case ctl_locase:
450       for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
451 	*p = tolower ((unsigned char) *p);
452       break;
453 
454     case ctl_stop:
455       break;
456     }
457   return case_ctl_buffer;
458 }
459 
460 
461 static struct obstack stk;
462 static bool stk_init;
463 
464 static void
_single_transform_name_to_obstack(struct transform * tf,char * input)465 _single_transform_name_to_obstack (struct transform *tf, char *input)
466 {
467   regmatch_t *rmp;
468   int rc;
469   size_t nmatches = 0;
470   enum case_ctl_type case_ctl = ctl_stop,  /* Current case conversion op */
471                      save_ctl = ctl_stop;  /* Saved case_ctl for \u and \l */
472 
473   /* Reset case conversion after a single-char operation */
474 #define CASE_CTL_RESET()  if (case_ctl == ctl_upcase_next     \
475 			      || case_ctl == ctl_locase_next) \
476                             {                                 \
477                               case_ctl = save_ctl;            \
478                               save_ctl = ctl_stop;            \
479 			    }
480 
481   rmp = xmalloc ((tf->regex.re_nsub + 1) * sizeof (*rmp));
482 
483   while (*input)
484     {
485       size_t disp;
486       char *ptr;
487 
488       rc = regexec (&tf->regex, input, tf->regex.re_nsub + 1, rmp, 0);
489 
490       if (rc == 0)
491 	{
492 	  struct replace_segm *segm;
493 
494 	  disp = rmp[0].rm_eo;
495 
496 	  if (rmp[0].rm_so)
497 	    obstack_grow (&stk, input, rmp[0].rm_so);
498 
499 	  nmatches++;
500 	  if (tf->match_number && nmatches < tf->match_number)
501 	    {
502 	      obstack_grow (&stk, input, disp);
503 	      input += disp;
504 	      continue;
505 	    }
506 
507 	  for (segm = tf->repl_head; segm; segm = segm->next)
508 	    {
509 	      switch (segm->type)
510 		{
511 		case segm_literal:    /* Literal segment */
512 		  if (case_ctl == ctl_stop)
513 		    ptr = segm->v.literal.ptr;
514 		  else
515 		    {
516 		      ptr = run_case_conv (case_ctl,
517 					   segm->v.literal.ptr,
518 					   segm->v.literal.size);
519 		      CASE_CTL_RESET();
520 		    }
521 		  obstack_grow (&stk, ptr, segm->v.literal.size);
522 		  break;
523 
524 		case segm_backref:    /* Back-reference segment */
525 		  if (rmp[segm->v.ref].rm_so != -1
526 		      && rmp[segm->v.ref].rm_eo != -1)
527 		    {
528 		      size_t size = rmp[segm->v.ref].rm_eo
529 			              - rmp[segm->v.ref].rm_so;
530 		      ptr = input + rmp[segm->v.ref].rm_so;
531 		      if (case_ctl != ctl_stop)
532 			{
533 			  ptr = run_case_conv (case_ctl, ptr, size);
534 			  CASE_CTL_RESET();
535 			}
536 
537 		      obstack_grow (&stk, ptr, size);
538 		    }
539 		  break;
540 
541 		case segm_case_ctl:
542 		  switch (segm->v.ctl)
543 		    {
544 		    case ctl_upcase_next:
545 		    case ctl_locase_next:
546 		      switch (save_ctl)
547 			{
548 			case ctl_stop:
549 			case ctl_upcase:
550 			case ctl_locase:
551 			  save_ctl = case_ctl;
552 			default:
553 			  break;
554 			}
555 		      FALLTHROUGH;
556 
557 		    case ctl_upcase:
558 		    case ctl_locase:
559 		    case ctl_stop:
560 		      case_ctl = segm->v.ctl;
561 		    }
562 		}
563 	    }
564 	}
565       else
566 	{
567 	  disp = strlen (input);
568 	  obstack_grow (&stk, input, disp);
569 	}
570 
571       input += disp;
572 
573       if (tf->transform_type == transform_first)
574 	{
575 	  obstack_grow (&stk, input, strlen (input));
576 	  break;
577 	}
578     }
579 
580   obstack_1grow (&stk, 0);
581   free (rmp);
582 }
583 
584 static bool
_transform_name_to_obstack(int flags,char * input,char ** output)585 _transform_name_to_obstack (int flags, char *input, char **output)
586 {
587   struct transform *tf;
588   bool alloced = false;
589 
590   if (!stk_init)
591     {
592       obstack_init (&stk);
593       stk_init = true;
594     }
595 
596   for (tf = transform_head; tf; tf = tf->next)
597     {
598       if (tf->flags & flags)
599 	{
600 	  _single_transform_name_to_obstack (tf, input);
601 	  input = obstack_finish (&stk);
602 	  alloced = true;
603 	}
604     }
605   *output = input;
606   return alloced;
607 }
608 
609 bool
transform_name_fp(char ** pinput,int flags,char * (* fun)(char *,void *),void * dat)610 transform_name_fp (char **pinput, int flags,
611 		   char *(*fun)(char *, void *), void *dat)
612 {
613     char *str;
614     bool ret = _transform_name_to_obstack (flags, *pinput, &str);
615     if (ret)
616       {
617 	assign_string (pinput, fun ? fun (str, dat) : str);
618 	obstack_free (&stk, str);
619       }
620     else if (fun)
621       {
622 	*pinput = NULL;
623 	assign_string (pinput, fun (str, dat));
624 	free (str);
625 	ret = true;
626       }
627     return ret;
628 }
629 
/* Transform the name *PINPUT using the transforms selected by TYPE.
   Shorthand for transform_name_fp without a post-processing function;
   returns what that function returns.  */
bool
transform_name (char **pinput, int type)
{
  bool changed = transform_name_fp (pinput, type, NULL, NULL);

  return changed;
}
635 
636 bool
transform_program_p(void)637 transform_program_p (void)
638 {
639   return transform_head != NULL;
640 }
641