1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3     Free Software Foundation, Inc.
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2, or (at your option)
8     any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 
19 /* compile.c: translate sed source into internal form */
20 
21 #include "sed.h"
22 #include "strverscmp.h"
23 #include <stdio.h>
24 #include <ctype.h>
25 
26 #ifdef HAVE_STRINGS_H
27 # include <strings.h>
28 # ifdef HAVE_MEMORY_H
29 #  include <memory.h>
30 # endif
31 #else
32 # include <string.h>
33 #endif /* HAVE_STRINGS_H */
34 
35 #ifdef HAVE_STDLIB_H
36 # include <stdlib.h>
37 #endif
38 #ifndef EXIT_FAILURE
39 # define EXIT_FAILURE 1
40 #endif
41 
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
44 #endif
45 
46 #include <obstack.h>
47 
48 
/* Presumably the number of entries in a `y' command translation map
   (its use is not visible in this part of the file -- confirm).
   XXX shouldn't this be (UCHAR_MAX+1)? */
#define YMAP_LENGTH		256
/* Number of command slots added to a command vector each time
   next_cmd_entry() finds it full. */
#define VECTOR_ALLOC_INCREMENT	40

/* let's not confuse text editors that have only dumb bracket-matching... */
#define OPEN_BRACKET	'['
#define CLOSE_BRACKET	']'
#define OPEN_BRACE	'{'
#define CLOSE_BRACE	'}'
57 
/* Describes where the script text being compiled comes from: either an
   in-memory string (base/cur/end) or a stdio stream (file).  inchar()
   tests `cur' first and only falls back to `file' when `cur' is NULL. */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, `prog.cur' points
     to the current character in the string, and `prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls. */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble! */
  FILE *file;
};
73 
/* Information used to give out useful and informative error messages.
   bad_prog() reports `name' and `line' when `name' is non-NULL, and
   `string_expr_count' (plus a character offset) otherwise. */
struct error_info {
  /* This is the name of the current script file. */
  const char *name;

  /* This is the number of the current script line that we're compiling.
     inchar() and savchar() keep it in step with newlines read/unread. */
  countT line;

  /* This is the index of the "-e" expressions on the command line. */
  countT string_expr_count;
};
85 
86 
/* Label structure used to resolve GOTO's, labels, and block beginnings.
   Nodes are created by setup_label() and popped by release_label(). */
struct sed_label {
  countT v_index;		/* index of vector element being referenced */
  char *name;			/* NUL-terminated name of the label
				   (NULL for entries recording a `{') */
  struct error_info err_info;	/* track where `{}' blocks start */
  struct sed_label *next;	/* linked list (stack) */
};
94 
/* Pairs a pre-built output descriptor for a special file name with the
   address of the stdio stream that backs it. */
struct special_files {
  struct output outf;
  FILE **pfp;
};

/* The my_std* pointers are assigned at run time by get_openfile(), because
   the std* streams are not always constants usable in an initializer. */
FILE *my_stdin, *my_stdout, *my_stderr;

/* Table of special file names recognized by get_openfile(); terminated
   by an entry whose name is NULL. */
struct special_files special_files[] = {
  { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
  { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
  { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
  { { NULL, false, NULL, NULL }, NULL }
};
107 
108 
/* Where we are in the processing of the input: `prog' is the text source
   read by inchar(), `cur_input' the location reported by bad_prog(). */
static struct prog_info prog;
static struct error_info cur_input;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts. */
static struct sed_label *jumps = NULL;
static struct sed_label *labels = NULL;

/* We wish to detect #n magic only in the first input argument;
   this flag tracks when we have consumed the first file of input. */
static bool first_script = true;

/* Allow for scripts like "sed -e 'i\' -e foo":
   `pending_text' accumulates text that read_text() has not finished
   reading, and `old_text_buf' remembers which command's text buffer
   that pending text belongs to. */
static struct buffer *pending_text = NULL;
static struct text_buf *old_text_buf = NULL;

/* Information about block start positions.  This is used to backpatch
   block end positions. */
static struct sed_label *blocks = NULL;

/* Use an obstack for compilation. */
static struct obstack obs;
132 
/* Various error messages we may want to print.

   All messages are stored back to back in the single array `errors',
   separated by explicit \0 bytes.  Each macro below evaluates to the
   address of one message: the address of the previous message plus the
   sizeof of the previous message's literal (sizeof includes the
   terminating NUL, which accounts for the separator).  Consequently the
   literal repeated inside each sizeof must stay byte-for-byte identical
   to the matching piece of `errors', and the order of both lists must
   stay in step.

   N_() is presumably the gettext "mark for extraction" macro that
   expands to its argument unchanged -- confirm against sed.h. */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
187 
/* Heads of the linked lists of files opened through get_openfile().
   `file_write' is the list used for the `w' option of `s'; `file_read'
   is presumably the matching list for files opened for reading (its use
   is not visible in this part of the file -- confirm). */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
190 
191 
192 /* Complain about an unknown command and exit. */
193 void
bad_command(ch)194 bad_command(ch)
195   char ch;
196 {
197   const char *msg = _(UNKNOWN_CMD);
198   char *unknown_cmd = xmalloc(strlen(msg));
199   sprintf(unknown_cmd, msg, ch);
200   bad_prog(unknown_cmd);
201 }
202 
203 /* Complain about a programming error and exit. */
204 void
bad_prog(why)205 bad_prog(why)
206   const char *why;
207 {
208   if (cur_input.name)
209     fprintf(stderr, _("%s: file %s line %lu: %s\n"),
210 	    myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
211   else
212     fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
213 	    myname,
214 	    CAST(unsigned long)cur_input.string_expr_count,
215 	    CAST(unsigned long)(prog.cur-prog.base),
216 	    why);
217   exit(EXIT_FAILURE);
218 }
219 
220 
221 /* Read the next character from the program.  Return EOF if there isn't
222    anything to read.  Keep cur_input.line up to date, so error messages
223    can be meaningful. */
224 static int inchar P_((void));
225 static int
inchar()226 inchar()
227 {
228   int ch = EOF;
229 
230   if (prog.cur)
231     {
232       if (prog.cur < prog.end)
233 	ch = *prog.cur++;
234     }
235   else if (prog.file)
236     {
237       if (!feof(prog.file))
238 	ch = getc(prog.file);
239     }
240   if (ch == '\n')
241     ++cur_input.line;
242   return ch;
243 }
244 
245 /* unget `ch' so the next call to inchar will return it.   */
246 static void savchar P_((int ch));
247 static void
savchar(ch)248 savchar(ch)
249   int ch;
250 {
251   if (ch == EOF)
252     return;
253   if (ch == '\n' && cur_input.line > 0)
254     --cur_input.line;
255   if (prog.cur)
256     {
257       if (prog.cur <= prog.base || *--prog.cur != ch)
258 	panic("Called savchar() with unexpected pushback (%x)",
259 	      CAST(unsigned char)ch);
260     }
261   else
262     ungetc(ch, prog.file);
263 }
264 
265 /* Read the next non-blank character from the program.  */
266 static int in_nonblank P_((void));
267 static int
in_nonblank()268 in_nonblank()
269 {
270   int ch;
271   do
272     ch = inchar();
273     while (ISBLANK(ch));
274   return ch;
275 }
276 
277 /* Read an integer value from the program.  */
278 static countT in_integer P_((int ch));
279 static countT
in_integer(ch)280 in_integer(ch)
281   int ch;
282 {
283   countT num = 0;
284 
285   while (ISDIGIT(ch))
286     {
287       num = num * 10 + ch - '0';
288       ch = inchar();
289     }
290   savchar(ch);
291   return num;
292 }
293 
294 static int add_then_next P_((struct buffer *b, int ch));
295 static int
add_then_next(b,ch)296 add_then_next(b, ch)
297   struct buffer *b;
298   int ch;
299 {
300   add1_buffer(b, ch);
301   return inchar();
302 }
303 
304 static char * convert_number P_((char *, char *, const char *, int, int, int));
/* Parse up to MAXDIGITS digits of radix BASE from the characters in
   [BUF, BUFEND) and store the resulting value, converted to char, in
   *RESULT.  Digits are 0-9 and a-f/A-F; scanning stops at the first
   character that is not a digit valid for BASE.  If no digit at all was
   consumed, DEFAULT_CHAR is stored instead.  Returns a pointer to the
   first character that was not consumed. */
static char *
convert_number(result, buf, bufend, base, maxdigits, default_char)
  char *result;
  char *buf;
  const char *bufend;
  int base;
  int maxdigits;
  int default_char;
{
  char *cursor = buf;
  int remaining = maxdigits;
  int value = 0;

  while (cursor < bufend && remaining > 0)
    {
      int c = *cursor;
      int digit;

      if ('0' <= c && c <= '9')
	digit = c - '0';
      else if ('a' <= c && c <= 'f')
	digit = c - 'a' + 0xa;
      else if ('A' <= c && c <= 'F')
	digit = c - 'A' + 0xa;
      else
	break;			/* not a digit in any supported base */

      if (digit >= base)
	break;			/* a digit, but not valid in BASE */

      value = value * base + digit;
      ++cursor;
      --remaining;
    }

  *result = (cursor == buf) ? default_char : value;
  return cursor;
}
349 
350 
351 /* Read in a filename for a `r', `w', or `s///w' command. */
352 static struct buffer *read_filename P_((void));
353 static struct buffer *
read_filename()354 read_filename()
355 {
356   struct buffer *b;
357   int ch;
358 
359   b = init_buffer();
360   ch = in_nonblank();
361   while (ch != EOF && ch != '\n')
362     {
363 #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
364       if (posixicity == POSIXLY_EXTENDED)
365 	if (ch == ';' || ch == '#')
366 	  {
367 	    savchar(ch);
368 	    break;
369 	  }
370 #endif
371       ch = add_then_next(b, ch);
372     }
373   add1_buffer(b, '\0');
374   return b;
375 }
376 
377 static struct output *get_openfile P_((struct output **file_ptrs, char *mode, bool fail));
378 static struct output *
get_openfile(file_ptrs,mode,fail)379 get_openfile(file_ptrs, mode, fail)
380      struct output **file_ptrs;
381      char *mode;
382      bool fail;
383 {
384   struct buffer *b;
385   char *file_name;
386   struct output *p;
387   int is_stderr;
388 
389   b = read_filename();
390   file_name = get_buffer(b);
391   for (p=*file_ptrs; p; p=p->link)
392     if (strcmp(p->name, file_name) == 0)
393       break;
394 
395   if (posixicity == POSIXLY_EXTENDED)
396     {
397       /* Check whether it is a special file (stdin, stdout or stderr) */
398       struct special_files *special = special_files;
399 
400       /* std* sometimes are not constants, so they
401          cannot be used in the initializer for special_files */
402 #ifndef CONFIG_WITHOUT_O_OPT
403       my_stdin = stdin; my_stdout = sed_stdout; my_stderr = stderr;
404 #else
405       my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
406 #endif
407       for (special = special_files; special->outf.name; special++)
408         if (strcmp(special->outf.name, file_name) == 0)
409           {
410 	    special->outf.fp = *special->pfp;
411 	    free_buffer (b);
412 	    return &special->outf;
413           }
414     }
415 
416   if (!p)
417     {
418       p = OB_MALLOC(&obs, 1, struct output);
419       p->name = ck_strdup(file_name);
420       p->fp = ck_fopen(p->name, mode, fail);
421       p->missing_newline = false;
422       p->link = *file_ptrs;
423       *file_ptrs = p;
424     }
425   free_buffer(b);
426   return p;
427 }
428 
429 
430 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
431 static struct sed_cmd *
next_cmd_entry(vectorp)432 next_cmd_entry(vectorp)
433   struct vector **vectorp;
434 {
435   struct sed_cmd *cmd;
436   struct vector *v;
437 
438   v = *vectorp;
439   if (v->v_length == v->v_allocated)
440     {
441       v->v_allocated += VECTOR_ALLOC_INCREMENT;
442       v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
443     }
444 
445   cmd = v->v + v->v_length;
446   cmd->a1 = NULL;
447   cmd->a2 = NULL;
448   cmd->range_state = RANGE_INACTIVE;
449   cmd->addr_bang = false;
450   cmd->cmd = '\0';	/* something invalid, to catch bugs early */
451 
452   *vectorp  = v;
453   return cmd;
454 }
455 
456 static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
457 static int
snarf_char_class(b,cur_stat)458 snarf_char_class(b, cur_stat)
459   struct buffer *b;
460   mbstate_t *cur_stat;
461 {
462   int ch;
463   int state = 0;
464   int delim;
465   bool pending_mb = 0;
466 
467   ch = inchar();
468   if (ch == '^')
469     ch = add_then_next(b, ch);
470   if (ch == CLOSE_BRACKET)
471     ch = add_then_next(b, ch);
472 
473   /* States are:
474 	0 outside a collation element, character class or collation class
475 	1 after the bracket
476 	2 after the opening ./:/=
477 	3 after the closing ./:/= */
478 
479   for (;; ch = add_then_next (b, ch))
480     {
481       pending_mb = BRLEN (ch, cur_stat) != 1;
482 
483       switch (ch)
484 	{
485 	case EOF:
486 	case '\n':
487 	  return ch;
488 
489 	case '.':
490 	case ':':
491 	case '=':
492 	  if (pending_mb)
493 	    continue;
494 
495 	  if (state == 1)
496 	    {
497 	      delim = ch;
498 	      state++;
499 	    }
500 	  else if (ch == delim && state == 2)
501 	    state++;
502 	  else
503 	    break;
504 
505 	  continue;
506 
507 	case OPEN_BRACKET:
508 	  if (pending_mb)
509 	    continue;
510 
511 	  state++;
512 	  continue;
513 
514 	case CLOSE_BRACKET:
515 	  if (pending_mb)
516 	    continue;
517 
518 	  if (state == 0 || state == 1)
519 	    return ch;
520 	  else if (state == 3)
521 	    state = 0;
522 
523 	  break;
524 
525 	default:
526 	  break;
527 	}
528 
529       /* Getting a character different from .=: whilst in state 1
530          goes back to state 0, getting a character different from ]
531          whilst in state 3 goes back to state 2.  */
532       state &= ~1;
533     }
534 }
535 
536 static struct buffer *match_slash P_((int slash, bool regex));
537 static struct buffer *
match_slash(slash,regex)538 match_slash(slash, regex)
539   int slash;
540   bool regex;
541 {
542   struct buffer *b;
543   int ch;
544   bool pending_mb = false;
545   mbstate_t cur_stat;
546 
547   memset (&cur_stat, 0, sizeof (mbstate_t));
548 
549   if (BRLEN (slash, &cur_stat) == -2)
550   if (BRLEN (slash, &cur_stat) == -2)
551     bad_prog (BAD_DELIM);
552 
553   memset (&cur_stat, 0, sizeof (mbstate_t));
554 
555   b = init_buffer();
556   while ((ch = inchar()) != EOF && ch != '\n')
557     {
558       pending_mb = BRLEN (ch, &cur_stat) != 1;
559       pending_mb = BRLEN (ch, &cur_stat) != 1;
560 
561       if (!pending_mb)
562 	{
563 	  if (ch == slash)
564 	    return b;
565 	  else if (ch == '\\')
566 	    {
567 	      ch = inchar();
568 	      if (ch == EOF)
569 	        break;
570 #ifndef REG_PERL
571 	      else if (ch == 'n' && regex)
572 	        ch = '\n';
573 #endif
574 	      else if (ch != '\n' && ch != slash)
575 	        add1_buffer(b, '\\');
576 	    }
577           else if (ch == OPEN_BRACKET && regex)
578 	    {
579 	      add1_buffer(b, ch);
580 	      ch = snarf_char_class(b, &cur_stat);
581 	      if (ch != CLOSE_BRACKET)
582 	        break;
583 	    }
584 	}
585 
586       add1_buffer(b, ch);
587     }
588 
589   if (ch == '\n')
590     savchar(ch);	/* for proper line number in error report */
591   free_buffer(b);
592   return NULL;
593 }
594 
595 static int mark_subst_opts P_((struct subst *cmd));
596 static int
mark_subst_opts(cmd)597 mark_subst_opts(cmd)
598   struct subst *cmd;
599 {
600   int flags = 0;
601   int ch;
602 
603   cmd->global = false;
604   cmd->print = false;
605   cmd->eval = false;
606   cmd->numb = 0;
607   cmd->outf = NULL;
608 
609   for (;;)
610     switch ( (ch = in_nonblank()) )
611       {
612       case 'i':	/* GNU extension */
613       case 'I':	/* GNU extension */
614 	flags |= REG_ICASE;
615 	break;
616 
617 #ifdef REG_PERL
618       case 's':	/* GNU extension */
619       case 'S':	/* GNU extension */
620 	if (extended_regexp_flags & REG_PERL)
621 	  flags |= REG_DOTALL;
622 	break;
623 
624       case 'x':	/* GNU extension */
625       case 'X':	/* GNU extension */
626 	if (extended_regexp_flags & REG_PERL)
627 	  flags |= REG_EXTENDED;
628 	break;
629 #endif
630 
631       case 'm':	/* GNU extension */
632       case 'M':	/* GNU extension */
633 	flags |= REG_NEWLINE;
634 	break;
635 
636       case 'e':
637 	cmd->eval = true;
638 	break;
639 
640       case 'p':
641 	if (cmd->print)
642 	  bad_prog(_(EXCESS_P_OPT));
643 	cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
644 	break;
645 
646       case 'g':
647 	if (cmd->global)
648 	  bad_prog(_(EXCESS_G_OPT));
649 	cmd->global = true;
650 	break;
651 
652       case 'w':
653 	cmd->outf = get_openfile(&file_write, "w", true);
654 	return flags;
655 
656       case '0': case '1': case '2': case '3': case '4':
657       case '5': case '6': case '7': case '8': case '9':
658 	if (cmd->numb)
659 	  bad_prog(_(EXCESS_N_OPT));
660 	cmd->numb = in_integer(ch);
661 	if (!cmd->numb)
662 	  bad_prog(_(ZERO_N_OPT));
663 	break;
664 
665       case CLOSE_BRACE:
666       case '#':
667 	savchar(ch);
668 	/* Fall Through */
669       case EOF:
670       case '\n':
671       case ';':
672 	return flags;
673 
674       case '\r':
675 	if (inchar() == '\n')
676 	  return flags;
677 	/* FALLTHROUGH */
678 
679       default:
680 	bad_prog(_(UNKNOWN_S_OPT));
681 	/*NOTREACHED*/
682       }
683 }
684 
685 
686 /* read in a label for a `:', `b', or `t' command */
687 static char *read_label P_((void));
688 static char *
read_label()689 read_label()
690 {
691   struct buffer *b;
692   int ch;
693   char *ret;
694 
695   b = init_buffer();
696   ch = in_nonblank();
697 
698   while (ch != EOF && ch != '\n'
699 	 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
700     ch = add_then_next (b, ch);
701 
702   savchar(ch);
703   add1_buffer(b, '\0');
704   ret = ck_strdup(get_buffer(b));
705   free_buffer(b);
706   return ret;
707 }
708 
709 /* Store a label (or label reference) created by a `:', `b', or `t'
710    command so that the jump to/from the label can be backpatched after
711    compilation is complete, or a reference created by a `{' to be
712    backpatched when the corresponding `}' is found.  */
713 static struct sed_label *setup_label
714   P_((struct sed_label *, countT, char *, const struct error_info *));
715 static struct sed_label *
setup_label(list,idx,name,err_info)716 setup_label(list, idx, name, err_info)
717   struct sed_label *list;
718   countT idx;
719   char *name;
720   const struct error_info *err_info;
721 {
722   struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
723   ret->v_index = idx;
724   ret->name = name;
725   if (err_info)
726     MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
727   ret->next = list;
728   return ret;
729 }
730 
731 static struct sed_label *release_label P_((struct sed_label *list_head));
732 static struct sed_label *
release_label(list_head)733 release_label(list_head)
734   struct sed_label *list_head;
735 {
736   struct sed_label *ret;
737 
738   if (!list_head)
739     return NULL;
740   ret = list_head->next;
741 
742   FREE(list_head->name);
743 
744 #if 0
745   /* We use obstacks */
746   FREE(list_head);
747 #endif
748   return ret;
749 }
750 
751 static struct replacement *new_replacement P_((char *, size_t,
752 					       enum replacement_types));
753 static struct replacement *
new_replacement(text,length,type)754 new_replacement(text, length, type)
755   char *text;
756   size_t length;
757   enum replacement_types type;
758 {
759   struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
760 
761   r->prefix = text;
762   r->prefix_length = length;
763   r->subst_id = -1;
764   r->repl_type = type;
765 
766   /* r-> next = NULL; */
767   return r;
768 }
769 
770 static void setup_replacement P_((struct subst *, const char *, size_t));
771 static void
setup_replacement(sub,text,length)772 setup_replacement(sub, text, length)
773      struct subst *sub;
774      const char *text;
775      size_t length;
776 {
777   char *base;
778   char *p;
779   char *text_end;
780   enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
781   struct replacement root;
782   struct replacement *tail;
783 
784   sub->max_id = 0;
785   base = MEMDUP(text, length, char);
786   length = normalize_text(base, length, TEXT_REPLACEMENT);
787 
788   text_end = base + length;
789   tail = &root;
790 
791   for (p=base; p<text_end; ++p)
792     {
793       if (*p == '\\')
794 	{
795 	  /* Preceding the backslash may be some literal text: */
796 	  tail = tail->next =
797 	    new_replacement(base, CAST(size_t)(p - base), repl_type);
798 
799 	  repl_type = save_type;
800 
801 	  /* Skip the backslash and look for a numeric back-reference,
802 	     or a case-munging escape if not in POSIX mode: */
803 	  ++p;
804 	  if (p < text_end && (posixicity != POSIXLY_BASIC || ISDIGIT (*p)))
805 	    switch (*p)
806 	      {
807 	      case '0': case '1': case '2': case '3': case '4':
808 	      case '5': case '6': case '7': case '8': case '9':
809 		tail->subst_id = *p - '0';
810 		if (sub->max_id < tail->subst_id)
811 		  sub->max_id = tail->subst_id;
812 		break;
813 
814 	      case 'L':
815 		repl_type = REPL_LOWERCASE;
816 		save_type = REPL_LOWERCASE;
817 		break;
818 
819 	      case 'U':
820 		repl_type = REPL_UPPERCASE;
821 		save_type = REPL_UPPERCASE;
822 		break;
823 
824 	      case 'E':
825 		repl_type = REPL_ASIS;
826 		save_type = REPL_ASIS;
827 		break;
828 
829 	      case 'l':
830 		save_type = repl_type;
831 		repl_type |= REPL_LOWERCASE_FIRST;
832 		break;
833 
834 	      case 'u':
835 		save_type = repl_type;
836 		repl_type |= REPL_UPPERCASE_FIRST;
837 		break;
838 
839 	      default:
840 		p[-1] = *p;
841 		++tail->prefix_length;
842 	      }
843 
844 	  base = p + 1;
845 	}
846       else if (*p == '&')
847 	{
848 	  /* Preceding the ampersand may be some literal text: */
849 	  tail = tail->next =
850 	    new_replacement(base, CAST(size_t)(p - base), repl_type);
851 
852 	  repl_type = save_type;
853 	  tail->subst_id = 0;
854 	  base = p + 1;
855 	}
856   }
857   /* There may be some trailing literal text: */
858   if (base < text_end)
859     tail = tail->next =
860       new_replacement(base, CAST(size_t)(text_end - base), repl_type);
861 
862   tail->next = NULL;
863   sub->replacement = root.next;
864 }
865 
866 static void read_text P_((struct text_buf *buf, int leadin_ch));
867 static void
read_text(buf,leadin_ch)868 read_text(buf, leadin_ch)
869   struct text_buf *buf;
870   int leadin_ch;
871 {
872   int ch;
873 
874   /* Should we start afresh (as opposed to continue a partial text)? */
875   if (buf)
876     {
877       if (pending_text)
878 	free_buffer(pending_text);
879       pending_text = init_buffer();
880       buf->text = NULL;
881       buf->text_length = 0;
882       old_text_buf = buf;
883     }
884   /* assert(old_text_buf != NULL); */
885 
886   if (leadin_ch == EOF)
887     return;
888 
889   if (leadin_ch != '\n')
890     add1_buffer(pending_text, leadin_ch);
891 
892   ch = inchar();
893   while (ch != EOF && ch != '\n')
894     {
895       if (ch == '\\')
896 	{
897 	  ch = inchar();
898 	  if (ch != EOF)
899 	    add1_buffer (pending_text, '\\');
900 	}
901 
902       if (ch == EOF)
903 	{
904 	  add1_buffer (pending_text, '\n');
905 	  return;
906 	}
907 
908       ch = add_then_next (pending_text, ch);
909     }
910 
911   add1_buffer(pending_text, '\n');
912   if (!buf)
913     buf = old_text_buf;
914   buf->text_length = normalize_text (get_buffer (pending_text),
915 				     size_buffer (pending_text), TEXT_BUFFER);
916   buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
917   free_buffer(pending_text);
918   pending_text = NULL;
919 }
920 
921 
922 /* Try to read an address for a sed command.  If it succeeds,
923    return non-zero and store the resulting address in `*addr'.
924    If the input doesn't look like an address read nothing
925    and return zero.  */
926 static bool compile_address P_((struct addr *addr, int ch));
927 static bool
compile_address(addr,ch)928 compile_address(addr, ch)
929   struct addr *addr;
930   int ch;
931 {
932   addr->addr_type = ADDR_IS_NULL;
933   addr->addr_step = 0;
934   addr->addr_number = ~(countT)0;  /* extremely unlikely to ever match */
935   addr->addr_regex = NULL;
936 
937   if (ch == '/' || ch == '\\')
938     {
939       int flags = 0;
940       struct buffer *b;
941       addr->addr_type = ADDR_IS_REGEX;
942       if (ch == '\\')
943 	ch = inchar();
944       if ( !(b = match_slash(ch, true)) )
945 	bad_prog(_(UNTERM_ADDR_RE));
946 
947       for(;;)
948 	{
949 	  ch = in_nonblank();
950           switch(ch)
951 	    {
952 	    case 'I':	/* GNU extension */
953 	      flags |= REG_ICASE;
954 	      break;
955 
956 #ifdef REG_PERL
957 	    case 'S':	/* GNU extension */
958 	      if (extended_regexp_flags & REG_PERL)
959 		flags |= REG_DOTALL;
960 	      break;
961 
962 	    case 'X':	/* GNU extension */
963 	      if (extended_regexp_flags & REG_PERL)
964 		flags |= REG_EXTENDED;
965 	      break;
966 #endif
967 
968 	    case 'M':	/* GNU extension */
969 	      flags |= REG_NEWLINE;
970 	      break;
971 
972 	    default:
973 	      savchar (ch);
974 	      addr->addr_regex = compile_regex (b, flags, 0);
975 	      free_buffer(b);
976 	      return true;
977 	    }
978 	}
979     }
980   else if (ISDIGIT(ch))
981     {
982       addr->addr_number = in_integer(ch);
983       addr->addr_type = ADDR_IS_NUM;
984       ch = in_nonblank();
985       if (ch != '~')
986 	{
987 	  savchar(ch);
988 	}
989       else
990 	{
991 	  countT step = in_integer(in_nonblank());
992 	  if (step > 0)
993 	    {
994 	      addr->addr_step = step;
995 	      addr->addr_type = ADDR_IS_NUM_MOD;
996 	    }
997 	}
998     }
999   else if (ch == '+' || ch == '~')
1000     {
1001       addr->addr_step = in_integer(in_nonblank());
1002       if (addr->addr_step==0)
1003 	; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1004       else if (ch == '+')
1005 	addr->addr_type = ADDR_IS_STEP;
1006       else
1007 	addr->addr_type = ADDR_IS_STEP_MOD;
1008     }
1009   else if (ch == '$')
1010     {
1011       addr->addr_type = ADDR_IS_LAST;
1012     }
1013   else
1014     return false;
1015 
1016   return true;
1017 }
1018 
1019 /* Read a program (or a subprogram within `{' `}' pairs) in and store
1020    the compiled form in `*vector'.  Return a pointer to the new vector.  */
1021 static struct vector *compile_program P_((struct vector *));
1022 static struct vector *
compile_program(vector)1023 compile_program(vector)
1024   struct vector *vector;
1025 {
1026   struct sed_cmd *cur_cmd;
1027   struct buffer *b;
1028   int ch;
1029 
1030   if (!vector)
1031     {
1032       vector = MALLOC(1, struct vector);
1033       vector->v = NULL;
1034       vector->v_allocated = 0;
1035       vector->v_length = 0;
1036 
1037       obstack_init (&obs);
1038     }
1039   if (pending_text)
1040     read_text(NULL, '\n');
1041 
1042   for (;;)
1043     {
1044       struct addr a;
1045 
1046       while ((ch=inchar()) == ';' || ISSPACE(ch))
1047 	;
1048       if (ch == EOF)
1049 	break;
1050 
1051       cur_cmd = next_cmd_entry(&vector);
1052       if (compile_address(&a, ch))
1053 	{
1054 	  if (a.addr_type == ADDR_IS_STEP
1055 	      || a.addr_type == ADDR_IS_STEP_MOD)
1056 	    bad_prog(_(BAD_STEP));
1057 
1058 	  cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1059 	  ch = in_nonblank();
1060 	  if (ch == ',')
1061 	    {
1062 	      if (!compile_address(&a, in_nonblank()))
1063 		bad_prog(_(BAD_COMMA));
1064 
1065 	      cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1066 	      ch = in_nonblank();
1067 	    }
1068 
1069 	  if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1070 	      && cur_cmd->a1->addr_number == 0
1071 	      && (!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX))
1072 	    bad_prog(_(INVALID_LINE_0));
1073 	}
1074       if (ch == '!')
1075 	{
1076 	  cur_cmd->addr_bang = true;
1077 	  ch = in_nonblank();
1078 	  if (ch == '!')
1079 	    bad_prog(_(BAD_BANG));
1080 	}
1081 
1082       /* Do not accept extended commands in --posix mode.  Also,
1083 	 a few commands only accept one address in that mode.  */
1084       if (posixicity == POSIXLY_BASIC)
1085 	switch (ch)
1086 	  {
1087 	    case 'v': case 'L': case 'Q': case 'T':
1088 	    case 'R': case 'W':
1089 	      bad_command(ch);
1090 
1091 	    case 'a': case 'i': case 'l':
1092 	    case '=': case 'r':
1093 	      if (cur_cmd->a2)
1094 	        bad_prog(_(ONE_ADDR));
1095 	  }
1096 
1097       cur_cmd->cmd = ch;
1098       switch (ch)
1099 	{
1100 	case '#':
1101 	  if (cur_cmd->a1)
1102 	    bad_prog(_(NO_SHARP_ADDR));
1103 	  ch = inchar();
1104 	  if (ch=='n' && first_script && cur_input.line < 2)
1105 	    if (   (prog.base && prog.cur==2+prog.base)
1106 		|| (prog.file && !prog.base && 2==ftell(prog.file)))
1107 	      no_default_output = true;
1108 	  while (ch != EOF && ch != '\n')
1109 	    ch = inchar();
1110 	  continue;	/* restart the for (;;) loop */
1111 
1112 	case 'v':
1113 	  /* This is an extension.  Programs needing GNU sed might start
1114 	   * with a `v' command so that other seds will stop.
1115 	   * We compare the version and ignore POSIXLY_CORRECT.
1116 	   */
1117 	  {
1118 	    char *version = read_label ();
1119 	    char *compared_version;
1120 	    compared_version = (*version == '\0') ? "4.0" : version;
1121 	    if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1122 	      bad_prog(_(ANCIENT_VERSION));
1123 
1124 	    free (version);
1125 	    posixicity = POSIXLY_EXTENDED;
1126 	  }
1127 	  continue;
1128 
1129 	case '{':
1130 	  blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1131 	  cur_cmd->addr_bang = !cur_cmd->addr_bang;
1132 	  break;
1133 
1134 	case '}':
1135 	  if (!blocks)
1136 	    bad_prog(_(EXCESS_CLOSE_BRACE));
1137 	  if (cur_cmd->a1)
1138 	    bad_prog(_(NO_CLOSE_BRACE_ADDR));
1139 	  ch = in_nonblank();
1140 	  if (ch == CLOSE_BRACE || ch == '#')
1141 	    savchar(ch);
1142 	  else if (ch != EOF && ch != '\n' && ch != ';')
1143 	    bad_prog(_(EXCESS_JUNK));
1144 
1145 	  vector->v[blocks->v_index].x.jump_index = vector->v_length;
1146 	  blocks = release_label(blocks);	/* done with this entry */
1147 	  break;
1148 
1149 	case 'e':
1150 	  ch = in_nonblank();
1151 	  if (ch == EOF || ch == '\n')
1152 	    {
1153 	      cur_cmd->x.cmd_txt.text_length = 0;
1154 	      break;
1155 	    }
1156 	  else
1157 	    goto read_text_to_slash;
1158 
1159 	case 'a':
1160 	case 'i':
1161 	case 'c':
1162 	  ch = in_nonblank();
1163 
1164 	read_text_to_slash:
1165 	  if (ch == EOF)
1166 	    bad_prog(_(EXPECTED_SLASH));
1167 
1168 	  if (ch == '\\')
1169 	    ch = inchar();
1170 	  else
1171 	    {
1172 	      savchar(ch);
1173 	      ch = '\n';
1174 	    }
1175 
1176 	  read_text(&cur_cmd->x.cmd_txt, ch);
1177 	  break;
1178 
1179 	case ':':
1180 	  if (cur_cmd->a1)
1181 	    bad_prog(_(NO_COLON_ADDR));
1182 	  labels = setup_label(labels, vector->v_length, read_label(), NULL);
1183 	  break;
1184 
1185 	case 'T':
1186 	case 'b':
1187 	case 't':
1188 	  jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1189 	  break;
1190 
1191 	case 'Q':
1192 	case 'q':
1193 	  if (cur_cmd->a2)
1194 	    bad_prog(_(ONE_ADDR));
1195 	  /* Fall through */
1196 
1197 	case 'L':
1198 	case 'l':
1199 	  ch = in_nonblank();
1200 	  if (ISDIGIT(ch))
1201 	    {
1202 	      cur_cmd->x.int_arg = in_integer(ch);
1203 	      ch = in_nonblank();
1204 	    }
1205 	  else
1206 	    cur_cmd->x.int_arg = -1;
1207 
1208 	  if (ch == CLOSE_BRACE || ch == '#')
1209 	    savchar(ch);
1210 	  else if (ch != EOF && ch != '\n' && ch != ';')
1211 	    bad_prog(_(EXCESS_JUNK));
1212 
1213 	  break;
1214 
1215 	case '=':
1216 	case 'd':
1217 	case 'D':
1218 	case 'g':
1219 	case 'G':
1220 	case 'h':
1221 	case 'H':
1222 	case 'n':
1223 	case 'N':
1224 	case 'p':
1225 	case 'P':
1226 	case 'x':
1227 	  ch = in_nonblank();
1228 	  if (ch == CLOSE_BRACE || ch == '#')
1229 	    savchar(ch);
1230 	  else if (ch != EOF && ch != '\n' && ch != ';')
1231 	    bad_prog(_(EXCESS_JUNK));
1232 	  break;
1233 
1234 	case 'r':
1235 	  b = read_filename();
1236 	  cur_cmd->x.fname = ck_strdup(get_buffer(b));
1237 	  free_buffer(b);
1238 	  break;
1239 
1240         case 'R':
1241 	  cur_cmd->x.fp = get_openfile(&file_read, "r", false)->fp;
1242 	  break;
1243 
1244         case 'W':
1245 	case 'w':
1246 	  cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1247 	  break;
1248 
1249 	case 's':
1250 	  {
1251 	    struct buffer *b2;
1252 	    int flags;
1253 	    int slash;
1254 
1255 	    slash = inchar();
1256 	    if ( !(b  = match_slash(slash, true)) )
1257 	      bad_prog(_(UNTERM_S_CMD));
1258 	    if ( !(b2 = match_slash(slash, false)) )
1259 	      bad_prog(_(UNTERM_S_CMD));
1260 
1261 	    cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1262 	    setup_replacement(cur_cmd->x.cmd_subst,
1263 			      get_buffer(b2), size_buffer(b2));
1264 	    free_buffer(b2);
1265 
1266 	    flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1267 	    cur_cmd->x.cmd_subst->regx =
1268 	      compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1269 	    free_buffer(b);
1270 	  }
1271 	  break;
1272 
1273 	case 'y':
1274 	  {
1275 	    size_t len, dest_len;
1276 	    int slash;
1277 	    struct buffer *b2;
1278             char *src_buf, *dest_buf;
1279 
1280 	    slash = inchar();
1281 	    if ( !(b = match_slash(slash, false)) )
1282 	      bad_prog(_(UNTERM_Y_CMD));
1283             src_buf = get_buffer(b);
1284 	    len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1285 
1286             if ( !(b2 = match_slash(slash, false)) )
1287  	      bad_prog(_(UNTERM_Y_CMD));
1288             dest_buf = get_buffer(b2);
1289 	    dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1290 
1291             if (mb_cur_max > 1)
1292 	      {
1293                 int i, j, idx, src_char_num;
1294                 size_t *src_lens = MALLOC(len, size_t);
1295                 char **trans_pairs;
1296                 size_t mbclen;
1297                 mbstate_t cur_stat;
1298 
1299                 /* Enumerate how many character the source buffer has.  */
1300                 memset(&cur_stat, 0, sizeof(mbstate_t));
1301                 for (i = 0, j = 0; i < len;)
1302                   {
1303                     mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1304                     /* An invalid sequence, or a truncated multibyte character.
1305                        We treat it as a singlebyte character.  */
1306                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1307                         || mbclen == 0)
1308                       mbclen = 1;
1309                     src_lens[j++] = mbclen;
1310                     i += mbclen;
1311                   }
1312                 src_char_num = j;
1313 
1314                 memset(&cur_stat, 0, sizeof(mbstate_t));
1315                 idx = 0;
1316 
1317                 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1318                      src(i) : pointer to i-th source character.
1319                      dest(i) : pointer to i-th destination character.
1320                      NULL : terminator */
1321                 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1322                 cur_cmd->x.translatemb = trans_pairs;
1323                 for (i = 0; i < src_char_num; i++)
1324                   {
1325                     if (idx >= dest_len)
1326                       bad_prog(_(Y_CMD_LEN));
1327 
1328                     /* Set the i-th source character.  */
1329                     trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1330                     strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1331                     trans_pairs[2 * i][src_lens[i]] = '\0';
1332                     src_buf += src_lens[i]; /* Forward to next character.  */
1333 
1334                     /* Fetch the i-th destination character.  */
1335                     mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1336                     /* An invalid sequence, or a truncated multibyte character.
1337                        We treat it as a singlebyte character.  */
1338                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1339                         || mbclen == 0)
1340                       mbclen = 1;
1341 
1342                     /* Set the i-th destination character.  */
1343                     trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1344                     strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1345                     trans_pairs[2 * i + 1][mbclen] = '\0';
1346                     idx += mbclen; /* Forward to next character.  */
1347                   }
1348                 trans_pairs[2 * i] = NULL;
1349                 if (idx != dest_len)
1350                   bad_prog(_(Y_CMD_LEN));
1351               }
1352             else
1353               {
1354 	        char *translate = OB_MALLOC(&obs, YMAP_LENGTH, char);
1355                 unsigned char *ustring = CAST(unsigned char *)src_buf;
1356 
1357 		if (len != dest_len)
1358                   bad_prog(_(Y_CMD_LEN));
1359 
1360 	        for (len = 0; len < YMAP_LENGTH; len++)
1361 	          translate[len] = len;
1362 
1363                 while (dest_len--)
1364                   translate[(unsigned char)*ustring++] = *dest_buf++;
1365 
1366 	        cur_cmd->x.translate = translate;
1367 	      }
1368 
1369             if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1370               bad_prog(_(EXCESS_JUNK));
1371 
1372             free_buffer(b);
1373             free_buffer(b2);
1374 	  }
1375 	break;
1376 
1377 	case EOF:
1378 	  bad_prog(_(NO_COMMAND));
1379 	  /*NOTREACHED*/
1380 
1381 	default:
1382 	  bad_command (ch);
1383 	  /*NOTREACHED*/
1384 	}
1385 
1386       /* this is buried down here so that "continue" statements will miss it */
1387       ++vector->v_length;
1388     }
1389   return vector;
1390 }
1391 
1392 
1393 /* deal with \X escapes */
1394 size_t
normalize_text(buf,len,buftype)1395 normalize_text(buf, len, buftype)
1396   char *buf;
1397   size_t len;
1398   enum text_types buftype;
1399 {
1400   const char *bufend = buf + len;
1401   char *p = buf;
1402   char *q = buf;
1403 
1404   /* This variable prevents normalizing text within bracket
1405      subexpressions when conforming to POSIX.  If 0, we
1406      are not within a bracket expression.  If -1, we are within a
1407      bracket expression but are not within [.FOO.], [=FOO=],
1408      or [:FOO:].  Otherwise, this is the '.', '=', or ':'
1409      respectively within these three types of subexpressions.  */
1410   int bracket_state = 0;
1411 
1412   int mbclen;
1413   mbstate_t cur_stat;
1414   memset(&cur_stat, 0, sizeof(mbstate_t));
1415 
1416   while (p < bufend)
1417     {
1418       int c;
1419       mbclen = MBRLEN (p, bufend - p, &cur_stat);
1420       if (mbclen != 1)
1421 	{
1422           /* An invalid sequence, or a truncated multibyte character.
1423              We treat it as a singlebyte character.  */
1424           if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1425             mbclen = 1;
1426 
1427           memmove (q, p, mbclen);
1428           q += mbclen;
1429           p += mbclen;
1430 	  continue;
1431 	}
1432 
1433       if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1434 	switch ( (c = *++p) )
1435 	  {
1436 #if defined __STDC__ && __STDC__-0
1437 	  case 'a': *q++ = '\a'; p++; continue;
1438 #else /* Not STDC; we'll just assume ASCII */
1439 	  case 'a': *q++ = '\007'; p++; continue;
1440 #endif
1441 	  /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1442 	  case 'f': *q++ = '\f'; p++; continue;
1443 	  case '\n': /*fall through */
1444 	  case 'n': *q++ = '\n'; p++; continue;
1445 	  case 'r': *q++ = '\r'; p++; continue;
1446 	  case 't': *q++ = '\t'; p++; continue;
1447 	  case 'v': *q++ = '\v'; p++; continue;
1448 
1449 	  case 'd': /* decimal byte */
1450 	    p = convert_number(q, p+1, bufend, 10, 3, 'd');
1451 	    q++;
1452 	    continue;
1453 
1454 	  case 'x': /* hexadecimal byte */
1455 	    p = convert_number(q, p+1, bufend, 16, 2, 'x');
1456 	    q++;
1457 	    continue;
1458 
1459 #ifdef REG_PERL
1460 	  case '0': case '1': case '2': case '3':
1461 	  case '4': case '5': case '6': case '7':
1462 	    if ((extended_regexp_flags & REG_PERL)
1463 		&& p+1 < bufend
1464 		&& p[1] >= '0' && p[1] <= '9')
1465 	      {
1466 		p = convert_number(q, p, bufend, 8, 3, *p);
1467 		q++;
1468 	      }
1469 	    else
1470 	      {
1471 		/* we just pass the \ up one level for interpretation */
1472 	        if (buftype != TEXT_BUFFER)
1473 		  *q++ = '\\';
1474 	      }
1475 
1476 	    continue;
1477 
1478 	  case 'o': /* octal byte */
1479 	    if (!(extended_regexp_flags & REG_PERL))
1480 	      {
1481 	        p = convert_number(q, p+1, bufend,  8, 3, 'o');
1482 		q++;
1483 	      }
1484 	    else
1485 	      {
1486 	        /* we just pass the \ up one level for interpretation */
1487 	        if (buftype != TEXT_BUFFER)
1488 		  *q++ = '\\';
1489 	      }
1490 
1491 	    continue;
1492 #else
1493 	  case 'o': /* octal byte */
1494 	    p = convert_number(q, p+1, bufend,  8, 3, 'o');
1495 	    q++;
1496 	    continue;
1497 #endif
1498 
1499 	  case 'c':
1500 	    if (++p < bufend)
1501 	      {
1502 		*q++ = toupper(*p) ^ 0x40;
1503 		p++;
1504 		continue;
1505 	      }
1506 	    else
1507 	      {
1508 	        /* we just pass the \ up one level for interpretation */
1509 	        if (buftype != TEXT_BUFFER)
1510 		  *q++ = '\\';
1511 	        continue;
1512 	      }
1513 
1514 	  default:
1515 	    /* we just pass the \ up one level for interpretation */
1516 	    if (buftype != TEXT_BUFFER)
1517 	      *q++ = '\\';
1518 	    break;
1519 	  }
1520       else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1521         switch (*p)
1522           {
1523           case '[':
1524             if (!bracket_state)
1525               bracket_state = -1;
1526             break;
1527 
1528 	  case ':':
1529 	  case '.':
1530 	  case '=':
1531             if (bracket_state == -1 && p[-1] == '[')
1532               bracket_state = *p;
1533             break;
1534 
1535           case ']':
1536             if (bracket_state == 0)
1537 	      ;
1538             else if (bracket_state == -1)
1539               bracket_state = 0;
1540             else if (p[-2] != bracket_state && p[-1] == bracket_state)
1541               bracket_state = -1;
1542             break;
1543           }
1544 
1545       *q++ = *p++;
1546     }
1547     return (size_t)(q - buf);
1548 }
1549 
1550 
1551 /* `str' is a string (from the command line) that contains a sed command.
1552    Compile the command, and add it to the end of `cur_program'. */
1553 struct vector *
compile_string(cur_program,str,len)1554 compile_string(cur_program, str, len)
1555   struct vector *cur_program;
1556   char *str;
1557   size_t len;
1558 {
1559   static countT string_expr_count = 0;
1560   struct vector *ret;
1561 
1562   prog.file = NULL;
1563   prog.base = CAST(unsigned char *)str;
1564   prog.cur = prog.base;
1565   prog.end = prog.cur + len;
1566 
1567   cur_input.line = 0;
1568   cur_input.name = NULL;
1569   cur_input.string_expr_count = ++string_expr_count;
1570 
1571   ret = compile_program(cur_program);
1572   prog.base = NULL;
1573   prog.cur = NULL;
1574   prog.end = NULL;
1575 
1576   first_script = false;
1577   return ret;
1578 }
1579 
1580 /* `cmdfile' is the name of a file containing sed commands.
1581    Read them in and add them to the end of `cur_program'.
1582  */
1583 struct vector *
compile_file(cur_program,cmdfile)1584 compile_file(cur_program, cmdfile)
1585   struct vector *cur_program;
1586   const char *cmdfile;
1587 {
1588   size_t len;
1589   struct vector *ret;
1590 
1591   prog.file = stdin;
1592   if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1593     prog.file = ck_fopen(cmdfile, "rt", true);
1594 
1595   cur_input.line = 1;
1596   cur_input.name = cmdfile;
1597   cur_input.string_expr_count = 0;
1598 
1599   ret = compile_program(cur_program);
1600   if (prog.file != stdin)
1601     ck_fclose(prog.file);
1602   prog.file = NULL;
1603 
1604   first_script = false;
1605   return ret;
1606 }
1607 
1608 /* Make any checks which require the whole program to have been read.
1609    In particular: this backpatches the jump targets.
1610    Any cleanup which can be done after these checks is done here also.  */
1611 void
check_final_program(program)1612 check_final_program(program)
1613   struct vector *program;
1614 {
1615   struct sed_label *go;
1616   struct sed_label *lbl;
1617 
1618   /* do all "{"s have a corresponding "}"? */
1619   if (blocks)
1620     {
1621       /* update info for error reporting: */
1622       MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1623       bad_prog(_(EXCESS_OPEN_BRACE));
1624     }
1625 
1626   /* was the final command an unterminated a/c/i command? */
1627   if (pending_text)
1628     {
1629       old_text_buf->text_length = size_buffer(pending_text);
1630       old_text_buf->text = MEMDUP(get_buffer(pending_text),
1631 				  old_text_buf->text_length, char);
1632       free_buffer(pending_text);
1633       pending_text = NULL;
1634     }
1635 
1636   for (go = jumps; go; go = release_label(go))
1637     {
1638       for (lbl = labels; lbl; lbl = lbl->next)
1639 	if (strcmp(lbl->name, go->name) == 0)
1640 	  break;
1641       if (lbl)
1642 	{
1643 	  program->v[go->v_index].x.jump_index = lbl->v_index;
1644 	}
1645       else
1646 	{
1647 	  if (*go->name)
1648 	    panic(_("can't find label for jump to `%s'"), go->name);
1649 	  program->v[go->v_index].x.jump_index = program->v_length;
1650 	}
1651     }
1652   jumps = NULL;
1653 
1654   for (lbl = labels; lbl; lbl = release_label(lbl))
1655     ;
1656   labels = NULL;
1657 
1658   /* There is no longer a need to track file names: */
1659   {
1660     struct output *p;
1661 
1662     for (p=file_read; p; p=p->link)
1663       if (p->name)
1664 	{
1665 	  FREE(p->name);
1666 	  p->name = NULL;
1667 	}
1668 
1669     for (p=file_write; p; p=p->link)
1670       if (p->name)
1671 	{
1672 	  FREE(p->name);
1673 	  p->name = NULL;
1674 	}
1675   }
1676 }
1677 
1678 /* Rewind all resources which were allocated in this module. */
1679 void
rewind_read_files()1680 rewind_read_files()
1681 {
1682   struct output *p;
1683 
1684   for (p=file_read; p; p=p->link)
1685     if (p->fp)
1686       rewind(p->fp);
1687 }
1688 
1689 /* Release all resources which were allocated in this module. */
1690 void
finish_program(program)1691 finish_program(program)
1692   struct vector *program;
1693 {
1694   /* close all files... */
1695   {
1696     struct output *p, *q;
1697 
1698     for (p=file_read; p; p=q)
1699       {
1700 	if (p->fp)
1701 	  ck_fclose(p->fp);
1702 	q = p->link;
1703 #if 0
1704 	/* We use obstacks. */
1705 	FREE(p);
1706 #endif
1707       }
1708 
1709     for (p=file_write; p; p=q)
1710       {
1711 	if (p->fp)
1712 	  ck_fclose(p->fp);
1713 	q = p->link;
1714 #if 0
1715 	/* We use obstacks. */
1716 	FREE(p);
1717 #endif
1718       }
1719     file_read = file_write = NULL;
1720   }
1721 
1722 #ifdef DEBUG_LEAKS
1723   obstack_free (&obs, NULL);
1724 #endif /*DEBUG_LEAKS*/
1725 }
1726