1 /* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 /* compile.c: translate sed source into internal form */
20
21 #include "sed.h"
22 #include "strverscmp.h"
23 #include <stdio.h>
24 #include <ctype.h>
25
26 #ifdef HAVE_STRINGS_H
27 # include <strings.h>
28 # ifdef HAVE_MEMORY_H
29 # include <memory.h>
30 # endif
31 #else
32 # include <string.h>
33 #endif /* HAVE_STRINGS_H */
34
35 #ifdef HAVE_STDLIB_H
36 # include <stdlib.h>
37 #endif
38 #ifndef EXIT_FAILURE
39 # define EXIT_FAILURE 1
40 #endif
41
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
44 #endif
45
46 #include <obstack.h>
47
48
/* NOTE(review): presumably the number of entries in a `y' command
   translation map (one per byte value) -- the use site is not visible
   here; confirm. */
#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
/* Number of `struct sed_cmd' slots added each time next_cmd_entry()
   finds the command vector full. */
#define VECTOR_ALLOC_INCREMENT 40

/* let's not confuse text editors that have only dumb bracket-matching... */
#define OPEN_BRACKET '['
#define CLOSE_BRACKET ']'
#define OPEN_BRACE '{'
#define CLOSE_BRACE '}'
57
/* Describes where script text is currently being read from: either an
   in-memory string (`cur' non-NULL) or a stdio stream (`file').
   inchar() and savchar() consult `cur' first and fall back to `file'. */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, 'prog.cur' points
     to the current character in the string, and 'prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls. */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble! */
  FILE *file;
};
73
/* Information used to give out useful and informative error messages.
   bad_prog() reports `name' and `line' when `name' is non-NULL, and
   otherwise reports `string_expr_count' together with the offset into
   the current script string. */
struct error_info {
  /* This is the name of the current script file. */
  const char *name;

  /* This is the number of the current script line that we're compiling.
     inchar() increments it on every newline read and savchar()
     decrements it again when a newline is pushed back. */
  countT line;

  /* This is the index of the "-e" expressions on the command line. */
  countT string_expr_count;
};
85
86
/* Label structure used to resolve GOTO's, labels, and block beginnings.
   Nodes are created by setup_label() and popped by release_label(). */
struct sed_label {
  countT v_index;		/* index of vector element being referenced */
  char *name;			/* NUL-terminated name of the label; NULL for
				   `{' block entries; passed to FREE() by
				   release_label() */
  struct error_info err_info;	/* track where `{}' blocks start; only filled
				   in when setup_label() is given a non-NULL
				   err_info */
  struct sed_label *next;	/* linked list (stack) */
};
94
/* Associates a special file name with the stdio stream that should
   back it.  `pfp' points at one of the my_std* variables below, whose
   value is copied into `outf.fp' by get_openfile() when the name is
   requested. */
struct special_files {
  struct output outf;
  FILE **pfp;
};

/* Assigned by get_openfile() at run time, because std* are not always
   constant expressions usable in a static initializer. */
FILE *my_stdin, *my_stdout, *my_stderr;
/* Table of recognized special names; the entry whose name is NULL
   terminates the table. */
struct special_files special_files[] = {
  { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
  { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
  { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
  { { NULL, false, NULL, NULL }, NULL }
};
107
108
/* Where we are in the processing of the input. */
static struct prog_info prog;
static struct error_info cur_input;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts.
   `labels' receives an entry for each `:' command and `jumps' one for
   each `b', `t' or `T' command. */
static struct sed_label *jumps = NULL;
static struct sed_label *labels = NULL;

/* We wish to detect #n magic only in the first input argument;
   this flag tracks when we have consumed the first file of input. */
static bool first_script = true;

/* Allow for scripts like "sed -e 'i\' -e foo": */
/* `pending_text' accumulates text for read_text(); it stays non-NULL
   while a text argument is still unfinished.  `old_text_buf' remembers
   the text_buf that the accumulated text will eventually be stored in. */
static struct buffer *pending_text = NULL;
static struct text_buf *old_text_buf = NULL;

/* Information about block start positions.  This is used to backpatch
   block end positions. */
static struct sed_label *blocks = NULL;

/* Use an obstack for compilation. */
static struct obstack obs;
132
/* Various error messages we may want to print */
/* All messages are packed into one array, separated by NUL bytes.
   The macros that follow locate each message by adding up the sizes
   of the messages before it, so the order of the strings here and the
   order of the macro chain must be kept in step. */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

/* Each macro is the address of one message inside errors[]: the
   previous macro plus sizeof() of the previous message literal (which
   counts its terminating NUL, matching the "\0" separators above).
   This assumes N_() yields its argument unchanged -- TODO confirm; the
   literal given to N_() must be identical to the corresponding string
   in errors[]. */
#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
/* One past the end of the last message. */
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
187
/* Heads of the lists of files opened so far, kept separately for
   reading (`R') and writing (`w', `W', `s///w').  get_openfile()
   searches the given list by name and pushes new entries on its front. */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
190
191
192 /* Complain about an unknown command and exit. */
193 void
bad_command(ch)194 bad_command(ch)
195 char ch;
196 {
197 const char *msg = _(UNKNOWN_CMD);
198 char *unknown_cmd = xmalloc(strlen(msg));
199 sprintf(unknown_cmd, msg, ch);
200 bad_prog(unknown_cmd);
201 }
202
203 /* Complain about a programming error and exit. */
204 void
bad_prog(why)205 bad_prog(why)
206 const char *why;
207 {
208 if (cur_input.name)
209 fprintf(stderr, _("%s: file %s line %lu: %s\n"),
210 myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
211 else
212 fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
213 myname,
214 CAST(unsigned long)cur_input.string_expr_count,
215 CAST(unsigned long)(prog.cur-prog.base),
216 why);
217 exit(EXIT_FAILURE);
218 }
219
220
221 /* Read the next character from the program. Return EOF if there isn't
222 anything to read. Keep cur_input.line up to date, so error messages
223 can be meaningful. */
224 static int inchar P_((void));
225 static int
inchar()226 inchar()
227 {
228 int ch = EOF;
229
230 if (prog.cur)
231 {
232 if (prog.cur < prog.end)
233 ch = *prog.cur++;
234 }
235 else if (prog.file)
236 {
237 if (!feof(prog.file))
238 ch = getc(prog.file);
239 }
240 if (ch == '\n')
241 ++cur_input.line;
242 return ch;
243 }
244
245 /* unget `ch' so the next call to inchar will return it. */
246 static void savchar P_((int ch));
247 static void
savchar(ch)248 savchar(ch)
249 int ch;
250 {
251 if (ch == EOF)
252 return;
253 if (ch == '\n' && cur_input.line > 0)
254 --cur_input.line;
255 if (prog.cur)
256 {
257 if (prog.cur <= prog.base || *--prog.cur != ch)
258 panic("Called savchar() with unexpected pushback (%x)",
259 CAST(unsigned char)ch);
260 }
261 else
262 ungetc(ch, prog.file);
263 }
264
265 /* Read the next non-blank character from the program. */
266 static int in_nonblank P_((void));
267 static int
in_nonblank()268 in_nonblank()
269 {
270 int ch;
271 do
272 ch = inchar();
273 while (ISBLANK(ch));
274 return ch;
275 }
276
277 /* Read an integer value from the program. */
278 static countT in_integer P_((int ch));
279 static countT
in_integer(ch)280 in_integer(ch)
281 int ch;
282 {
283 countT num = 0;
284
285 while (ISDIGIT(ch))
286 {
287 num = num * 10 + ch - '0';
288 ch = inchar();
289 }
290 savchar(ch);
291 return num;
292 }
293
294 static int add_then_next P_((struct buffer *b, int ch));
295 static int
add_then_next(b,ch)296 add_then_next(b, ch)
297 struct buffer *b;
298 int ch;
299 {
300 add1_buffer(b, ch);
301 return inchar();
302 }
303
/* Parse up to MAXDIGITS digits of radix BASE from [BUF, BUFEND).

   Parsing stops at the first character that is not a hexadecimal digit
   or whose value is not below BASE.  If at least one digit was
   consumed, the accumulated value is stored in *RESULT (narrowed to
   char); otherwise DEFAULT_CHAR is stored.  Returns a pointer to the
   first unconsumed character. */
static char * convert_number P_((char *, char *, const char *, int, int, int));
static char *
convert_number(result, buf, bufend, base, maxdigits, default_char)
  char *result;
  char *buf;
  const char *bufend;
  int base;
  int maxdigits;
  int default_char;
{
  char *cursor = buf;
  int value = 0;

  while (cursor < bufend && maxdigits-- > 0)
    {
      int digit;

      switch (*cursor)
	{
	case '0': digit = 0; break;
	case '1': digit = 1; break;
	case '2': digit = 2; break;
	case '3': digit = 3; break;
	case '4': digit = 4; break;
	case '5': digit = 5; break;
	case '6': digit = 6; break;
	case '7': digit = 7; break;
	case '8': digit = 8; break;
	case '9': digit = 9; break;
	case 'a': case 'A': digit = 10; break;
	case 'b': case 'B': digit = 11; break;
	case 'c': case 'C': digit = 12; break;
	case 'd': case 'D': digit = 13; break;
	case 'e': case 'E': digit = 14; break;
	case 'f': case 'F': digit = 15; break;
	default: digit = -1; break;
	}

      /* Not a digit at all, or not a digit in this radix. */
      if (digit < 0 || digit >= base)
	break;

      value = value * base + digit;
      ++cursor;
    }

  *result = (cursor == buf) ? default_char : value;
  return cursor;
}
349
350
351 /* Read in a filename for a `r', `w', or `s///w' command. */
352 static struct buffer *read_filename P_((void));
353 static struct buffer *
read_filename()354 read_filename()
355 {
356 struct buffer *b;
357 int ch;
358
359 b = init_buffer();
360 ch = in_nonblank();
361 while (ch != EOF && ch != '\n')
362 {
363 #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
364 if (posixicity == POSIXLY_EXTENDED)
365 if (ch == ';' || ch == '#')
366 {
367 savchar(ch);
368 break;
369 }
370 #endif
371 ch = add_then_next(b, ch);
372 }
373 add1_buffer(b, '\0');
374 return b;
375 }
376
377 static struct output *get_openfile P_((struct output **file_ptrs, char *mode, bool fail));
378 static struct output *
get_openfile(file_ptrs,mode,fail)379 get_openfile(file_ptrs, mode, fail)
380 struct output **file_ptrs;
381 char *mode;
382 bool fail;
383 {
384 struct buffer *b;
385 char *file_name;
386 struct output *p;
387 int is_stderr;
388
389 b = read_filename();
390 file_name = get_buffer(b);
391 for (p=*file_ptrs; p; p=p->link)
392 if (strcmp(p->name, file_name) == 0)
393 break;
394
395 if (posixicity == POSIXLY_EXTENDED)
396 {
397 /* Check whether it is a special file (stdin, stdout or stderr) */
398 struct special_files *special = special_files;
399
400 /* std* sometimes are not constants, so they
401 cannot be used in the initializer for special_files */
402 #ifndef CONFIG_WITHOUT_O_OPT
403 my_stdin = stdin; my_stdout = sed_stdout; my_stderr = stderr;
404 #else
405 my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
406 #endif
407 for (special = special_files; special->outf.name; special++)
408 if (strcmp(special->outf.name, file_name) == 0)
409 {
410 special->outf.fp = *special->pfp;
411 free_buffer (b);
412 return &special->outf;
413 }
414 }
415
416 if (!p)
417 {
418 p = OB_MALLOC(&obs, 1, struct output);
419 p->name = ck_strdup(file_name);
420 p->fp = ck_fopen(p->name, mode, fail);
421 p->missing_newline = false;
422 p->link = *file_ptrs;
423 *file_ptrs = p;
424 }
425 free_buffer(b);
426 return p;
427 }
428
429
430 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
431 static struct sed_cmd *
next_cmd_entry(vectorp)432 next_cmd_entry(vectorp)
433 struct vector **vectorp;
434 {
435 struct sed_cmd *cmd;
436 struct vector *v;
437
438 v = *vectorp;
439 if (v->v_length == v->v_allocated)
440 {
441 v->v_allocated += VECTOR_ALLOC_INCREMENT;
442 v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
443 }
444
445 cmd = v->v + v->v_length;
446 cmd->a1 = NULL;
447 cmd->a2 = NULL;
448 cmd->range_state = RANGE_INACTIVE;
449 cmd->addr_bang = false;
450 cmd->cmd = '\0'; /* something invalid, to catch bugs early */
451
452 *vectorp = v;
453 return cmd;
454 }
455
456 static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
457 static int
snarf_char_class(b,cur_stat)458 snarf_char_class(b, cur_stat)
459 struct buffer *b;
460 mbstate_t *cur_stat;
461 {
462 int ch;
463 int state = 0;
464 int delim;
465 bool pending_mb = 0;
466
467 ch = inchar();
468 if (ch == '^')
469 ch = add_then_next(b, ch);
470 if (ch == CLOSE_BRACKET)
471 ch = add_then_next(b, ch);
472
473 /* States are:
474 0 outside a collation element, character class or collation class
475 1 after the bracket
476 2 after the opening ./:/=
477 3 after the closing ./:/= */
478
479 for (;; ch = add_then_next (b, ch))
480 {
481 pending_mb = BRLEN (ch, cur_stat) != 1;
482
483 switch (ch)
484 {
485 case EOF:
486 case '\n':
487 return ch;
488
489 case '.':
490 case ':':
491 case '=':
492 if (pending_mb)
493 continue;
494
495 if (state == 1)
496 {
497 delim = ch;
498 state++;
499 }
500 else if (ch == delim && state == 2)
501 state++;
502 else
503 break;
504
505 continue;
506
507 case OPEN_BRACKET:
508 if (pending_mb)
509 continue;
510
511 state++;
512 continue;
513
514 case CLOSE_BRACKET:
515 if (pending_mb)
516 continue;
517
518 if (state == 0 || state == 1)
519 return ch;
520 else if (state == 3)
521 state = 0;
522
523 break;
524
525 default:
526 break;
527 }
528
529 /* Getting a character different from .=: whilst in state 1
530 goes back to state 0, getting a character different from ]
531 whilst in state 3 goes back to state 2. */
532 state &= ~1;
533 }
534 }
535
536 static struct buffer *match_slash P_((int slash, bool regex));
537 static struct buffer *
match_slash(slash,regex)538 match_slash(slash, regex)
539 int slash;
540 bool regex;
541 {
542 struct buffer *b;
543 int ch;
544 bool pending_mb = false;
545 mbstate_t cur_stat;
546
547 memset (&cur_stat, 0, sizeof (mbstate_t));
548
549 if (BRLEN (slash, &cur_stat) == -2)
550 if (BRLEN (slash, &cur_stat) == -2)
551 bad_prog (BAD_DELIM);
552
553 memset (&cur_stat, 0, sizeof (mbstate_t));
554
555 b = init_buffer();
556 while ((ch = inchar()) != EOF && ch != '\n')
557 {
558 pending_mb = BRLEN (ch, &cur_stat) != 1;
559 pending_mb = BRLEN (ch, &cur_stat) != 1;
560
561 if (!pending_mb)
562 {
563 if (ch == slash)
564 return b;
565 else if (ch == '\\')
566 {
567 ch = inchar();
568 if (ch == EOF)
569 break;
570 #ifndef REG_PERL
571 else if (ch == 'n' && regex)
572 ch = '\n';
573 #endif
574 else if (ch != '\n' && ch != slash)
575 add1_buffer(b, '\\');
576 }
577 else if (ch == OPEN_BRACKET && regex)
578 {
579 add1_buffer(b, ch);
580 ch = snarf_char_class(b, &cur_stat);
581 if (ch != CLOSE_BRACKET)
582 break;
583 }
584 }
585
586 add1_buffer(b, ch);
587 }
588
589 if (ch == '\n')
590 savchar(ch); /* for proper line number in error report */
591 free_buffer(b);
592 return NULL;
593 }
594
595 static int mark_subst_opts P_((struct subst *cmd));
596 static int
mark_subst_opts(cmd)597 mark_subst_opts(cmd)
598 struct subst *cmd;
599 {
600 int flags = 0;
601 int ch;
602
603 cmd->global = false;
604 cmd->print = false;
605 cmd->eval = false;
606 cmd->numb = 0;
607 cmd->outf = NULL;
608
609 for (;;)
610 switch ( (ch = in_nonblank()) )
611 {
612 case 'i': /* GNU extension */
613 case 'I': /* GNU extension */
614 flags |= REG_ICASE;
615 break;
616
617 #ifdef REG_PERL
618 case 's': /* GNU extension */
619 case 'S': /* GNU extension */
620 if (extended_regexp_flags & REG_PERL)
621 flags |= REG_DOTALL;
622 break;
623
624 case 'x': /* GNU extension */
625 case 'X': /* GNU extension */
626 if (extended_regexp_flags & REG_PERL)
627 flags |= REG_EXTENDED;
628 break;
629 #endif
630
631 case 'm': /* GNU extension */
632 case 'M': /* GNU extension */
633 flags |= REG_NEWLINE;
634 break;
635
636 case 'e':
637 cmd->eval = true;
638 break;
639
640 case 'p':
641 if (cmd->print)
642 bad_prog(_(EXCESS_P_OPT));
643 cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
644 break;
645
646 case 'g':
647 if (cmd->global)
648 bad_prog(_(EXCESS_G_OPT));
649 cmd->global = true;
650 break;
651
652 case 'w':
653 cmd->outf = get_openfile(&file_write, "w", true);
654 return flags;
655
656 case '0': case '1': case '2': case '3': case '4':
657 case '5': case '6': case '7': case '8': case '9':
658 if (cmd->numb)
659 bad_prog(_(EXCESS_N_OPT));
660 cmd->numb = in_integer(ch);
661 if (!cmd->numb)
662 bad_prog(_(ZERO_N_OPT));
663 break;
664
665 case CLOSE_BRACE:
666 case '#':
667 savchar(ch);
668 /* Fall Through */
669 case EOF:
670 case '\n':
671 case ';':
672 return flags;
673
674 case '\r':
675 if (inchar() == '\n')
676 return flags;
677 /* FALLTHROUGH */
678
679 default:
680 bad_prog(_(UNKNOWN_S_OPT));
681 /*NOTREACHED*/
682 }
683 }
684
685
686 /* read in a label for a `:', `b', or `t' command */
687 static char *read_label P_((void));
688 static char *
read_label()689 read_label()
690 {
691 struct buffer *b;
692 int ch;
693 char *ret;
694
695 b = init_buffer();
696 ch = in_nonblank();
697
698 while (ch != EOF && ch != '\n'
699 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
700 ch = add_then_next (b, ch);
701
702 savchar(ch);
703 add1_buffer(b, '\0');
704 ret = ck_strdup(get_buffer(b));
705 free_buffer(b);
706 return ret;
707 }
708
709 /* Store a label (or label reference) created by a `:', `b', or `t'
710 command so that the jump to/from the label can be backpatched after
711 compilation is complete, or a reference created by a `{' to be
712 backpatched when the corresponding `}' is found. */
713 static struct sed_label *setup_label
714 P_((struct sed_label *, countT, char *, const struct error_info *));
715 static struct sed_label *
setup_label(list,idx,name,err_info)716 setup_label(list, idx, name, err_info)
717 struct sed_label *list;
718 countT idx;
719 char *name;
720 const struct error_info *err_info;
721 {
722 struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
723 ret->v_index = idx;
724 ret->name = name;
725 if (err_info)
726 MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
727 ret->next = list;
728 return ret;
729 }
730
731 static struct sed_label *release_label P_((struct sed_label *list_head));
732 static struct sed_label *
release_label(list_head)733 release_label(list_head)
734 struct sed_label *list_head;
735 {
736 struct sed_label *ret;
737
738 if (!list_head)
739 return NULL;
740 ret = list_head->next;
741
742 FREE(list_head->name);
743
744 #if 0
745 /* We use obstacks */
746 FREE(list_head);
747 #endif
748 return ret;
749 }
750
751 static struct replacement *new_replacement P_((char *, size_t,
752 enum replacement_types));
753 static struct replacement *
new_replacement(text,length,type)754 new_replacement(text, length, type)
755 char *text;
756 size_t length;
757 enum replacement_types type;
758 {
759 struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
760
761 r->prefix = text;
762 r->prefix_length = length;
763 r->subst_id = -1;
764 r->repl_type = type;
765
766 /* r-> next = NULL; */
767 return r;
768 }
769
/* Split the replacement text of an `s' command into a linked list of
   segments and store it in SUB->replacement.

   TEXT/LENGTH is the raw replacement text; it is duplicated and passed
   through normalize_text() before being scanned.  Each segment is a
   run of literal text optionally followed by a reference (`&' or
   `\0'..`\9') and tagged with the case conversion in force.
   SUB->max_id is set to the highest numbered back-reference used. */
static void setup_replacement P_((struct subst *, const char *, size_t));
static void
setup_replacement(sub, text, length)
  struct subst *sub;
  const char *text;
  size_t length;
{
  char *base;			/* start of the literal run being collected */
  char *p;
  char *text_end;
  /* repl_type applies to the next segment created; save_type is what
     repl_type reverts to after that segment. */
  enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
  struct replacement root;	/* dummy head; only its `next' is used */
  struct replacement *tail;

  sub->max_id = 0;
  base = MEMDUP(text, length, char);
  length = normalize_text(base, length, TEXT_REPLACEMENT);

  text_end = base + length;
  tail = &root;

  for (p=base; p<text_end; ++p)
    {
      if (*p == '\\')
	{
	  /* Preceding the backslash may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;

	  /* Skip the backslash and look for a numeric back-reference,
	     or a case-munging escape if not in POSIX mode: */
	  /* NOTE(review): when this switch is skipped (POSIXLY_BASIC and
	     a non-digit) neither the backslash nor the character after
	     it ends up in any segment -- confirm this is intended. */
	  ++p;
	  if (p < text_end && (posixicity != POSIXLY_BASIC || ISDIGIT (*p)))
	    switch (*p)
	      {
	      case '0': case '1': case '2': case '3': case '4':
	      case '5': case '6': case '7': case '8': case '9':
		tail->subst_id = *p - '0';
		if (sub->max_id < tail->subst_id)
		  sub->max_id = tail->subst_id;
		break;

	      case 'L':
		repl_type = REPL_LOWERCASE;
		save_type = REPL_LOWERCASE;
		break;

	      case 'U':
		repl_type = REPL_UPPERCASE;
		save_type = REPL_UPPERCASE;
		break;

	      case 'E':
		repl_type = REPL_ASIS;
		save_type = REPL_ASIS;
		break;

	      case 'l':
		/* Affects only the next segment; the current type is
		   restored afterwards. */
		save_type = repl_type;
		repl_type |= REPL_LOWERCASE_FIRST;
		break;

	      case 'u':
		save_type = repl_type;
		repl_type |= REPL_UPPERCASE_FIRST;
		break;

	      default:
		/* Not a recognized escape: overwrite the backslash with
		   the escaped character and lengthen the segment just
		   created by one byte so that character is emitted
		   literally. */
		p[-1] = *p;
		++tail->prefix_length;
	      }

	  base = p + 1;
	}
      else if (*p == '&')
	{
	  /* Preceding the ampersand may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;
	  tail->subst_id = 0;
	  base = p + 1;
	}
    }
  /* There may be some trailing literal text: */
  if (base < text_end)
    tail = tail->next =
      new_replacement(base, CAST(size_t)(text_end - base), repl_type);

  /* Terminate the list; if no segment was created this sets
     root.next itself to NULL. */
  tail->next = NULL;
  sub->replacement = root.next;
}
865
866 static void read_text P_((struct text_buf *buf, int leadin_ch));
867 static void
read_text(buf,leadin_ch)868 read_text(buf, leadin_ch)
869 struct text_buf *buf;
870 int leadin_ch;
871 {
872 int ch;
873
874 /* Should we start afresh (as opposed to continue a partial text)? */
875 if (buf)
876 {
877 if (pending_text)
878 free_buffer(pending_text);
879 pending_text = init_buffer();
880 buf->text = NULL;
881 buf->text_length = 0;
882 old_text_buf = buf;
883 }
884 /* assert(old_text_buf != NULL); */
885
886 if (leadin_ch == EOF)
887 return;
888
889 if (leadin_ch != '\n')
890 add1_buffer(pending_text, leadin_ch);
891
892 ch = inchar();
893 while (ch != EOF && ch != '\n')
894 {
895 if (ch == '\\')
896 {
897 ch = inchar();
898 if (ch != EOF)
899 add1_buffer (pending_text, '\\');
900 }
901
902 if (ch == EOF)
903 {
904 add1_buffer (pending_text, '\n');
905 return;
906 }
907
908 ch = add_then_next (pending_text, ch);
909 }
910
911 add1_buffer(pending_text, '\n');
912 if (!buf)
913 buf = old_text_buf;
914 buf->text_length = normalize_text (get_buffer (pending_text),
915 size_buffer (pending_text), TEXT_BUFFER);
916 buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
917 free_buffer(pending_text);
918 pending_text = NULL;
919 }
920
921
922 /* Try to read an address for a sed command. If it succeeds,
923 return non-zero and store the resulting address in `*addr'.
924 If the input doesn't look like an address read nothing
925 and return zero. */
926 static bool compile_address P_((struct addr *addr, int ch));
927 static bool
compile_address(addr,ch)928 compile_address(addr, ch)
929 struct addr *addr;
930 int ch;
931 {
932 addr->addr_type = ADDR_IS_NULL;
933 addr->addr_step = 0;
934 addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
935 addr->addr_regex = NULL;
936
937 if (ch == '/' || ch == '\\')
938 {
939 int flags = 0;
940 struct buffer *b;
941 addr->addr_type = ADDR_IS_REGEX;
942 if (ch == '\\')
943 ch = inchar();
944 if ( !(b = match_slash(ch, true)) )
945 bad_prog(_(UNTERM_ADDR_RE));
946
947 for(;;)
948 {
949 ch = in_nonblank();
950 switch(ch)
951 {
952 case 'I': /* GNU extension */
953 flags |= REG_ICASE;
954 break;
955
956 #ifdef REG_PERL
957 case 'S': /* GNU extension */
958 if (extended_regexp_flags & REG_PERL)
959 flags |= REG_DOTALL;
960 break;
961
962 case 'X': /* GNU extension */
963 if (extended_regexp_flags & REG_PERL)
964 flags |= REG_EXTENDED;
965 break;
966 #endif
967
968 case 'M': /* GNU extension */
969 flags |= REG_NEWLINE;
970 break;
971
972 default:
973 savchar (ch);
974 addr->addr_regex = compile_regex (b, flags, 0);
975 free_buffer(b);
976 return true;
977 }
978 }
979 }
980 else if (ISDIGIT(ch))
981 {
982 addr->addr_number = in_integer(ch);
983 addr->addr_type = ADDR_IS_NUM;
984 ch = in_nonblank();
985 if (ch != '~')
986 {
987 savchar(ch);
988 }
989 else
990 {
991 countT step = in_integer(in_nonblank());
992 if (step > 0)
993 {
994 addr->addr_step = step;
995 addr->addr_type = ADDR_IS_NUM_MOD;
996 }
997 }
998 }
999 else if (ch == '+' || ch == '~')
1000 {
1001 addr->addr_step = in_integer(in_nonblank());
1002 if (addr->addr_step==0)
1003 ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1004 else if (ch == '+')
1005 addr->addr_type = ADDR_IS_STEP;
1006 else
1007 addr->addr_type = ADDR_IS_STEP_MOD;
1008 }
1009 else if (ch == '$')
1010 {
1011 addr->addr_type = ADDR_IS_LAST;
1012 }
1013 else
1014 return false;
1015
1016 return true;
1017 }
1018
1019 /* Read a program (or a subprogram within `{' `}' pairs) in and store
1020 the compiled form in `*vector'. Return a pointer to the new vector. */
1021 static struct vector *compile_program P_((struct vector *));
1022 static struct vector *
compile_program(vector)1023 compile_program(vector)
1024 struct vector *vector;
1025 {
1026 struct sed_cmd *cur_cmd;
1027 struct buffer *b;
1028 int ch;
1029
1030 if (!vector)
1031 {
1032 vector = MALLOC(1, struct vector);
1033 vector->v = NULL;
1034 vector->v_allocated = 0;
1035 vector->v_length = 0;
1036
1037 obstack_init (&obs);
1038 }
1039 if (pending_text)
1040 read_text(NULL, '\n');
1041
1042 for (;;)
1043 {
1044 struct addr a;
1045
1046 while ((ch=inchar()) == ';' || ISSPACE(ch))
1047 ;
1048 if (ch == EOF)
1049 break;
1050
1051 cur_cmd = next_cmd_entry(&vector);
1052 if (compile_address(&a, ch))
1053 {
1054 if (a.addr_type == ADDR_IS_STEP
1055 || a.addr_type == ADDR_IS_STEP_MOD)
1056 bad_prog(_(BAD_STEP));
1057
1058 cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1059 ch = in_nonblank();
1060 if (ch == ',')
1061 {
1062 if (!compile_address(&a, in_nonblank()))
1063 bad_prog(_(BAD_COMMA));
1064
1065 cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1066 ch = in_nonblank();
1067 }
1068
1069 if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1070 && cur_cmd->a1->addr_number == 0
1071 && (!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX))
1072 bad_prog(_(INVALID_LINE_0));
1073 }
1074 if (ch == '!')
1075 {
1076 cur_cmd->addr_bang = true;
1077 ch = in_nonblank();
1078 if (ch == '!')
1079 bad_prog(_(BAD_BANG));
1080 }
1081
1082 /* Do not accept extended commands in --posix mode. Also,
1083 a few commands only accept one address in that mode. */
1084 if (posixicity == POSIXLY_BASIC)
1085 switch (ch)
1086 {
1087 case 'v': case 'L': case 'Q': case 'T':
1088 case 'R': case 'W':
1089 bad_command(ch);
1090
1091 case 'a': case 'i': case 'l':
1092 case '=': case 'r':
1093 if (cur_cmd->a2)
1094 bad_prog(_(ONE_ADDR));
1095 }
1096
1097 cur_cmd->cmd = ch;
1098 switch (ch)
1099 {
1100 case '#':
1101 if (cur_cmd->a1)
1102 bad_prog(_(NO_SHARP_ADDR));
1103 ch = inchar();
1104 if (ch=='n' && first_script && cur_input.line < 2)
1105 if ( (prog.base && prog.cur==2+prog.base)
1106 || (prog.file && !prog.base && 2==ftell(prog.file)))
1107 no_default_output = true;
1108 while (ch != EOF && ch != '\n')
1109 ch = inchar();
1110 continue; /* restart the for (;;) loop */
1111
1112 case 'v':
1113 /* This is an extension. Programs needing GNU sed might start
1114 * with a `v' command so that other seds will stop.
1115 * We compare the version and ignore POSIXLY_CORRECT.
1116 */
1117 {
1118 char *version = read_label ();
1119 char *compared_version;
1120 compared_version = (*version == '\0') ? "4.0" : version;
1121 if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1122 bad_prog(_(ANCIENT_VERSION));
1123
1124 free (version);
1125 posixicity = POSIXLY_EXTENDED;
1126 }
1127 continue;
1128
1129 case '{':
1130 blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1131 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1132 break;
1133
1134 case '}':
1135 if (!blocks)
1136 bad_prog(_(EXCESS_CLOSE_BRACE));
1137 if (cur_cmd->a1)
1138 bad_prog(_(NO_CLOSE_BRACE_ADDR));
1139 ch = in_nonblank();
1140 if (ch == CLOSE_BRACE || ch == '#')
1141 savchar(ch);
1142 else if (ch != EOF && ch != '\n' && ch != ';')
1143 bad_prog(_(EXCESS_JUNK));
1144
1145 vector->v[blocks->v_index].x.jump_index = vector->v_length;
1146 blocks = release_label(blocks); /* done with this entry */
1147 break;
1148
1149 case 'e':
1150 ch = in_nonblank();
1151 if (ch == EOF || ch == '\n')
1152 {
1153 cur_cmd->x.cmd_txt.text_length = 0;
1154 break;
1155 }
1156 else
1157 goto read_text_to_slash;
1158
1159 case 'a':
1160 case 'i':
1161 case 'c':
1162 ch = in_nonblank();
1163
1164 read_text_to_slash:
1165 if (ch == EOF)
1166 bad_prog(_(EXPECTED_SLASH));
1167
1168 if (ch == '\\')
1169 ch = inchar();
1170 else
1171 {
1172 savchar(ch);
1173 ch = '\n';
1174 }
1175
1176 read_text(&cur_cmd->x.cmd_txt, ch);
1177 break;
1178
1179 case ':':
1180 if (cur_cmd->a1)
1181 bad_prog(_(NO_COLON_ADDR));
1182 labels = setup_label(labels, vector->v_length, read_label(), NULL);
1183 break;
1184
1185 case 'T':
1186 case 'b':
1187 case 't':
1188 jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1189 break;
1190
1191 case 'Q':
1192 case 'q':
1193 if (cur_cmd->a2)
1194 bad_prog(_(ONE_ADDR));
1195 /* Fall through */
1196
1197 case 'L':
1198 case 'l':
1199 ch = in_nonblank();
1200 if (ISDIGIT(ch))
1201 {
1202 cur_cmd->x.int_arg = in_integer(ch);
1203 ch = in_nonblank();
1204 }
1205 else
1206 cur_cmd->x.int_arg = -1;
1207
1208 if (ch == CLOSE_BRACE || ch == '#')
1209 savchar(ch);
1210 else if (ch != EOF && ch != '\n' && ch != ';')
1211 bad_prog(_(EXCESS_JUNK));
1212
1213 break;
1214
1215 case '=':
1216 case 'd':
1217 case 'D':
1218 case 'g':
1219 case 'G':
1220 case 'h':
1221 case 'H':
1222 case 'n':
1223 case 'N':
1224 case 'p':
1225 case 'P':
1226 case 'x':
1227 ch = in_nonblank();
1228 if (ch == CLOSE_BRACE || ch == '#')
1229 savchar(ch);
1230 else if (ch != EOF && ch != '\n' && ch != ';')
1231 bad_prog(_(EXCESS_JUNK));
1232 break;
1233
1234 case 'r':
1235 b = read_filename();
1236 cur_cmd->x.fname = ck_strdup(get_buffer(b));
1237 free_buffer(b);
1238 break;
1239
1240 case 'R':
1241 cur_cmd->x.fp = get_openfile(&file_read, "r", false)->fp;
1242 break;
1243
1244 case 'W':
1245 case 'w':
1246 cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1247 break;
1248
1249 case 's':
1250 {
1251 struct buffer *b2;
1252 int flags;
1253 int slash;
1254
1255 slash = inchar();
1256 if ( !(b = match_slash(slash, true)) )
1257 bad_prog(_(UNTERM_S_CMD));
1258 if ( !(b2 = match_slash(slash, false)) )
1259 bad_prog(_(UNTERM_S_CMD));
1260
1261 cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1262 setup_replacement(cur_cmd->x.cmd_subst,
1263 get_buffer(b2), size_buffer(b2));
1264 free_buffer(b2);
1265
1266 flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1267 cur_cmd->x.cmd_subst->regx =
1268 compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1269 free_buffer(b);
1270 }
1271 break;
1272
1273 case 'y':
1274 {
1275 size_t len, dest_len;
1276 int slash;
1277 struct buffer *b2;
1278 char *src_buf, *dest_buf;
1279
1280 slash = inchar();
1281 if ( !(b = match_slash(slash, false)) )
1282 bad_prog(_(UNTERM_Y_CMD));
1283 src_buf = get_buffer(b);
1284 len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1285
1286 if ( !(b2 = match_slash(slash, false)) )
1287 bad_prog(_(UNTERM_Y_CMD));
1288 dest_buf = get_buffer(b2);
1289 dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1290
1291 if (mb_cur_max > 1)
1292 {
1293 int i, j, idx, src_char_num;
1294 size_t *src_lens = MALLOC(len, size_t);
1295 char **trans_pairs;
1296 size_t mbclen;
1297 mbstate_t cur_stat;
1298
1299 /* Enumerate how many character the source buffer has. */
1300 memset(&cur_stat, 0, sizeof(mbstate_t));
1301 for (i = 0, j = 0; i < len;)
1302 {
1303 mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1304 /* An invalid sequence, or a truncated multibyte character.
1305 We treat it as a singlebyte character. */
1306 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1307 || mbclen == 0)
1308 mbclen = 1;
1309 src_lens[j++] = mbclen;
1310 i += mbclen;
1311 }
1312 src_char_num = j;
1313
1314 memset(&cur_stat, 0, sizeof(mbstate_t));
1315 idx = 0;
1316
1317 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1318 src(i) : pointer to i-th source character.
1319 dest(i) : pointer to i-th destination character.
1320 NULL : terminator */
1321 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1322 cur_cmd->x.translatemb = trans_pairs;
1323 for (i = 0; i < src_char_num; i++)
1324 {
1325 if (idx >= dest_len)
1326 bad_prog(_(Y_CMD_LEN));
1327
1328 /* Set the i-th source character. */
1329 trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1330 strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1331 trans_pairs[2 * i][src_lens[i]] = '\0';
1332 src_buf += src_lens[i]; /* Forward to next character. */
1333
1334 /* Fetch the i-th destination character. */
1335 mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1336 /* An invalid sequence, or a truncated multibyte character.
1337 We treat it as a singlebyte character. */
1338 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1339 || mbclen == 0)
1340 mbclen = 1;
1341
1342 /* Set the i-th destination character. */
1343 trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1344 strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1345 trans_pairs[2 * i + 1][mbclen] = '\0';
1346 idx += mbclen; /* Forward to next character. */
1347 }
1348 trans_pairs[2 * i] = NULL;
1349 if (idx != dest_len)
1350 bad_prog(_(Y_CMD_LEN));
1351 }
1352 else
1353 {
1354 char *translate = OB_MALLOC(&obs, YMAP_LENGTH, char);
1355 unsigned char *ustring = CAST(unsigned char *)src_buf;
1356
1357 if (len != dest_len)
1358 bad_prog(_(Y_CMD_LEN));
1359
1360 for (len = 0; len < YMAP_LENGTH; len++)
1361 translate[len] = len;
1362
1363 while (dest_len--)
1364 translate[(unsigned char)*ustring++] = *dest_buf++;
1365
1366 cur_cmd->x.translate = translate;
1367 }
1368
1369 if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1370 bad_prog(_(EXCESS_JUNK));
1371
1372 free_buffer(b);
1373 free_buffer(b2);
1374 }
1375 break;
1376
1377 case EOF:
1378 bad_prog(_(NO_COMMAND));
1379 /*NOTREACHED*/
1380
1381 default:
1382 bad_command (ch);
1383 /*NOTREACHED*/
1384 }
1385
1386 /* this is buried down here so that "continue" statements will miss it */
1387 ++vector->v_length;
1388 }
1389 return vector;
1390 }
1391
1392
1393 /* deal with \X escapes */
1394 size_t
normalize_text(buf,len,buftype)1395 normalize_text(buf, len, buftype)
1396 char *buf;
1397 size_t len;
1398 enum text_types buftype;
1399 {
1400 const char *bufend = buf + len;
1401 char *p = buf;
1402 char *q = buf;
1403
1404 /* This variable prevents normalizing text within bracket
1405 subexpressions when conforming to POSIX. If 0, we
1406 are not within a bracket expression. If -1, we are within a
1407 bracket expression but are not within [.FOO.], [=FOO=],
1408 or [:FOO:]. Otherwise, this is the '.', '=', or ':'
1409 respectively within these three types of subexpressions. */
1410 int bracket_state = 0;
1411
1412 int mbclen;
1413 mbstate_t cur_stat;
1414 memset(&cur_stat, 0, sizeof(mbstate_t));
1415
1416 while (p < bufend)
1417 {
1418 int c;
1419 mbclen = MBRLEN (p, bufend - p, &cur_stat);
1420 if (mbclen != 1)
1421 {
1422 /* An invalid sequence, or a truncated multibyte character.
1423 We treat it as a singlebyte character. */
1424 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1425 mbclen = 1;
1426
1427 memmove (q, p, mbclen);
1428 q += mbclen;
1429 p += mbclen;
1430 continue;
1431 }
1432
1433 if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1434 switch ( (c = *++p) )
1435 {
1436 #if defined __STDC__ && __STDC__-0
1437 case 'a': *q++ = '\a'; p++; continue;
1438 #else /* Not STDC; we'll just assume ASCII */
1439 case 'a': *q++ = '\007'; p++; continue;
1440 #endif
1441 /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1442 case 'f': *q++ = '\f'; p++; continue;
1443 case '\n': /*fall through */
1444 case 'n': *q++ = '\n'; p++; continue;
1445 case 'r': *q++ = '\r'; p++; continue;
1446 case 't': *q++ = '\t'; p++; continue;
1447 case 'v': *q++ = '\v'; p++; continue;
1448
1449 case 'd': /* decimal byte */
1450 p = convert_number(q, p+1, bufend, 10, 3, 'd');
1451 q++;
1452 continue;
1453
1454 case 'x': /* hexadecimal byte */
1455 p = convert_number(q, p+1, bufend, 16, 2, 'x');
1456 q++;
1457 continue;
1458
1459 #ifdef REG_PERL
1460 case '0': case '1': case '2': case '3':
1461 case '4': case '5': case '6': case '7':
1462 if ((extended_regexp_flags & REG_PERL)
1463 && p+1 < bufend
1464 && p[1] >= '0' && p[1] <= '9')
1465 {
1466 p = convert_number(q, p, bufend, 8, 3, *p);
1467 q++;
1468 }
1469 else
1470 {
1471 /* we just pass the \ up one level for interpretation */
1472 if (buftype != TEXT_BUFFER)
1473 *q++ = '\\';
1474 }
1475
1476 continue;
1477
1478 case 'o': /* octal byte */
1479 if (!(extended_regexp_flags & REG_PERL))
1480 {
1481 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1482 q++;
1483 }
1484 else
1485 {
1486 /* we just pass the \ up one level for interpretation */
1487 if (buftype != TEXT_BUFFER)
1488 *q++ = '\\';
1489 }
1490
1491 continue;
1492 #else
1493 case 'o': /* octal byte */
1494 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1495 q++;
1496 continue;
1497 #endif
1498
1499 case 'c':
1500 if (++p < bufend)
1501 {
1502 *q++ = toupper(*p) ^ 0x40;
1503 p++;
1504 continue;
1505 }
1506 else
1507 {
1508 /* we just pass the \ up one level for interpretation */
1509 if (buftype != TEXT_BUFFER)
1510 *q++ = '\\';
1511 continue;
1512 }
1513
1514 default:
1515 /* we just pass the \ up one level for interpretation */
1516 if (buftype != TEXT_BUFFER)
1517 *q++ = '\\';
1518 break;
1519 }
1520 else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1521 switch (*p)
1522 {
1523 case '[':
1524 if (!bracket_state)
1525 bracket_state = -1;
1526 break;
1527
1528 case ':':
1529 case '.':
1530 case '=':
1531 if (bracket_state == -1 && p[-1] == '[')
1532 bracket_state = *p;
1533 break;
1534
1535 case ']':
1536 if (bracket_state == 0)
1537 ;
1538 else if (bracket_state == -1)
1539 bracket_state = 0;
1540 else if (p[-2] != bracket_state && p[-1] == bracket_state)
1541 bracket_state = -1;
1542 break;
1543 }
1544
1545 *q++ = *p++;
1546 }
1547 return (size_t)(q - buf);
1548 }
1549
1550
1551 /* `str' is a string (from the command line) that contains a sed command.
1552 Compile the command, and add it to the end of `cur_program'. */
1553 struct vector *
compile_string(cur_program,str,len)1554 compile_string(cur_program, str, len)
1555 struct vector *cur_program;
1556 char *str;
1557 size_t len;
1558 {
1559 static countT string_expr_count = 0;
1560 struct vector *ret;
1561
1562 prog.file = NULL;
1563 prog.base = CAST(unsigned char *)str;
1564 prog.cur = prog.base;
1565 prog.end = prog.cur + len;
1566
1567 cur_input.line = 0;
1568 cur_input.name = NULL;
1569 cur_input.string_expr_count = ++string_expr_count;
1570
1571 ret = compile_program(cur_program);
1572 prog.base = NULL;
1573 prog.cur = NULL;
1574 prog.end = NULL;
1575
1576 first_script = false;
1577 return ret;
1578 }
1579
1580 /* `cmdfile' is the name of a file containing sed commands.
1581 Read them in and add them to the end of `cur_program'.
1582 */
1583 struct vector *
compile_file(cur_program,cmdfile)1584 compile_file(cur_program, cmdfile)
1585 struct vector *cur_program;
1586 const char *cmdfile;
1587 {
1588 size_t len;
1589 struct vector *ret;
1590
1591 prog.file = stdin;
1592 if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1593 prog.file = ck_fopen(cmdfile, "rt", true);
1594
1595 cur_input.line = 1;
1596 cur_input.name = cmdfile;
1597 cur_input.string_expr_count = 0;
1598
1599 ret = compile_program(cur_program);
1600 if (prog.file != stdin)
1601 ck_fclose(prog.file);
1602 prog.file = NULL;
1603
1604 first_script = false;
1605 return ret;
1606 }
1607
1608 /* Make any checks which require the whole program to have been read.
1609 In particular: this backpatches the jump targets.
1610 Any cleanup which can be done after these checks is done here also. */
1611 void
check_final_program(program)1612 check_final_program(program)
1613 struct vector *program;
1614 {
1615 struct sed_label *go;
1616 struct sed_label *lbl;
1617
1618 /* do all "{"s have a corresponding "}"? */
1619 if (blocks)
1620 {
1621 /* update info for error reporting: */
1622 MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1623 bad_prog(_(EXCESS_OPEN_BRACE));
1624 }
1625
1626 /* was the final command an unterminated a/c/i command? */
1627 if (pending_text)
1628 {
1629 old_text_buf->text_length = size_buffer(pending_text);
1630 old_text_buf->text = MEMDUP(get_buffer(pending_text),
1631 old_text_buf->text_length, char);
1632 free_buffer(pending_text);
1633 pending_text = NULL;
1634 }
1635
1636 for (go = jumps; go; go = release_label(go))
1637 {
1638 for (lbl = labels; lbl; lbl = lbl->next)
1639 if (strcmp(lbl->name, go->name) == 0)
1640 break;
1641 if (lbl)
1642 {
1643 program->v[go->v_index].x.jump_index = lbl->v_index;
1644 }
1645 else
1646 {
1647 if (*go->name)
1648 panic(_("can't find label for jump to `%s'"), go->name);
1649 program->v[go->v_index].x.jump_index = program->v_length;
1650 }
1651 }
1652 jumps = NULL;
1653
1654 for (lbl = labels; lbl; lbl = release_label(lbl))
1655 ;
1656 labels = NULL;
1657
1658 /* There is no longer a need to track file names: */
1659 {
1660 struct output *p;
1661
1662 for (p=file_read; p; p=p->link)
1663 if (p->name)
1664 {
1665 FREE(p->name);
1666 p->name = NULL;
1667 }
1668
1669 for (p=file_write; p; p=p->link)
1670 if (p->name)
1671 {
1672 FREE(p->name);
1673 p->name = NULL;
1674 }
1675 }
1676 }
1677
1678 /* Rewind all resources which were allocated in this module. */
1679 void
rewind_read_files()1680 rewind_read_files()
1681 {
1682 struct output *p;
1683
1684 for (p=file_read; p; p=p->link)
1685 if (p->fp)
1686 rewind(p->fp);
1687 }
1688
1689 /* Release all resources which were allocated in this module. */
1690 void
finish_program(program)1691 finish_program(program)
1692 struct vector *program;
1693 {
1694 /* close all files... */
1695 {
1696 struct output *p, *q;
1697
1698 for (p=file_read; p; p=q)
1699 {
1700 if (p->fp)
1701 ck_fclose(p->fp);
1702 q = p->link;
1703 #if 0
1704 /* We use obstacks. */
1705 FREE(p);
1706 #endif
1707 }
1708
1709 for (p=file_write; p; p=q)
1710 {
1711 if (p->fp)
1712 ck_fclose(p->fp);
1713 q = p->link;
1714 #if 0
1715 /* We use obstacks. */
1716 FREE(p);
1717 #endif
1718 }
1719 file_read = file_write = NULL;
1720 }
1721
1722 #ifdef DEBUG_LEAKS
1723 obstack_free (&obs, NULL);
1724 #endif /*DEBUG_LEAKS*/
1725 }
1726