1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3     Free Software Foundation, Inc.
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2, or (at your option)
8     any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 
/* Kept explicitly undefined: the code this macro would enable is
   considered unreliable (see the trailing remark).  */
#undef EXPERIMENTAL_DASH_N_OPTIMIZATION	/*don't use -- is very buggy*/
/* Smallest size, in bytes, that a struct line buffer is ever
   (re)allocated with.  */
#define INITIAL_BUFFER_SIZE	50
/* Size, in bytes, of the stack buffer used when copying a queued file
   to the output.  */
#define FREAD_BUFFER_SIZE	8192
22 
23 #include "sed.h"
24 
25 #include <stdio.h>
26 #include <ctype.h>
27 
28 #include <errno.h>
29 #ifndef errno
30 extern int errno;
31 #endif
32 
33 #ifdef HAVE_UNISTD_H
34 # include <unistd.h>
35 #endif
36 
37 #ifdef __GNUC__
38 # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
39    /* silence warning about unused parameter even for "gcc -W -Wunused" */
40 #  define UNUSED	__attribute__((unused))
41 # endif
42 #endif
43 #ifndef UNUSED
44 # define UNUSED
45 #endif
46 
47 #ifdef HAVE_STRINGS_H
48 # include <strings.h>
49 #else
50 # include <string.h>
51 #endif /*HAVE_STRINGS_H*/
52 #ifdef HAVE_MEMORY_H
53 # include <memory.h>
54 #endif
55 
56 #ifndef HAVE_STRCHR
57 # define strchr index
58 # define strrchr rindex
59 #endif
60 
61 #ifdef HAVE_STDLIB_H
62 # include <stdlib.h>
63 #endif
64 #ifndef EXIT_SUCCESS
65 # define EXIT_SUCCESS 0
66 #endif
67 
68 #ifdef HAVE_SYS_TYPES_H
69 # include <sys/types.h>
70 #endif
71 
72 #include <sys/stat.h>
73 
74 
/* A growable text buffer.  Sed operates a line at a time, and each of
   the buffers it keeps around has this layout. */
struct line {
  /* Start of the storage obtained from malloc. */
  char *text;

  /* First byte of TEXT that has not been consumed yet. */
  char *active;

  /* Length of the text (measured from ACTIVE, when that is in use). */
  size_t length;

  /* Number of bytes allocated for the active part. */
  size_t alloc;

  /* True if a trailing newline was dropped from the text. */
  bool chomped;

#ifdef HAVE_MBRTOWC
  /* Multibyte shift state reached at the end of the text. */
  mbstate_t mbstate;
#endif
};
86 
/* One pending piece of output.  The "a", "r" and "R" commands fill a
   queue of these, and the queue is written out at the end of a cycle. */
struct append_queue {
  /* If non-NULL, name of a file whose contents are copied out. */
  const char *fname;

  /* If non-NULL, TEXTLEN bytes that are written out verbatim. */
  char *text;
  size_t textlen;

  /* Following entry in the queue, or NULL for the last one. */
  struct append_queue *next;

  /* True if TEXT must be freed when the queue is released. */
  bool free;
};
96 
97 /* State information for the input stream. */
98 struct input {
99   /* The list of yet-to-be-opened files.  It is invalid for file_list
100      to be NULL.  When *file_list is NULL we are currently processing
101      the last file.  */
102 
103   char **file_list;
104 
105   /* Count of files we failed to open. */
106   countT bad_count;
107 
108   /* Current input line number (over all files).  */
109   countT line_number;
110 
111   /* True if we'll reset line numbers and addresses before
112      starting to process the next (possibly the first) file.  */
113   bool reset_at_next_file;
114 
115   /* Function to read one line.  If FP is NULL, read_fn better not
116      be one which uses fp; in particular, read_always_fail() is
117      recommended. */
118   bool (*read_fn) P_((struct input *));	/* read one line */
119 
120   char *out_file_name;
121 
122   const char *in_file_name;
123 
124   /* if NULL, none of the following are valid */
125   FILE *fp;
126 
127   bool no_buffering;
128 };
129 
130 
/* Have we done any replacements lately?  This is used by the `t' command. */
static bool replaced = false;

/* The current output file (stdout if -i is not being used). */
static struct output output_file;

/* The `current' input line. */
static struct line line;

/* An input line used to accumulate the result of the s and e commands. */
static struct line s_accum;

/* An input line that's been stored for later use by the program. */
static struct line hold;

/* The buffered input look-ahead.  The only field that should be
   used outside of read_mem_line() or line_init() is buffer.length. */
static struct line buffer;

/* First and last entries of the queue of text to write out at the end
   of the cycle; both are NULL while the queue is empty. */
static struct append_queue *append_head = NULL;
static struct append_queue *append_tail = NULL;
152 
153 
154 #ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
156    memchr(), and ../lib/memchr.c requires configuration knowledge
157    about how many bits are in a `long'.  This implementation
158    is far from ideal, but it should get us up-and-limping well
159    enough to run the configure script, which is all that matters.
160 */
161 # ifdef memchr
162 #  undef memchr
163 # endif
164 # define memchr bootstrap_memchr
165 
/* Minimal replacement for memchr(): return a pointer to the first of
   the N bytes starting at S that is equal to C (converted to
   unsigned char), or a null pointer if there is no such byte. */
static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  /* Compare as unsigned char, like the real memchr() does.  Plain
     char may be signed, in which case a byte >= 0x80 would be read
     as a negative value and could never equal C. */
  unsigned char *p;
  unsigned char wanted = CAST(unsigned char)c;

  for (p = CAST(unsigned char *)s; n-- > 0; ++p)
    if (*p == wanted)
      return p;
  return CAST(VOID *)0;
}
180 #endif /*BOOTSTRAP*/
181 
182 /* increase a struct line's length, making some attempt at
183    keeping realloc() calls under control by padding for future growth.  */
184 static void resize_line P_((struct line *, size_t));
185 static void
resize_line(lb,len)186 resize_line(lb, len)
187   struct line *lb;
188   size_t len;
189 {
190   int inactive;
191   inactive = lb->active - lb->text;
192 
193   /* If the inactive part has got to more than two thirds of the buffer,
194    * remove it. */
195   if (inactive > lb->alloc * 2)
196     {
197       MEMMOVE(lb->text, lb->active, lb->length);
198       lb->alloc += lb->active - lb->text;
199       lb->active = lb->text;
200       inactive = 0;
201 
202       if (lb->alloc > len)
203 	return;
204     }
205 
206   lb->alloc *= 2;
207   if (lb->alloc < len)
208     lb->alloc = len;
209   if (lb->alloc < INITIAL_BUFFER_SIZE)
210     lb->alloc = INITIAL_BUFFER_SIZE;
211 
212   lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
213   lb->active = lb->text + inactive;
214 }
215 
216 /* Append `length' bytes from `string' to the line `to'. */
217 static void str_append P_((struct line *, const char *, size_t));
218 static void
str_append(to,string,length)219 str_append(to, string, length)
220   struct line *to;
221   const char *string;
222   size_t length;
223 {
224   size_t new_length = to->length + length;
225 
226   if (to->alloc < new_length)
227     resize_line(to, new_length);
228   MEMCPY(to->active + to->length, string, length);
229   to->length = new_length;
230 
231 #ifdef HAVE_MBRTOWC
232   if (mb_cur_max == 1)
233     return;
234 
235   while (length)
236     {
237       int n = MBRLEN (string, length, &to->mbstate);
238 
239       /* An invalid sequence is treated like a singlebyte character. */
240       if (n == -1)
241 	{
242 	  memset (&to->mbstate, 0, sizeof (to->mbstate));
243 	  n = 1;
244 	}
245 
246       if (n > 0)
247 	length -= n;
248       else
249 	break;
250     }
251 #endif
252 }
253 
254 static void str_append_modified P_((struct line *, const char *, size_t,
255 				    enum replacement_types));
256 static void
str_append_modified(to,string,length,type)257 str_append_modified(to, string, length, type)
258   struct line *to;
259   const char *string;
260   size_t length;
261   enum replacement_types type;
262 {
263   size_t old_length = to->length;
264   char *start, *end;
265 
266   if (length == 0)
267     return;
268 
269 #ifdef HAVE_MBRTOWC
270   {
271     mbstate_t from_stat;
272 
273     if (type == REPL_ASIS)
274       {
275 	str_append(to, string, length);
276         return;
277       }
278 
279     if (to->alloc - to->length < length * mb_cur_max)
280       resize_line(to, to->length + length * mb_cur_max);
281 
282     MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
283     while (length)
284       {
285 	wchar_t wc;
286         int n = MBRTOWC (&wc, string, length, &from_stat);
287 
288         /* An invalid sequence is treated like a singlebyte character. */
289         if (n == -1)
290           {
291             memset (&to->mbstate, 0, sizeof (from_stat));
292             n = 1;
293           }
294 
295         if (n > 0)
296           string += n, length -= n;
297         else
298 	  {
299 	    /* Incomplete sequence, copy it manually.  */
300 	    str_append(to, string, length);
301 	    return;
302 	  }
303 
304 	/* Convert the first character specially... */
305         if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
306 	  {
307             if (type & REPL_UPPERCASE_FIRST)
308               wc = towupper(wc);
309             else
310               wc = towlower(wc);
311 
312             type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
313 	    if (type == REPL_ASIS)
314 	      {
315 		n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
316 		to->length += n;
317 		str_append(to, string, length);
318 	        return;
319 	      }
320           }
321 
322         else if (type & REPL_UPPERCASE)
323           wc = towupper(wc);
324         else
325           wc = towlower(wc);
326 
327 	/* Copy the new wide character to the end of the string. */
328 	n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
329         to->length += n;
330 	if (n == -1)
331 	  {
332 	    fprintf (stderr, "Case conversion produced an invalid character!");
333 	    abort ();
334 	  }
335       }
336   }
337 #else
338   str_append(to, string, length);
339   start = to->active + old_length;
340   end = start + length;
341 
342   /* Now do the required modifications.  First \[lu]... */
343   if (type & REPL_UPPERCASE_FIRST)
344     {
345       *start = toupper(*start);
346       start++;
347       type &= ~REPL_UPPERCASE_FIRST;
348     }
349   else if (type & REPL_LOWERCASE_FIRST)
350     {
351       *start = tolower(*start);
352       start++;
353       type &= ~REPL_LOWERCASE_FIRST;
354     }
355 
356   if (type == REPL_ASIS)
357     return;
358 
359   /* ...and then \[LU] */
360   if (type == REPL_UPPERCASE)
361     for (; start != end; start++)
362       *start = toupper(*start);
363   else
364     for (; start != end; start++)
365       *start = tolower(*start);
366 #endif
367 }
368 
369 /* initialize a "struct line" buffer */
370 static void line_init P_((struct line *, size_t initial_size));
371 static void
line_init(buf,initial_size)372 line_init(buf, initial_size)
373   struct line *buf;
374   size_t initial_size;
375 {
376   buf->text = MALLOC(initial_size, char);
377   buf->active = buf->text;
378   buf->alloc = initial_size;
379   buf->length = 0;
380   buf->chomped = true;
381 
382 #ifdef HAVE_MBRTOWC
383   memset (&buf->mbstate, 0, sizeof (buf->mbstate));
384 #endif
385 
386 }
387 
388 /* Copy the contents of the line `from' into the line `to'.
389    This destroys the old contents of `to'. */
390 static void line_copy P_((struct line *from, struct line *to));
391 static void
line_copy(from,to)392 line_copy(from, to)
393   struct line *from;
394   struct line *to;
395 {
396   /* Remove the inactive portion in the destination buffer. */
397   to->alloc += to->active - to->text;
398 
399   if (to->alloc < from->length)
400     {
401       to->alloc *= 2;
402       if (to->alloc < from->length)
403 	to->alloc = from->length;
404       if (to->alloc < INITIAL_BUFFER_SIZE)
405 	to->alloc = INITIAL_BUFFER_SIZE;
406       /* Use FREE()+MALLOC() instead of REALLOC() to
407 	 avoid unnecessary copying of old text. */
408       FREE(to->text);
409       to->text = MALLOC(to->alloc, char);
410     }
411 
412   to->active = to->text;
413   to->length = from->length;
414   to->chomped = from->chomped;
415   MEMCPY(to->active, from->active, from->length);
416 
417 #ifdef HAVE_MBRTOWC
418   MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
419 #endif
420 }
421 
422 /* Append the contents of the line `from' to the line `to'. */
423 static void line_append P_((struct line *from, struct line *to));
424 static void
line_append(from,to)425 line_append(from, to)
426   struct line *from;
427   struct line *to;
428 {
429   str_append(to, "\n", 1);
430   str_append(to, from->active, from->length);
431   to->chomped = from->chomped;
432 
433 #ifdef HAVE_MBRTOWC
434   MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
435 #endif
436 }
437 
438 /* Exchange the contents of two "struct line" buffers. */
439 static void line_exchange P_((struct line *, struct line *));
440 static void
line_exchange(a,b)441 line_exchange(a, b)
442   struct line *a;
443   struct line *b;
444 {
445   struct line t;
446 
447   MEMCPY(&t,  a, sizeof(struct line));
448   MEMCPY( a,  b, sizeof(struct line));
449   MEMCPY( b, &t, sizeof(struct line));
450 }
451 
452 
453 /* dummy function to simplify read_pattern_space() */
454 static bool read_always_fail P_((struct input *));
455 static bool
read_always_fail(input)456 read_always_fail(input)
457   struct input *input UNUSED;
458 {
459   return false;
460 }
461 
462 static bool read_file_line P_((struct input *));
463 static bool
read_file_line(input)464 read_file_line(input)
465   struct input *input;
466 {
467   static char *b;
468   static size_t blen;
469 
470   long result = ck_getline (&b, &blen, input->fp);
471   if (result <= 0)
472     return false;
473 
474   /* Remove the trailing new-line that is left by getline. */
475   if (b[result - 1] == '\n')
476     --result;
477   else
478     line.chomped = false;
479 
480   str_append(&line, b, result);
481   return true;
482 }
483 
484 
485 static inline void output_missing_newline P_((struct output *));
486 static inline void
output_missing_newline(outf)487 output_missing_newline(outf)
488   struct output *outf;
489 {
490   if (outf->missing_newline)
491     {
492       ck_fwrite("\n", 1, 1, outf->fp);
493       outf->missing_newline = false;
494     }
495 }
496 
497 static inline void flush_output P_((FILE *));
498 static inline void
flush_output(fp)499 flush_output(fp)
500   FILE *fp;
501 {
502 #ifndef CONFIG_WITHOUT_O_OPT
503   if (fp != sed_stdout || unbuffered_output)
504 #else
505   if (fp != stdout || unbuffered_output)
506 #endif
507     ck_fflush(fp);
508 }
509 
510 static void output_line P_((const char *, size_t, bool, struct output *));
511 static void
output_line(text,length,nl,outf)512 output_line(text, length, nl, outf)
513   const char *text;
514   size_t length;
515   bool nl;
516   struct output *outf;
517 {
518   output_missing_newline(outf);
519 
520   if (length)
521     ck_fwrite(text, 1, length, outf->fp);
522 
523   if (nl)
524     ck_fwrite("\n", 1, 1, outf->fp);
525   else
526     outf->missing_newline = true;
527 
528   flush_output(outf->fp);
529 }
530 
531 static struct append_queue *next_append_slot P_((void));
532 static struct append_queue *
next_append_slot()533 next_append_slot()
534 {
535   struct append_queue *n = MALLOC(1, struct append_queue);
536 
537   n->fname = NULL;
538   n->text = NULL;
539   n->textlen = 0;
540   n->next = NULL;
541   n->free = false;
542 
543   if (append_tail)
544       append_tail->next = n;
545   else
546       append_head = n;
547   return append_tail = n;
548 }
549 
550 static void release_append_queue P_((void));
551 static void
release_append_queue()552 release_append_queue()
553 {
554   struct append_queue *p, *q;
555 
556   for (p=append_head; p; p=q)
557     {
558       if (p->free)
559         FREE(p->text);
560 
561       q = p->next;
562       FREE(p);
563     }
564   append_head = append_tail = NULL;
565 }
566 
567 static void dump_append_queue P_((void));
568 static void
dump_append_queue()569 dump_append_queue()
570 {
571   struct append_queue *p;
572 
573   output_missing_newline(&output_file);
574   for (p=append_head; p; p=p->next)
575     {
576       if (p->text)
577         ck_fwrite(p->text, 1, p->textlen, output_file.fp);
578 
579       if (p->fname)
580 	{
581 	  char buf[FREAD_BUFFER_SIZE];
582 	  size_t cnt;
583 	  FILE *fp;
584 
585 	  /* "If _fname_ does not exist or cannot be read, it shall
586 	     be treated as if it were an empty file, causing no error
587 	     condition."  IEEE Std 1003.2-1992
588 	     So, don't fail. */
589 	  fp = ck_fopen(p->fname, "r", false);
590 	  if (fp)
591 	    {
592 	      while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
593 		ck_fwrite(buf, 1, cnt, output_file.fp);
594 	      ck_fclose(fp);
595 	    }
596 	}
597     }
598 
599   flush_output(output_file.fp);
600   release_append_queue();
601 }
602 
603 
604 /* Compute the name of the backup file for in-place editing */
605 static char *get_backup_file_name P_((const char *));
606 static char *
get_backup_file_name(name)607 get_backup_file_name(name)
608   const char *name;
609 {
610   char *old_asterisk, *asterisk, *backup, *p;
611   int name_length = strlen(name), backup_length = strlen(in_place_extension);
612 
613   /* Compute the length of the backup file */
614   for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
615        asterisk = strchr(old_asterisk, '*');
616        old_asterisk = asterisk + 1)
617     backup_length += name_length - 1;
618 
619   p = backup = xmalloc(backup_length + 1);
620 
621   /* Each iteration gobbles up to an asterisk */
622   for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
623        asterisk = strchr(old_asterisk, '*');
624        old_asterisk = asterisk + 1)
625     {
626       MEMCPY (p, old_asterisk, asterisk - old_asterisk);
627       p += asterisk - old_asterisk;
628       strcpy (p, name);
629       p += name_length;
630     }
631 
632   /* Tack on what's after the last asterisk */
633   strcpy (p, old_asterisk);
634   return backup;
635 }
636 
637 /* Initialize a struct input for the named file. */
638 static void open_next_file P_((const char *name, struct input *));
639 static void
open_next_file(name,input)640 open_next_file(name, input)
641   const char *name;
642   struct input *input;
643 {
644   buffer.length = 0;
645 
646   if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
647     {
648       clearerr(stdin);	/* clear any stale EOF indication */
649       input->fp = stdin;
650     }
651   else if ( ! (input->fp = ck_fopen(name, "r", false)) )
652     {
653       const char *ptr = strerror(errno);
654       fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
655       input->read_fn = read_always_fail; /* a redundancy */
656       ++input->bad_count;
657       return;
658     }
659 
660   input->read_fn = read_file_line;
661 
662   if (in_place_extension)
663     {
664       int output_fd;
665       char *tmpdir = ck_strdup(name), *p;
666       struct stat st;
667 
668       /* get the base name */
669       if (p = strrchr(tmpdir, '/'))
670 	*(p + 1) = 0;
671       else
672 	strcpy(tmpdir, ".");
673 
674       input->in_file_name = name;
675 
676       if (isatty (fileno (input->fp)))
677         panic(_("couldn't edit %s: is a terminal"), input->in_file_name);
678 
679       fstat (fileno (input->fp), &st);
680       if (!S_ISREG (st.st_mode))
681         panic(_("couldn't edit %s: not a regular file"), input->in_file_name);
682 
683       output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
684       output_file.missing_newline = false;
685       free (tmpdir);
686 
687       if (!output_file.fp)
688         panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
689 
690       output_fd = fileno (output_file.fp);
691 #ifdef HAVE_FCHMOD
692       fchmod (output_fd, st.st_mode);
693 #endif
694 #ifdef HAVE_FCHOWN
695       if (fchown (output_fd, st.st_uid, st.st_gid) == -1)
696         fchown (output_fd, -1, st.st_gid);
697 #endif
698     }
699   else
700 #ifndef CONFIG_WITHOUT_O_OPT
701     output_file.fp = sed_stdout;
702 #else
703     output_file.fp = stdout;
704 #endif
705 }
706 
707 
708 /* Clean up an input stream that we are done with. */
709 static void closedown P_((struct input *));
710 static void
closedown(input)711 closedown(input)
712   struct input *input;
713 {
714   input->read_fn = read_always_fail;
715   if (!input->fp)
716     return;
717   if (input->fp != stdin) /* stdin can be reused on tty and tape devices */
718     ck_fclose(input->fp);
719 
720   if (in_place_extension && output_file.fp != NULL)
721     {
722       ck_fclose (output_file.fp);
723       if (strcmp(in_place_extension, "*") != 0)
724         {
725           char *backup_file_name = get_backup_file_name(input->in_file_name);
726 	  ck_rename (input->in_file_name, backup_file_name, input->out_file_name);
727           free (backup_file_name);
728 	}
729 
730       ck_rename (input->out_file_name, input->in_file_name, input->out_file_name);
731       free (input->out_file_name);
732     }
733 
734   input->fp = NULL;
735 }
736 
737 /* Reset range commands so that they are marked as non-matching */
738 static void reset_addresses P_((struct vector *));
739 static void
reset_addresses(vec)740 reset_addresses(vec)
741      struct vector *vec;
742 {
743   struct sed_cmd *cur_cmd;
744   int n;
745 
746   for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
747     if (cur_cmd->a1
748 	&& cur_cmd->a1->addr_type == ADDR_IS_NUM
749 	&& cur_cmd->a1->addr_number == 0)
750       cur_cmd->range_state = RANGE_ACTIVE;
751     else
752       cur_cmd->range_state = RANGE_INACTIVE;
753 }
754 
755 /* Read in the next line of input, and store it in the pattern space.
756    Return zero if there is nothing left to input. */
757 static bool read_pattern_space P_((struct input *, struct vector *, bool));
758 static bool
read_pattern_space(input,the_program,append)759 read_pattern_space(input, the_program, append)
760   struct input *input;
761   struct vector *the_program;
762   bool append;
763 {
764   if (append_head) /* redundant test to optimize for common case */
765     dump_append_queue();
766   replaced = false;
767   if (!append)
768     line.length = 0;
769   line.chomped = true;  /* default, until proved otherwise */
770 
771   while ( ! (*input->read_fn)(input) )
772     {
773       closedown(input);
774 
775       if (!*input->file_list)
776 	return false;
777 
778       if (input->reset_at_next_file)
779 	{
780 	  input->line_number = 0;
781 	  reset_addresses (the_program);
782 	  rewind_read_files ();
783 
784 	  /* If doing in-place editing, we will never append the
785 	     new-line to this file; but if the output goes to stdout,
786 	     we might still have to output the missing new-line.  */
787 	  if (in_place_extension)
788 	    output_file.missing_newline = false;
789 
790 	  input->reset_at_next_file = separate_files;
791 	}
792 
793       open_next_file (*input->file_list++, input);
794     }
795 
796   ++input->line_number;
797   return true;
798 }
799 
800 
801 static bool last_file_with_data_p P_((struct input *));
802 static bool
last_file_with_data_p(input)803 last_file_with_data_p(input)
804   struct input *input;
805 {
806   for (;;)
807     {
808       int ch;
809 
810       closedown(input);
811       if (!*input->file_list)
812 	return true;
813       open_next_file(*input->file_list++, input);
814       if (input->fp)
815 	{
816 	  if ((ch = getc(input->fp)) != EOF)
817 	    {
818 	      ungetc(ch, input->fp);
819 	      return false;
820 	    }
821 	}
822     }
823 }
824 
825 /* Determine if we match the `$' address. */
826 static bool test_eof P_((struct input *));
827 static bool
test_eof(input)828 test_eof(input)
829   struct input *input;
830 {
831   int ch;
832 
833   if (buffer.length)
834     return false;
835   if (!input->fp)
836     return separate_files || last_file_with_data_p(input);
837   if (feof(input->fp))
838     return separate_files || last_file_with_data_p(input);
839   if ((ch = getc(input->fp)) == EOF)
840     return separate_files || last_file_with_data_p(input);
841   ungetc(ch, input->fp);
842   return false;
843 }
844 
845 /* Return non-zero if the current line matches the address
846    pointed to by `addr'. */
847 static bool match_an_address_p P_((struct addr *, struct input *));
848 static bool
match_an_address_p(addr,input)849 match_an_address_p(addr, input)
850   struct addr *addr;
851   struct input *input;
852 {
853   switch (addr->addr_type)
854     {
855     case ADDR_IS_NULL:
856       return true;
857 
858     case ADDR_IS_REGEX:
859       return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
860 
861     case ADDR_IS_NUM_MOD:
862       return (input->line_number >= addr->addr_number
863 	      && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
864 
865     case ADDR_IS_STEP:
866     case ADDR_IS_STEP_MOD:
867       /* reminder: these are only meaningful for a2 addresses */
868       /* a2->addr_number needs to be recomputed each time a1 address
869          matches for the step and step_mod types */
870       return (addr->addr_number <= input->line_number);
871 
872     case ADDR_IS_LAST:
873       return test_eof(input);
874 
875       /* ADDR_IS_NUM is handled in match_address_p.  */
876     case ADDR_IS_NUM:
877     default:
878       panic("INTERNAL ERROR: bad address type");
879     }
880   /*NOTREACHED*/
881   return false;
882 }
883 
/* Return true if `cmd' applies to the current input line.
   A command without a first address applies to every line, and a
   command with only a first address applies to the lines matching it.
   For a two-address command, cmd->range_state records whether the
   range is currently running (RANGE_ACTIVE) or, for numeric
   addresses, has been passed for good (RANGE_CLOSED); this function
   updates that state as a side effect.  For step-type second
   addresses it also stores the computed end line in
   cmd->a2->addr_number. */
static bool match_address_p P_((struct sed_cmd *, struct input *));
static bool
match_address_p(cmd, input)
  struct sed_cmd *cmd;
  struct input *input;
{
  /* No address at all: the command applies to every line. */
  if (!cmd->a1)
    return true;

  if (cmd->range_state != RANGE_ACTIVE)
    {
      /* Find if we are going to activate a range.  Handle ADDR_IS_NUM
	 specially: it represent an "absolute" state, it should not
	 be computed like regexes.  */
      if (cmd->a1->addr_type == ADDR_IS_NUM)
	{
	  /* A lone line number matches exactly that line. */
	  if (!cmd->a2)
	    return (input->line_number == cmd->a1->addr_number);

	  /* The range cannot start if it has been closed or if its
	     first line has not been reached yet. */
	  if (cmd->range_state == RANGE_CLOSED
	      || input->line_number < cmd->a1->addr_number)
	    return false;
	}
      else
	{
	  /* A lone address matches whenever the address itself does. */
          if (!cmd->a2)
	    return match_an_address_p(cmd->a1, input);

	  if (!match_an_address_p(cmd->a1, input))
            return false;
	}

      /* Ok, start a new range.  */
      cmd->range_state = RANGE_ACTIVE;
      switch (cmd->a2->addr_type)
	{
	case ADDR_IS_REGEX:
	  /* Always include at least two lines.  */
	  return true;
	case ADDR_IS_NUM:
	  /* Same handling as below, but always include at least one line.  */
          if (input->line_number >= cmd->a2->addr_number)
	    cmd->range_state = RANGE_CLOSED;
          return true;
	case ADDR_IS_STEP:
	  /* The range ends addr_step lines after the current one. */
	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
	  return true;
	case ADDR_IS_STEP_MOD:
	  /* The range ends at the next line number, after the current
	     one, that is a multiple of addr_step. */
	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
				 - (input->line_number%cmd->a2->addr_step);
	  return true;
	default:
	  /* Any other type of second address is tested right away,
	     against this very line, by the code below. */
	  break;
        }
    }

  /* cmd->range_state == RANGE_ACTIVE.  Check if the range is
     ending; also handle ADDR_IS_NUM specially in this case.  */

  if (cmd->a2->addr_type == ADDR_IS_NUM)
    {
      /* If the second address is a line number, and if we got past
         that line, fail to match (it can happen when you jump
	 over such addresses with `b' and `t'.  Use RANGE_CLOSED
         so that the range is not re-enabled anymore.  */
      if (input->line_number >= cmd->a2->addr_number)
	cmd->range_state = RANGE_CLOSED;

      return (input->line_number <= cmd->a2->addr_number);
   }

  /* Other addresses are treated as usual.  The line that matches the
     second address still belongs to the range, so the answer is true
     either way; only the state changes. */
  if (match_an_address_p(cmd->a2, input))
    cmd->range_state = RANGE_CLOSED;

  return true;
}
962 
963 
964 static void do_list P_((int line_len));
965 static void
do_list(line_len)966 do_list(line_len)
967      int line_len;
968 {
969   unsigned char *p = CAST(unsigned char *)line.active;
970   countT len = line.length;
971   countT width = 0;
972   char obuf[180];	/* just in case we encounter a 512-bit char (;-) */
973   char *o;
974   size_t olen;
975   FILE *fp = output_file.fp;
976 
977   output_missing_newline(&output_file);
978   for (; len--; ++p) {
979       o = obuf;
980 
981       /* Some locales define 8-bit characters as printable.  This makes the
982 	 testsuite fail at 8to7.sed because the `l' command in fact will not
983 	 convert the 8-bit characters. */
984 #if defined isascii || defined HAVE_ISASCII
985       if (isascii(*p) && ISPRINT(*p)) {
986 #else
987       if (ISPRINT(*p)) {
988 #endif
989 	  *o++ = *p;
990 	  if (*p == '\\')
991 	    *o++ = '\\';
992       } else {
993 	  *o++ = '\\';
994 	  switch (*p) {
995 #if defined __STDC__ && __STDC__-0
996 	    case '\a': *o++ = 'a'; break;
997 #else /* Not STDC; we'll just assume ASCII */
998 	    case 007:  *o++ = 'a'; break;
999 #endif
1000 	    case '\b': *o++ = 'b'; break;
1001 	    case '\f': *o++ = 'f'; break;
1002 	    case '\n': *o++ = 'n'; break;
1003 	    case '\r': *o++ = 'r'; break;
1004 	    case '\t': *o++ = 't'; break;
1005 	    case '\v': *o++ = 'v'; break;
1006 	    default:
1007 	      sprintf(o, "%03o", *p);
1008 	      o += strlen(o);
1009 	      break;
1010 	    }
1011       }
1012       olen = o - obuf;
1013       if (width+olen >= line_len && line_len > 0) {
1014 	  ck_fwrite("\\\n", 1, 2, fp);
1015 	  width = 0;
1016       }
1017       ck_fwrite(obuf, 1, olen, fp);
1018       width += olen;
1019   }
1020   ck_fwrite("$\n", 1, 2, fp);
1021   flush_output (fp);
1022 }
1023 
1024 
1025 static enum replacement_types append_replacement P_((struct line *, struct replacement *,
1026 						     struct re_registers *,
1027 						     enum replacement_types));
1028 static enum replacement_types
append_replacement(buf,p,regs,repl_mod)1029 append_replacement (buf, p, regs, repl_mod)
1030   struct line *buf;
1031   struct replacement *p;
1032   struct re_registers *regs;
1033   enum replacement_types repl_mod;
1034 {
1035   for (; p; p=p->next)
1036     {
1037       int i = p->subst_id;
1038       enum replacement_types curr_type;
1039 
1040       /* Apply a \[lu] modifier that was given earlier, but which we
1041          have not had yet the occasion to apply.  But don't do it
1042          if this replacement has a modifier of its own. */
1043       curr_type = (p->repl_type & REPL_MODIFIERS)
1044         ? p->repl_type
1045         : p->repl_type | repl_mod;
1046 
1047       repl_mod = 0;
1048       if (p->prefix_length)
1049         {
1050           str_append_modified(buf, p->prefix, p->prefix_length,
1051     			      curr_type);
1052           curr_type &= ~REPL_MODIFIERS;
1053         }
1054 
1055       if (0 <= i)
1056         if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
1057           /* Save this modifier, we shall apply it later.
1058 	     e.g. in s/()([a-z])/\u\1\2/
1059 	     the \u modifier is applied to \2, not \1 */
1060 	  repl_mod = curr_type & REPL_MODIFIERS;
1061 
1062 	else
1063 	  str_append_modified(buf, line.active + regs->start[i],
1064 			      CAST(size_t)(regs->end[i] - regs->start[i]),
1065 			      curr_type);
1066     }
1067 
1068   return repl_mod;
1069 }
1070 
1071 static void do_subst P_((struct subst *));
1072 static void
do_subst(sub)1073 do_subst(sub)
1074   struct subst *sub;
1075 {
1076   size_t start = 0;	/* where to start scan for (next) match in LINE */
1077   size_t last_end = 0;  /* where did the last successful match end in LINE */
1078   countT count = 0;	/* number of matches found */
1079   bool again = true;
1080 
1081   static struct re_registers regs;
1082 
1083   if (s_accum.alloc == 0)
1084     line_init(&s_accum, INITIAL_BUFFER_SIZE);
1085   s_accum.length = 0;
1086 
1087   /* The first part of the loop optimizes s/xxx// when xxx is at the
1088      start, and s/xxx$// */
1089   if (!match_regex(sub->regx, line.active, line.length, start,
1090 		   &regs, sub->max_id + 1))
1091     return;
1092 
1093   if (!sub->replacement && sub->numb <= 1)
1094     if (regs.start[0] == 0 && !sub->global)
1095       {
1096 	/* We found a match, set the `replaced' flag. */
1097 	replaced = true;
1098 
1099 	line.active += regs.end[0];
1100 	line.length -= regs.end[0];
1101 	line.alloc -= regs.end[0];
1102 	goto post_subst;
1103       }
1104     else if (regs.end[0] == line.length)
1105       {
1106 	/* We found a match, set the `replaced' flag. */
1107 	replaced = true;
1108 
1109 	line.length = regs.start[0];
1110 	goto post_subst;
1111       }
1112 
1113   do
1114     {
1115       enum replacement_types repl_mod = 0;
1116 
1117       size_t offset = regs.start[0];
1118       size_t matched = regs.end[0] - regs.start[0];
1119 
1120       /* Copy stuff to the left of this match into the output string. */
1121       if (start < offset)
1122 	str_append(&s_accum, line.active + start, offset - start);
1123 
1124       /* If we're counting up to the Nth match, are we there yet?
1125          And even if we are there, there is another case we have to
1126 	 skip: are we matching an empty string immediately following
1127          another match?
1128 
1129          This latter case avoids that baaaac, when passed through
1130          s,a*,x,g, gives `xbxxcx' instead of xbxcx.  This behavior is
1131          unacceptable because it is not consistently applied (for
1132          example, `baaaa' gives `xbx', not `xbxx'). */
1133       if ((matched > 0 || count == 0 || offset > last_end)
1134 	  && ++count >= sub->numb)
1135         {
1136           /* We found a match, set the `replaced' flag. */
1137           replaced = true;
1138 
1139           /* Now expand the replacement string into the output string. */
1140           repl_mod = append_replacement (&s_accum, sub->replacement, &regs, repl_mod);
1141 	  again = sub->global;
1142         }
1143       else
1144 	{
1145           /* The match was not replaced.  Copy the text until its
1146              end; if it was vacuous, skip over one character and
1147 	     add that character to the output.  */
1148 	  if (matched == 0)
1149 	    {
1150 	      if (start < line.length)
1151 	        matched = 1;
1152 	      else
1153 	        break;
1154 	    }
1155 
1156 	  str_append(&s_accum, line.active + offset, matched);
1157         }
1158 
1159       /* Start after the match.  last_end is the real end of the matched
1160 	 substring, excluding characters that were skipped in case the RE
1161 	 matched the empty string.  */
1162       start = offset + matched;
1163       last_end = regs.end[0];
1164     }
1165   while (again
1166 	 && start <= line.length
1167 	 && match_regex(sub->regx, line.active, line.length, start,
1168 			&regs, sub->max_id + 1));
1169 
1170   /* Copy stuff to the right of the last match into the output string. */
1171   if (start < line.length)
1172     str_append(&s_accum, line.active + start, line.length-start);
1173   s_accum.chomped = line.chomped;
1174 
1175   /* Exchange line and s_accum.  This can be much cheaper
1176      than copying s_accum.active into line.text (for huge lines). */
1177   line_exchange(&line, &s_accum);
1178 
1179   /* Finish up. */
1180   if (count < sub->numb)
1181     return;
1182 
1183  post_subst:
1184   if (sub->print & 1)
1185     output_line(line.active, line.length, line.chomped, &output_file);
1186 
1187   if (sub->eval)
1188     {
1189 #ifdef HAVE_POPEN
1190       FILE *pipe;
1191       s_accum.length = 0;
1192 
1193       str_append (&line, "", 1);
1194       pipe = popen(line.active, "r");
1195 
1196       if (pipe != NULL)
1197 	{
1198 	  while (!feof (pipe))
1199 	    {
1200 	      char buf[4096];
1201 	      int n = fread (buf, sizeof(char), 4096, pipe);
1202 	      if (n > 0)
1203 		str_append(&s_accum, buf, n);
1204 	    }
1205 
1206 	  pclose (pipe);
1207 
1208 	  line_exchange(&line, &s_accum);
1209 	  if (line.length &&
1210 	      line.active[line.length - 1] == '\n')
1211 	    line.length--;
1212 	}
1213       else
1214 	panic(_("error in subprocess"));
1215 #else
1216       panic(_("option `e' not supported"));
1217 #endif
1218     }
1219 
1220   if (sub->print & 2)
1221     output_line(line.active, line.length, line.chomped, &output_file);
1222   if (sub->outf)
1223     output_line(line.active, line.length, line.chomped, sub->outf);
1224 }
1225 
1226 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1227 /* Used to attempt a simple-minded optimization. */
1228 
1229 static countT branches;
1230 
1231 static countT count_branches P_((struct vector *));
1232 static countT
count_branches(program)1233 count_branches(program)
1234   struct vector *program;
1235 {
1236   struct sed_cmd *cur_cmd = program->v;
1237   countT isn_cnt = program->v_length;
1238   countT cnt = 0;
1239 
1240   while (isn_cnt-- > 0)
1241     {
1242       switch (cur_cmd->cmd)
1243 	{
1244 	case 'b':
1245 	case 't':
1246 	case 'T':
1247 	case '{':
1248 	  ++cnt;
1249 	}
1250     }
1251   return cnt;
1252 }
1253 
1254 static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
1255 static struct sed_cmd *
shrink_program(vec,cur_cmd)1256 shrink_program(vec, cur_cmd)
1257   struct vector *vec;
1258   struct sed_cmd *cur_cmd;
1259 {
1260   struct sed_cmd *v = vec->v;
1261   struct sed_cmd *last_cmd = v + vec->v_length;
1262   struct sed_cmd *p;
1263   countT cmd_cnt;
1264 
1265   for (p=v; p < cur_cmd; ++p)
1266     if (p->cmd != '#')
1267       MEMCPY(v++, p, sizeof *v);
1268   cmd_cnt = v - vec->v;
1269 
1270   for (; p < last_cmd; ++p)
1271     if (p->cmd != '#')
1272       MEMCPY(v++, p, sizeof *v);
1273   vec->v_length = v - vec->v;
1274 
1275   return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
1276 }
1277 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1278 
1279 /* Execute the program `vec' on the current input line.
1280    Return exit status if caller should quit, -1 otherwise. */
1281 static int execute_program P_((struct vector *, struct input *));
1282 static int
execute_program(vec,input)1283 execute_program(vec, input)
1284   struct vector *vec;
1285   struct input *input;
1286 {
1287   struct sed_cmd *cur_cmd;
1288   struct sed_cmd *end_cmd;
1289 
1290   cur_cmd = vec->v;
1291   end_cmd = vec->v + vec->v_length;
1292   while (cur_cmd < end_cmd)
1293     {
1294       if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
1295 	{
1296 	  switch (cur_cmd->cmd)
1297 	    {
1298 	    case 'a':
1299 	      {
1300 		struct append_queue *aq = next_append_slot();
1301 		aq->text = cur_cmd->x.cmd_txt.text;
1302 		aq->textlen = cur_cmd->x.cmd_txt.text_length;
1303 	      }
1304 	      break;
1305 
1306 	    case '{':
1307 	    case 'b':
1308 	      cur_cmd = vec->v + cur_cmd->x.jump_index;
1309 	      continue;
1310 
1311 	    case '}':
1312 	    case '#':
1313 	    case ':':
1314 	      /* Executing labels and block-ends are easy. */
1315 	      break;
1316 
1317 	    case 'c':
1318 	      if (cur_cmd->range_state != RANGE_ACTIVE)
1319 		output_line(cur_cmd->x.cmd_txt.text,
1320 			    cur_cmd->x.cmd_txt.text_length - 1, true,
1321 			    &output_file);
1322 	      /* POSIX.2 is silent about c starting a new cycle,
1323 		 but it seems to be expected (and make sense). */
1324 	      /* Fall Through */
1325 	    case 'd':
1326 	      return -1;
1327 
1328 	    case 'D':
1329 	      {
1330 		char *p = memchr(line.active, '\n', line.length);
1331 		if (!p)
1332 		  return -1;
1333 
1334 		++p;
1335 		line.alloc -= p - line.active;
1336 		line.length -= p - line.active;
1337 		line.active += p - line.active;
1338 
1339 		/* reset to start next cycle without reading a new line: */
1340 		cur_cmd = vec->v;
1341 		continue;
1342 	      }
1343 
1344 	    case 'e': {
1345 #ifdef HAVE_POPEN
1346 	      FILE *pipe;
1347 	      int cmd_length = cur_cmd->x.cmd_txt.text_length;
1348 	      if (s_accum.alloc == 0)
1349 		line_init(&s_accum, INITIAL_BUFFER_SIZE);
1350 	      s_accum.length = 0;
1351 
1352 	      if (!cmd_length)
1353 		{
1354 		  str_append (&line, "", 1);
1355 		  pipe = popen(line.active, "r");
1356 		}
1357 	      else
1358 		{
1359 		  cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
1360 		  pipe = popen(cur_cmd->x.cmd_txt.text, "r");
1361                   output_missing_newline(&output_file);
1362 		}
1363 
1364 	      if (pipe != NULL)
1365 		{
1366 		  while (!feof (pipe))
1367 		    {
1368 		      char buf[4096];
1369 		      int n = fread (buf, sizeof(char), 4096, pipe);
1370 		      if (n > 0)
1371 			if (!cmd_length)
1372 			  str_append(&s_accum, buf, n);
1373 			else
1374 			  ck_fwrite(buf, 1, n, output_file.fp);
1375 		    }
1376 
1377 		  pclose (pipe);
1378 		  if (!cmd_length)
1379 		    {
1380 		      /* Store into pattern space for plain `e' commands */
1381 		      if (s_accum.length &&
1382 			  s_accum.active[s_accum.length - 1] == '\n')
1383 			s_accum.length--;
1384 
1385 		      /* Exchange line and s_accum.  This can be much
1386 			 cheaper than copying s_accum.active into line.text
1387 			 (for huge lines). */
1388 		      line_exchange(&line, &s_accum);
1389 		    }
1390                   else
1391                     flush_output(output_file.fp);
1392 
1393 		}
1394 	      else
1395 		panic(_("error in subprocess"));
1396 #else
1397 	      panic(_("`e' command not supported"));
1398 #endif
1399 	      break;
1400 	    }
1401 
1402 	    case 'g':
1403 	      line_copy(&hold, &line);
1404 	      break;
1405 
1406 	    case 'G':
1407 	      line_append(&hold, &line);
1408 	      break;
1409 
1410 	    case 'h':
1411 	      line_copy(&line, &hold);
1412 	      break;
1413 
1414 	    case 'H':
1415 	      line_append(&line, &hold);
1416 	      break;
1417 
1418 	    case 'i':
1419 	      output_line(cur_cmd->x.cmd_txt.text,
1420 			  cur_cmd->x.cmd_txt.text_length - 1,
1421 			  true, &output_file);
1422 	      break;
1423 
1424 	    case 'l':
1425 	      do_list(cur_cmd->x.int_arg == -1
1426 		      ? lcmd_out_line_len
1427 		      : cur_cmd->x.int_arg);
1428 	      break;
1429 
1430 	    case 'L':
1431               output_missing_newline(&output_file);
1432 	      fmt(line.active, line.active + line.length,
1433 		  cur_cmd->x.int_arg == -1
1434 		  ? lcmd_out_line_len
1435 		  : cur_cmd->x.int_arg,
1436 		  output_file.fp);
1437               flush_output(output_file.fp);
1438 	      break;
1439 
1440 	    case 'n':
1441 	      if (!no_default_output)
1442 		output_line(line.active, line.length, line.chomped, &output_file);
1443 	      if (test_eof(input) || !read_pattern_space(input, vec, false))
1444 		return -1;
1445 	      break;
1446 
1447 	    case 'N':
1448 	      str_append(&line, "\n", 1);
1449 
1450               if (test_eof(input) || !read_pattern_space(input, vec, true))
1451                 {
1452                   line.length--;
1453                   if (posixicity == POSIXLY_EXTENDED && !no_default_output)
1454                      output_line(line.active, line.length, line.chomped,
1455                                  &output_file);
1456                   return -1;
1457                 }
1458 	      break;
1459 
1460 	    case 'p':
1461 	      output_line(line.active, line.length, line.chomped, &output_file);
1462 	      break;
1463 
1464 	    case 'P':
1465 	      {
1466 		char *p = memchr(line.active, '\n', line.length);
1467 		output_line(line.active, p ? p - line.active : line.length,
1468 			    p ? true : line.chomped, &output_file);
1469 	      }
1470 	      break;
1471 
1472             case 'q':
1473               if (!no_default_output)
1474                 output_line(line.active, line.length, line.chomped, &output_file);
1475 	      dump_append_queue();
1476 
1477 	    case 'Q':
1478 	      return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
1479 
1480 	    case 'r':
1481 	      if (cur_cmd->x.fname)
1482 		{
1483 		  struct append_queue *aq = next_append_slot();
1484 		  aq->fname = cur_cmd->x.fname;
1485 		}
1486 	      break;
1487 
1488 	    case 'R':
1489 	      if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
1490 		{
1491 		  struct append_queue *aq;
1492 		  size_t buflen;
1493 		  char *text = NULL;
1494 		  int result;
1495 
1496 		  result = ck_getline (&text, &buflen, cur_cmd->x.fp);
1497 		  if (result != EOF)
1498 		    {
1499 		      aq = next_append_slot();
1500 		      aq->free = true;
1501 		      aq->text = text;
1502 		      aq->textlen = result;
1503 		    }
1504 		}
1505 	      break;
1506 
1507 	    case 's':
1508 	      do_subst(cur_cmd->x.cmd_subst);
1509 	      break;
1510 
1511 	    case 't':
1512 	      if (replaced)
1513 		{
1514 		  replaced = false;
1515 		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1516 		  continue;
1517 		}
1518 	      break;
1519 
1520 	    case 'T':
1521 	      if (!replaced)
1522 		{
1523 		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1524 		  continue;
1525 		}
1526 	      else
1527 		replaced = false;
1528 	      break;
1529 
1530 	    case 'w':
1531 	      if (cur_cmd->x.fp)
1532 		output_line(line.active, line.length,
1533 			    line.chomped, cur_cmd->x.outf);
1534 	      break;
1535 
1536 	    case 'W':
1537 	      if (cur_cmd->x.fp)
1538 	        {
1539 		  char *p = memchr(line.active, '\n', line.length);
1540 		  output_line(line.active, p ? p - line.active : line.length,
1541 			      p ? true : line.chomped, cur_cmd->x.outf);
1542 	        }
1543 	      break;
1544 
1545 	    case 'x':
1546 	      line_exchange(&line, &hold);
1547 	      break;
1548 
1549 	    case 'y':
1550 	      {
1551 #ifdef HAVE_MBRTOWC
1552                if (mb_cur_max > 1)
1553                  {
1554                    int idx, prev_idx; /* index in the input line.  */
1555                    char **trans;
1556                    mbstate_t mbstate;
1557                    memset(&mbstate, 0, sizeof(mbstate_t));
1558                    for (idx = 0; idx < line.length;)
1559                      {
1560                        int mbclen, i;
1561                        mbclen = MBRLEN (line.active + idx, line.length - idx,
1562                                           &mbstate);
1563                        /* An invalid sequence, or a truncated multibyte
1564                           character.  We treat it as a singlebyte character.
1565                        */
1566                        if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1567                            || mbclen == 0)
1568                          mbclen = 1;
1569 
1570                        trans = cur_cmd->x.translatemb;
1571                        /* `i' indicate i-th translate pair.  */
1572                        for (i = 0; trans[2*i] != NULL; i++)
1573                          {
1574                            if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
1575                              {
1576                                bool move_remain_buffer = false;
1577                                int trans_len = strlen(trans[2*i+1]);
1578 
1579                                if (mbclen < trans_len)
1580                                  {
1581                                    int new_len;
1582                                    new_len = line.length + 1 + trans_len - mbclen;
1583                                    /* We must extend the line buffer.  */
1584                                    if (line.alloc < new_len)
1585                                      {
1586                                        /* And we must resize the buffer.  */
1587                                        resize_line(&line, new_len);
1588                                      }
1589                                    move_remain_buffer = true;
1590                                  }
1591                                else if (mbclen > trans_len)
1592                                  {
1593                                    /* We must truncate the line buffer.  */
1594                                    move_remain_buffer = true;
1595                                  }
1596                                prev_idx = idx;
1597                                if (move_remain_buffer)
1598                                  {
1599                                    int move_len, move_offset;
1600                                    char *move_from, *move_to;
1601                                    /* Move the remaining with \0.  */
1602                                    move_from = line.active + idx + mbclen;
1603                                    move_to = line.active + idx + trans_len;
1604                                    move_len = line.length + 1 - idx - mbclen;
1605                                    move_offset = trans_len - mbclen;
1606                                    memmove(move_to, move_from, move_len);
1607                                    line.length += move_offset;
1608                                    idx += move_offset;
1609                                  }
1610                                strncpy(line.active + prev_idx, trans[2*i+1],
1611                                        trans_len);
1612                                break;
1613                              }
1614                          }
1615                        idx += mbclen;
1616                      }
1617                  }
1618                else
1619 #endif /* HAVE_MBRTOWC */
1620                  {
1621                    unsigned char *p, *e;
1622                    p = CAST(unsigned char *)line.active;
1623                    for (e=p+line.length; p<e; ++p)
1624                      *p = cur_cmd->x.translate[*p];
1625                  }
1626 	      }
1627 	      break;
1628 
1629 	    case '=':
1630               output_missing_newline(&output_file);
1631               fprintf(output_file.fp, "%lu\n",
1632                       CAST(unsigned long)input->line_number);
1633               flush_output(output_file.fp);
1634 	      break;
1635 
1636 	    default:
1637 	      panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1638 	    }
1639 	}
1640 
1641 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1642       /* If our top-level program consists solely of commands with
1643          ADDR_IS_NUM addresses then once we past the last mentioned
1644          line we should be able to quit if no_default_output is true,
1645          or otherwise quickly copy input to output.  Now whether this
1646          optimization is a win or not depends on how cheaply we can
1647          implement this for the cases where it doesn't help, as
1648          compared against how much time is saved.  One semantic
1649          difference (which I think is an improvement) is that *this*
1650          version will terminate after printing line two in the script
1651          "yes | sed -n 2p".
1652 
1653          Don't use this when in-place editing is active, because line
1654          numbers restart each time then. */
1655       else if (!separate_files)
1656 	{
1657 	  if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1658 	      && (cur_cmd->a2
1659 		  ? cur_cmd->range_state == RANGE_CLOSED
1660 		  : cur_cmd->a1->addr_number < input->line_number))
1661 	    {
1662 	      /* Skip this address next time */
1663 	      cur_cmd->addr_bang = !cur_cmd->addr_bang;
1664 	      cur_cmd->a1->addr_type = ADDR_IS_NULL;
1665 	      if (cur_cmd->a2)
1666 		cur_cmd->a2->addr_type = ADDR_IS_NULL;
1667 
1668 	      /* can we make an optimization? */
1669 	      if (cur_cmd->addr_bang)
1670 		{
1671 		  if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
1672 		      || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
1673 		    branches--;
1674 
1675 		  cur_cmd->cmd = '#';	/* replace with no-op */
1676 	          if (branches == 0)
1677 		    cur_cmd = shrink_program(vec, cur_cmd);
1678 		  if (!cur_cmd && no_default_output)
1679 		    return 0;
1680 		  end_cmd = vec->v + vec->v_length;
1681 		  if (!cur_cmd)
1682 		    cur_cmd = end_cmd;
1683 		  continue;
1684 		}
1685 	    }
1686 	}
1687 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1688 
1689       /* this is buried down here so that a "continue" statement can skip it */
1690       ++cur_cmd;
1691     }
1692 
1693     if (!no_default_output)
1694       output_line(line.active, line.length, line.chomped, &output_file);
1695     return -1;
1696 }
1697 
1698 
1699 
1700 /* Apply the compiled script to all the named files. */
1701 int
process_files(the_program,argv)1702 process_files(the_program, argv)
1703   struct vector *the_program;
1704   char **argv;
1705 {
1706   static char dash[] = "-";
1707   static char *stdin_argv[2] = { dash, NULL };
1708   struct input input;
1709   int status;
1710 
1711   line_init(&line, INITIAL_BUFFER_SIZE);
1712   line_init(&hold, 0);
1713   line_init(&buffer, 0);
1714 
1715 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1716   branches = count_branches(the_program);
1717 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1718   input.reset_at_next_file = true;
1719   if (argv && *argv)
1720     input.file_list = argv;
1721   else if (in_place_extension)
1722     panic(_("no input files"));
1723   else
1724     input.file_list = stdin_argv;
1725 
1726   input.bad_count = 0;
1727   input.line_number = 0;
1728   input.read_fn = read_always_fail;
1729   input.fp = NULL;
1730 
1731   status = EXIT_SUCCESS;
1732   while (read_pattern_space(&input, the_program, false))
1733     {
1734       status = execute_program(the_program, &input);
1735       if (status == -1)
1736 	status = EXIT_SUCCESS;
1737       else
1738 	break;
1739     }
1740   closedown(&input);
1741 
1742 #ifdef DEBUG_LEAKS
1743   /* We're about to exit, so these free()s are redundant.
1744      But if we're running under a memory-leak detecting
1745      implementation of malloc(), we want to explicitly
1746      deallocate in order to avoid extraneous noise from
1747      the allocator. */
1748   release_append_queue();
1749   FREE(buffer.text);
1750   FREE(hold.text);
1751   FREE(line.text);
1752   FREE(s_accum.text);
1753 #endif /*DEBUG_LEAKS*/
1754 
1755   if (input.bad_count)
1756     status = 2;
1757 
1758   return status;
1759 }
1760