1 /* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/
20 #define INITIAL_BUFFER_SIZE 50
21 #define FREAD_BUFFER_SIZE 8192
22
23 #include "sed.h"
24
25 #include <stdio.h>
26 #include <ctype.h>
27
28 #include <errno.h>
29 #ifndef errno
30 extern int errno;
31 #endif
32
33 #ifdef HAVE_UNISTD_H
34 # include <unistd.h>
35 #endif
36
37 #ifdef __GNUC__
38 # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
39 /* silence warning about unused parameter even for "gcc -W -Wunused" */
40 # define UNUSED __attribute__((unused))
41 # endif
42 #endif
43 #ifndef UNUSED
44 # define UNUSED
45 #endif
46
47 #ifdef HAVE_STRINGS_H
48 # include <strings.h>
49 #else
50 # include <string.h>
51 #endif /*HAVE_STRINGS_H*/
52 #ifdef HAVE_MEMORY_H
53 # include <memory.h>
54 #endif
55
56 #ifndef HAVE_STRCHR
57 # define strchr index
58 # define strrchr rindex
59 #endif
60
61 #ifdef HAVE_STDLIB_H
62 # include <stdlib.h>
63 #endif
64 #ifndef EXIT_SUCCESS
65 # define EXIT_SUCCESS 0
66 #endif
67
68 #ifdef HAVE_SYS_TYPES_H
69 # include <sys/types.h>
70 #endif
71
72 #include <sys/stat.h>
73
74
75 /* Sed operates a line at a time. */
76 struct line {
77 char *text; /* Pointer to line allocated by malloc. */
78 char *active; /* Pointer to non-consumed part of text. */
79 size_t length; /* Length of text (or active, if used). */
80 size_t alloc; /* Allocated space for active. */
81 bool chomped; /* Was a trailing newline dropped? */
82 #ifdef HAVE_MBRTOWC
83 mbstate_t mbstate;
84 #endif
85 };
86
87 /* A queue of text to write out at the end of a cycle
88 (filled by the "a", "r" and "R" commands.) */
89 struct append_queue {
90 const char *fname;
91 char *text;
92 size_t textlen;
93 struct append_queue *next;
94 bool free;
95 };
96
97 /* State information for the input stream. */
98 struct input {
99 /* The list of yet-to-be-opened files. It is invalid for file_list
100 to be NULL. When *file_list is NULL we are currently processing
101 the last file. */
102
103 char **file_list;
104
105 /* Count of files we failed to open. */
106 countT bad_count;
107
108 /* Current input line number (over all files). */
109 countT line_number;
110
111 /* True if we'll reset line numbers and addresses before
112 starting to process the next (possibly the first) file. */
113 bool reset_at_next_file;
114
115 /* Function to read one line. If FP is NULL, read_fn better not
116 be one which uses fp; in particular, read_always_fail() is
117 recommended. */
118 bool (*read_fn) P_((struct input *)); /* read one line */
119
120 char *out_file_name;
121
122 const char *in_file_name;
123
124 /* if NULL, none of the following are valid */
125 FILE *fp;
126
127 bool no_buffering;
128 };
129
130
131 /* Have we done any replacements lately? This is used by the `t' command. */
132 static bool replaced = false;
133
134 /* The current output file (stdout if -i is not being used. */
135 static struct output output_file;
136
137 /* The `current' input line. */
138 static struct line line;
139
140 /* An input line used to accumulate the result of the s and e commands. */
141 static struct line s_accum;
142
143 /* An input line that's been stored by later use by the program */
144 static struct line hold;
145
146 /* The buffered input look-ahead. The only field that should be
147 used outside of read_mem_line() or line_init() is buffer.length. */
148 static struct line buffer;
149
150 static struct append_queue *append_head = NULL;
151 static struct append_queue *append_tail = NULL;
152
153
#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.
*/
# ifdef memchr
#  undef memchr
# endif
# define memchr bootstrap_memchr

/* Return a pointer to the first of the N bytes starting at S that is
   equal to C, or a null pointer if there is none.  As with the
   standard memchr(), both C and the bytes examined are compared as
   unsigned char, so values of 0x80 and above match regardless of
   whether plain `char' is signed. */
static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  const unsigned char *p = CAST(const unsigned char *)s;
  unsigned char wanted = CAST(unsigned char)c;

  for (; n-- > 0; ++p)
    if (*p == wanted)
      return CAST(VOID *)p;
  return CAST(VOID *)0;
}
#endif /*BOOTSTRAP*/
181
182 /* increase a struct line's length, making some attempt at
183 keeping realloc() calls under control by padding for future growth. */
184 static void resize_line P_((struct line *, size_t));
185 static void
resize_line(lb,len)186 resize_line(lb, len)
187 struct line *lb;
188 size_t len;
189 {
190 int inactive;
191 inactive = lb->active - lb->text;
192
193 /* If the inactive part has got to more than two thirds of the buffer,
194 * remove it. */
195 if (inactive > lb->alloc * 2)
196 {
197 MEMMOVE(lb->text, lb->active, lb->length);
198 lb->alloc += lb->active - lb->text;
199 lb->active = lb->text;
200 inactive = 0;
201
202 if (lb->alloc > len)
203 return;
204 }
205
206 lb->alloc *= 2;
207 if (lb->alloc < len)
208 lb->alloc = len;
209 if (lb->alloc < INITIAL_BUFFER_SIZE)
210 lb->alloc = INITIAL_BUFFER_SIZE;
211
212 lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
213 lb->active = lb->text + inactive;
214 }
215
216 /* Append `length' bytes from `string' to the line `to'. */
217 static void str_append P_((struct line *, const char *, size_t));
218 static void
str_append(to,string,length)219 str_append(to, string, length)
220 struct line *to;
221 const char *string;
222 size_t length;
223 {
224 size_t new_length = to->length + length;
225
226 if (to->alloc < new_length)
227 resize_line(to, new_length);
228 MEMCPY(to->active + to->length, string, length);
229 to->length = new_length;
230
231 #ifdef HAVE_MBRTOWC
232 if (mb_cur_max == 1)
233 return;
234
235 while (length)
236 {
237 int n = MBRLEN (string, length, &to->mbstate);
238
239 /* An invalid sequence is treated like a singlebyte character. */
240 if (n == -1)
241 {
242 memset (&to->mbstate, 0, sizeof (to->mbstate));
243 n = 1;
244 }
245
246 if (n > 0)
247 length -= n;
248 else
249 break;
250 }
251 #endif
252 }
253
254 static void str_append_modified P_((struct line *, const char *, size_t,
255 enum replacement_types));
256 static void
str_append_modified(to,string,length,type)257 str_append_modified(to, string, length, type)
258 struct line *to;
259 const char *string;
260 size_t length;
261 enum replacement_types type;
262 {
263 size_t old_length = to->length;
264 char *start, *end;
265
266 if (length == 0)
267 return;
268
269 #ifdef HAVE_MBRTOWC
270 {
271 mbstate_t from_stat;
272
273 if (type == REPL_ASIS)
274 {
275 str_append(to, string, length);
276 return;
277 }
278
279 if (to->alloc - to->length < length * mb_cur_max)
280 resize_line(to, to->length + length * mb_cur_max);
281
282 MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
283 while (length)
284 {
285 wchar_t wc;
286 int n = MBRTOWC (&wc, string, length, &from_stat);
287
288 /* An invalid sequence is treated like a singlebyte character. */
289 if (n == -1)
290 {
291 memset (&to->mbstate, 0, sizeof (from_stat));
292 n = 1;
293 }
294
295 if (n > 0)
296 string += n, length -= n;
297 else
298 {
299 /* Incomplete sequence, copy it manually. */
300 str_append(to, string, length);
301 return;
302 }
303
304 /* Convert the first character specially... */
305 if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
306 {
307 if (type & REPL_UPPERCASE_FIRST)
308 wc = towupper(wc);
309 else
310 wc = towlower(wc);
311
312 type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
313 if (type == REPL_ASIS)
314 {
315 n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
316 to->length += n;
317 str_append(to, string, length);
318 return;
319 }
320 }
321
322 else if (type & REPL_UPPERCASE)
323 wc = towupper(wc);
324 else
325 wc = towlower(wc);
326
327 /* Copy the new wide character to the end of the string. */
328 n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
329 to->length += n;
330 if (n == -1)
331 {
332 fprintf (stderr, "Case conversion produced an invalid character!");
333 abort ();
334 }
335 }
336 }
337 #else
338 str_append(to, string, length);
339 start = to->active + old_length;
340 end = start + length;
341
342 /* Now do the required modifications. First \[lu]... */
343 if (type & REPL_UPPERCASE_FIRST)
344 {
345 *start = toupper(*start);
346 start++;
347 type &= ~REPL_UPPERCASE_FIRST;
348 }
349 else if (type & REPL_LOWERCASE_FIRST)
350 {
351 *start = tolower(*start);
352 start++;
353 type &= ~REPL_LOWERCASE_FIRST;
354 }
355
356 if (type == REPL_ASIS)
357 return;
358
359 /* ...and then \[LU] */
360 if (type == REPL_UPPERCASE)
361 for (; start != end; start++)
362 *start = toupper(*start);
363 else
364 for (; start != end; start++)
365 *start = tolower(*start);
366 #endif
367 }
368
369 /* initialize a "struct line" buffer */
370 static void line_init P_((struct line *, size_t initial_size));
371 static void
line_init(buf,initial_size)372 line_init(buf, initial_size)
373 struct line *buf;
374 size_t initial_size;
375 {
376 buf->text = MALLOC(initial_size, char);
377 buf->active = buf->text;
378 buf->alloc = initial_size;
379 buf->length = 0;
380 buf->chomped = true;
381
382 #ifdef HAVE_MBRTOWC
383 memset (&buf->mbstate, 0, sizeof (buf->mbstate));
384 #endif
385
386 }
387
388 /* Copy the contents of the line `from' into the line `to'.
389 This destroys the old contents of `to'. */
390 static void line_copy P_((struct line *from, struct line *to));
391 static void
line_copy(from,to)392 line_copy(from, to)
393 struct line *from;
394 struct line *to;
395 {
396 /* Remove the inactive portion in the destination buffer. */
397 to->alloc += to->active - to->text;
398
399 if (to->alloc < from->length)
400 {
401 to->alloc *= 2;
402 if (to->alloc < from->length)
403 to->alloc = from->length;
404 if (to->alloc < INITIAL_BUFFER_SIZE)
405 to->alloc = INITIAL_BUFFER_SIZE;
406 /* Use FREE()+MALLOC() instead of REALLOC() to
407 avoid unnecessary copying of old text. */
408 FREE(to->text);
409 to->text = MALLOC(to->alloc, char);
410 }
411
412 to->active = to->text;
413 to->length = from->length;
414 to->chomped = from->chomped;
415 MEMCPY(to->active, from->active, from->length);
416
417 #ifdef HAVE_MBRTOWC
418 MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
419 #endif
420 }
421
422 /* Append the contents of the line `from' to the line `to'. */
423 static void line_append P_((struct line *from, struct line *to));
424 static void
line_append(from,to)425 line_append(from, to)
426 struct line *from;
427 struct line *to;
428 {
429 str_append(to, "\n", 1);
430 str_append(to, from->active, from->length);
431 to->chomped = from->chomped;
432
433 #ifdef HAVE_MBRTOWC
434 MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
435 #endif
436 }
437
438 /* Exchange the contents of two "struct line" buffers. */
439 static void line_exchange P_((struct line *, struct line *));
440 static void
line_exchange(a,b)441 line_exchange(a, b)
442 struct line *a;
443 struct line *b;
444 {
445 struct line t;
446
447 MEMCPY(&t, a, sizeof(struct line));
448 MEMCPY( a, b, sizeof(struct line));
449 MEMCPY( b, &t, sizeof(struct line));
450 }
451
452
453 /* dummy function to simplify read_pattern_space() */
454 static bool read_always_fail P_((struct input *));
455 static bool
read_always_fail(input)456 read_always_fail(input)
457 struct input *input UNUSED;
458 {
459 return false;
460 }
461
462 static bool read_file_line P_((struct input *));
463 static bool
read_file_line(input)464 read_file_line(input)
465 struct input *input;
466 {
467 static char *b;
468 static size_t blen;
469
470 long result = ck_getline (&b, &blen, input->fp);
471 if (result <= 0)
472 return false;
473
474 /* Remove the trailing new-line that is left by getline. */
475 if (b[result - 1] == '\n')
476 --result;
477 else
478 line.chomped = false;
479
480 str_append(&line, b, result);
481 return true;
482 }
483
484
485 static inline void output_missing_newline P_((struct output *));
486 static inline void
output_missing_newline(outf)487 output_missing_newline(outf)
488 struct output *outf;
489 {
490 if (outf->missing_newline)
491 {
492 ck_fwrite("\n", 1, 1, outf->fp);
493 outf->missing_newline = false;
494 }
495 }
496
497 static inline void flush_output P_((FILE *));
498 static inline void
flush_output(fp)499 flush_output(fp)
500 FILE *fp;
501 {
502 #ifndef CONFIG_WITHOUT_O_OPT
503 if (fp != sed_stdout || unbuffered_output)
504 #else
505 if (fp != stdout || unbuffered_output)
506 #endif
507 ck_fflush(fp);
508 }
509
510 static void output_line P_((const char *, size_t, bool, struct output *));
511 static void
output_line(text,length,nl,outf)512 output_line(text, length, nl, outf)
513 const char *text;
514 size_t length;
515 bool nl;
516 struct output *outf;
517 {
518 output_missing_newline(outf);
519
520 if (length)
521 ck_fwrite(text, 1, length, outf->fp);
522
523 if (nl)
524 ck_fwrite("\n", 1, 1, outf->fp);
525 else
526 outf->missing_newline = true;
527
528 flush_output(outf->fp);
529 }
530
531 static struct append_queue *next_append_slot P_((void));
532 static struct append_queue *
next_append_slot()533 next_append_slot()
534 {
535 struct append_queue *n = MALLOC(1, struct append_queue);
536
537 n->fname = NULL;
538 n->text = NULL;
539 n->textlen = 0;
540 n->next = NULL;
541 n->free = false;
542
543 if (append_tail)
544 append_tail->next = n;
545 else
546 append_head = n;
547 return append_tail = n;
548 }
549
550 static void release_append_queue P_((void));
551 static void
release_append_queue()552 release_append_queue()
553 {
554 struct append_queue *p, *q;
555
556 for (p=append_head; p; p=q)
557 {
558 if (p->free)
559 FREE(p->text);
560
561 q = p->next;
562 FREE(p);
563 }
564 append_head = append_tail = NULL;
565 }
566
567 static void dump_append_queue P_((void));
568 static void
dump_append_queue()569 dump_append_queue()
570 {
571 struct append_queue *p;
572
573 output_missing_newline(&output_file);
574 for (p=append_head; p; p=p->next)
575 {
576 if (p->text)
577 ck_fwrite(p->text, 1, p->textlen, output_file.fp);
578
579 if (p->fname)
580 {
581 char buf[FREAD_BUFFER_SIZE];
582 size_t cnt;
583 FILE *fp;
584
585 /* "If _fname_ does not exist or cannot be read, it shall
586 be treated as if it were an empty file, causing no error
587 condition." IEEE Std 1003.2-1992
588 So, don't fail. */
589 fp = ck_fopen(p->fname, "r", false);
590 if (fp)
591 {
592 while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
593 ck_fwrite(buf, 1, cnt, output_file.fp);
594 ck_fclose(fp);
595 }
596 }
597 }
598
599 flush_output(output_file.fp);
600 release_append_queue();
601 }
602
603
604 /* Compute the name of the backup file for in-place editing */
605 static char *get_backup_file_name P_((const char *));
606 static char *
get_backup_file_name(name)607 get_backup_file_name(name)
608 const char *name;
609 {
610 char *old_asterisk, *asterisk, *backup, *p;
611 int name_length = strlen(name), backup_length = strlen(in_place_extension);
612
613 /* Compute the length of the backup file */
614 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
615 asterisk = strchr(old_asterisk, '*');
616 old_asterisk = asterisk + 1)
617 backup_length += name_length - 1;
618
619 p = backup = xmalloc(backup_length + 1);
620
621 /* Each iteration gobbles up to an asterisk */
622 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
623 asterisk = strchr(old_asterisk, '*');
624 old_asterisk = asterisk + 1)
625 {
626 MEMCPY (p, old_asterisk, asterisk - old_asterisk);
627 p += asterisk - old_asterisk;
628 strcpy (p, name);
629 p += name_length;
630 }
631
632 /* Tack on what's after the last asterisk */
633 strcpy (p, old_asterisk);
634 return backup;
635 }
636
637 /* Initialize a struct input for the named file. */
638 static void open_next_file P_((const char *name, struct input *));
639 static void
open_next_file(name,input)640 open_next_file(name, input)
641 const char *name;
642 struct input *input;
643 {
644 buffer.length = 0;
645
646 if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
647 {
648 clearerr(stdin); /* clear any stale EOF indication */
649 input->fp = stdin;
650 }
651 else if ( ! (input->fp = ck_fopen(name, "r", false)) )
652 {
653 const char *ptr = strerror(errno);
654 fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
655 input->read_fn = read_always_fail; /* a redundancy */
656 ++input->bad_count;
657 return;
658 }
659
660 input->read_fn = read_file_line;
661
662 if (in_place_extension)
663 {
664 int output_fd;
665 char *tmpdir = ck_strdup(name), *p;
666 struct stat st;
667
668 /* get the base name */
669 if (p = strrchr(tmpdir, '/'))
670 *(p + 1) = 0;
671 else
672 strcpy(tmpdir, ".");
673
674 input->in_file_name = name;
675
676 if (isatty (fileno (input->fp)))
677 panic(_("couldn't edit %s: is a terminal"), input->in_file_name);
678
679 fstat (fileno (input->fp), &st);
680 if (!S_ISREG (st.st_mode))
681 panic(_("couldn't edit %s: not a regular file"), input->in_file_name);
682
683 output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
684 output_file.missing_newline = false;
685 free (tmpdir);
686
687 if (!output_file.fp)
688 panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
689
690 output_fd = fileno (output_file.fp);
691 #ifdef HAVE_FCHMOD
692 fchmod (output_fd, st.st_mode);
693 #endif
694 #ifdef HAVE_FCHOWN
695 if (fchown (output_fd, st.st_uid, st.st_gid) == -1)
696 fchown (output_fd, -1, st.st_gid);
697 #endif
698 }
699 else
700 #ifndef CONFIG_WITHOUT_O_OPT
701 output_file.fp = sed_stdout;
702 #else
703 output_file.fp = stdout;
704 #endif
705 }
706
707
708 /* Clean up an input stream that we are done with. */
709 static void closedown P_((struct input *));
710 static void
closedown(input)711 closedown(input)
712 struct input *input;
713 {
714 input->read_fn = read_always_fail;
715 if (!input->fp)
716 return;
717 if (input->fp != stdin) /* stdin can be reused on tty and tape devices */
718 ck_fclose(input->fp);
719
720 if (in_place_extension && output_file.fp != NULL)
721 {
722 ck_fclose (output_file.fp);
723 if (strcmp(in_place_extension, "*") != 0)
724 {
725 char *backup_file_name = get_backup_file_name(input->in_file_name);
726 ck_rename (input->in_file_name, backup_file_name, input->out_file_name);
727 free (backup_file_name);
728 }
729
730 ck_rename (input->out_file_name, input->in_file_name, input->out_file_name);
731 free (input->out_file_name);
732 }
733
734 input->fp = NULL;
735 }
736
737 /* Reset range commands so that they are marked as non-matching */
738 static void reset_addresses P_((struct vector *));
739 static void
reset_addresses(vec)740 reset_addresses(vec)
741 struct vector *vec;
742 {
743 struct sed_cmd *cur_cmd;
744 int n;
745
746 for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
747 if (cur_cmd->a1
748 && cur_cmd->a1->addr_type == ADDR_IS_NUM
749 && cur_cmd->a1->addr_number == 0)
750 cur_cmd->range_state = RANGE_ACTIVE;
751 else
752 cur_cmd->range_state = RANGE_INACTIVE;
753 }
754
755 /* Read in the next line of input, and store it in the pattern space.
756 Return zero if there is nothing left to input. */
757 static bool read_pattern_space P_((struct input *, struct vector *, bool));
758 static bool
read_pattern_space(input,the_program,append)759 read_pattern_space(input, the_program, append)
760 struct input *input;
761 struct vector *the_program;
762 bool append;
763 {
764 if (append_head) /* redundant test to optimize for common case */
765 dump_append_queue();
766 replaced = false;
767 if (!append)
768 line.length = 0;
769 line.chomped = true; /* default, until proved otherwise */
770
771 while ( ! (*input->read_fn)(input) )
772 {
773 closedown(input);
774
775 if (!*input->file_list)
776 return false;
777
778 if (input->reset_at_next_file)
779 {
780 input->line_number = 0;
781 reset_addresses (the_program);
782 rewind_read_files ();
783
784 /* If doing in-place editing, we will never append the
785 new-line to this file; but if the output goes to stdout,
786 we might still have to output the missing new-line. */
787 if (in_place_extension)
788 output_file.missing_newline = false;
789
790 input->reset_at_next_file = separate_files;
791 }
792
793 open_next_file (*input->file_list++, input);
794 }
795
796 ++input->line_number;
797 return true;
798 }
799
800
801 static bool last_file_with_data_p P_((struct input *));
802 static bool
last_file_with_data_p(input)803 last_file_with_data_p(input)
804 struct input *input;
805 {
806 for (;;)
807 {
808 int ch;
809
810 closedown(input);
811 if (!*input->file_list)
812 return true;
813 open_next_file(*input->file_list++, input);
814 if (input->fp)
815 {
816 if ((ch = getc(input->fp)) != EOF)
817 {
818 ungetc(ch, input->fp);
819 return false;
820 }
821 }
822 }
823 }
824
825 /* Determine if we match the `$' address. */
826 static bool test_eof P_((struct input *));
827 static bool
test_eof(input)828 test_eof(input)
829 struct input *input;
830 {
831 int ch;
832
833 if (buffer.length)
834 return false;
835 if (!input->fp)
836 return separate_files || last_file_with_data_p(input);
837 if (feof(input->fp))
838 return separate_files || last_file_with_data_p(input);
839 if ((ch = getc(input->fp)) == EOF)
840 return separate_files || last_file_with_data_p(input);
841 ungetc(ch, input->fp);
842 return false;
843 }
844
845 /* Return non-zero if the current line matches the address
846 pointed to by `addr'. */
847 static bool match_an_address_p P_((struct addr *, struct input *));
848 static bool
match_an_address_p(addr,input)849 match_an_address_p(addr, input)
850 struct addr *addr;
851 struct input *input;
852 {
853 switch (addr->addr_type)
854 {
855 case ADDR_IS_NULL:
856 return true;
857
858 case ADDR_IS_REGEX:
859 return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
860
861 case ADDR_IS_NUM_MOD:
862 return (input->line_number >= addr->addr_number
863 && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
864
865 case ADDR_IS_STEP:
866 case ADDR_IS_STEP_MOD:
867 /* reminder: these are only meaningful for a2 addresses */
868 /* a2->addr_number needs to be recomputed each time a1 address
869 matches for the step and step_mod types */
870 return (addr->addr_number <= input->line_number);
871
872 case ADDR_IS_LAST:
873 return test_eof(input);
874
875 /* ADDR_IS_NUM is handled in match_address_p. */
876 case ADDR_IS_NUM:
877 default:
878 panic("INTERNAL ERROR: bad address type");
879 }
880 /*NOTREACHED*/
881 return false;
882 }
883
884 /* return non-zero if current address is valid for cmd */
885 static bool match_address_p P_((struct sed_cmd *, struct input *));
886 static bool
match_address_p(cmd,input)887 match_address_p(cmd, input)
888 struct sed_cmd *cmd;
889 struct input *input;
890 {
891 if (!cmd->a1)
892 return true;
893
894 if (cmd->range_state != RANGE_ACTIVE)
895 {
896 /* Find if we are going to activate a range. Handle ADDR_IS_NUM
897 specially: it represent an "absolute" state, it should not
898 be computed like regexes. */
899 if (cmd->a1->addr_type == ADDR_IS_NUM)
900 {
901 if (!cmd->a2)
902 return (input->line_number == cmd->a1->addr_number);
903
904 if (cmd->range_state == RANGE_CLOSED
905 || input->line_number < cmd->a1->addr_number)
906 return false;
907 }
908 else
909 {
910 if (!cmd->a2)
911 return match_an_address_p(cmd->a1, input);
912
913 if (!match_an_address_p(cmd->a1, input))
914 return false;
915 }
916
917 /* Ok, start a new range. */
918 cmd->range_state = RANGE_ACTIVE;
919 switch (cmd->a2->addr_type)
920 {
921 case ADDR_IS_REGEX:
922 /* Always include at least two lines. */
923 return true;
924 case ADDR_IS_NUM:
925 /* Same handling as below, but always include at least one line. */
926 if (input->line_number >= cmd->a2->addr_number)
927 cmd->range_state = RANGE_CLOSED;
928 return true;
929 case ADDR_IS_STEP:
930 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
931 return true;
932 case ADDR_IS_STEP_MOD:
933 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
934 - (input->line_number%cmd->a2->addr_step);
935 return true;
936 default:
937 break;
938 }
939 }
940
941 /* cmd->range_state == RANGE_ACTIVE. Check if the range is
942 ending; also handle ADDR_IS_NUM specially in this case. */
943
944 if (cmd->a2->addr_type == ADDR_IS_NUM)
945 {
946 /* If the second address is a line number, and if we got past
947 that line, fail to match (it can happen when you jump
948 over such addresses with `b' and `t'. Use RANGE_CLOSED
949 so that the range is not re-enabled anymore. */
950 if (input->line_number >= cmd->a2->addr_number)
951 cmd->range_state = RANGE_CLOSED;
952
953 return (input->line_number <= cmd->a2->addr_number);
954 }
955
956 /* Other addresses are treated as usual. */
957 if (match_an_address_p(cmd->a2, input))
958 cmd->range_state = RANGE_CLOSED;
959
960 return true;
961 }
962
963
964 static void do_list P_((int line_len));
965 static void
do_list(line_len)966 do_list(line_len)
967 int line_len;
968 {
969 unsigned char *p = CAST(unsigned char *)line.active;
970 countT len = line.length;
971 countT width = 0;
972 char obuf[180]; /* just in case we encounter a 512-bit char (;-) */
973 char *o;
974 size_t olen;
975 FILE *fp = output_file.fp;
976
977 output_missing_newline(&output_file);
978 for (; len--; ++p) {
979 o = obuf;
980
981 /* Some locales define 8-bit characters as printable. This makes the
982 testsuite fail at 8to7.sed because the `l' command in fact will not
983 convert the 8-bit characters. */
984 #if defined isascii || defined HAVE_ISASCII
985 if (isascii(*p) && ISPRINT(*p)) {
986 #else
987 if (ISPRINT(*p)) {
988 #endif
989 *o++ = *p;
990 if (*p == '\\')
991 *o++ = '\\';
992 } else {
993 *o++ = '\\';
994 switch (*p) {
995 #if defined __STDC__ && __STDC__-0
996 case '\a': *o++ = 'a'; break;
997 #else /* Not STDC; we'll just assume ASCII */
998 case 007: *o++ = 'a'; break;
999 #endif
1000 case '\b': *o++ = 'b'; break;
1001 case '\f': *o++ = 'f'; break;
1002 case '\n': *o++ = 'n'; break;
1003 case '\r': *o++ = 'r'; break;
1004 case '\t': *o++ = 't'; break;
1005 case '\v': *o++ = 'v'; break;
1006 default:
1007 sprintf(o, "%03o", *p);
1008 o += strlen(o);
1009 break;
1010 }
1011 }
1012 olen = o - obuf;
1013 if (width+olen >= line_len && line_len > 0) {
1014 ck_fwrite("\\\n", 1, 2, fp);
1015 width = 0;
1016 }
1017 ck_fwrite(obuf, 1, olen, fp);
1018 width += olen;
1019 }
1020 ck_fwrite("$\n", 1, 2, fp);
1021 flush_output (fp);
1022 }
1023
1024
1025 static enum replacement_types append_replacement P_((struct line *, struct replacement *,
1026 struct re_registers *,
1027 enum replacement_types));
1028 static enum replacement_types
append_replacement(buf,p,regs,repl_mod)1029 append_replacement (buf, p, regs, repl_mod)
1030 struct line *buf;
1031 struct replacement *p;
1032 struct re_registers *regs;
1033 enum replacement_types repl_mod;
1034 {
1035 for (; p; p=p->next)
1036 {
1037 int i = p->subst_id;
1038 enum replacement_types curr_type;
1039
1040 /* Apply a \[lu] modifier that was given earlier, but which we
1041 have not had yet the occasion to apply. But don't do it
1042 if this replacement has a modifier of its own. */
1043 curr_type = (p->repl_type & REPL_MODIFIERS)
1044 ? p->repl_type
1045 : p->repl_type | repl_mod;
1046
1047 repl_mod = 0;
1048 if (p->prefix_length)
1049 {
1050 str_append_modified(buf, p->prefix, p->prefix_length,
1051 curr_type);
1052 curr_type &= ~REPL_MODIFIERS;
1053 }
1054
1055 if (0 <= i)
1056 if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
1057 /* Save this modifier, we shall apply it later.
1058 e.g. in s/()([a-z])/\u\1\2/
1059 the \u modifier is applied to \2, not \1 */
1060 repl_mod = curr_type & REPL_MODIFIERS;
1061
1062 else
1063 str_append_modified(buf, line.active + regs->start[i],
1064 CAST(size_t)(regs->end[i] - regs->start[i]),
1065 curr_type);
1066 }
1067
1068 return repl_mod;
1069 }
1070
1071 static void do_subst P_((struct subst *));
1072 static void
do_subst(sub)1073 do_subst(sub)
1074 struct subst *sub;
1075 {
1076 size_t start = 0; /* where to start scan for (next) match in LINE */
1077 size_t last_end = 0; /* where did the last successful match end in LINE */
1078 countT count = 0; /* number of matches found */
1079 bool again = true;
1080
1081 static struct re_registers regs;
1082
1083 if (s_accum.alloc == 0)
1084 line_init(&s_accum, INITIAL_BUFFER_SIZE);
1085 s_accum.length = 0;
1086
1087 /* The first part of the loop optimizes s/xxx// when xxx is at the
1088 start, and s/xxx$// */
1089 if (!match_regex(sub->regx, line.active, line.length, start,
1090 ®s, sub->max_id + 1))
1091 return;
1092
1093 if (!sub->replacement && sub->numb <= 1)
1094 if (regs.start[0] == 0 && !sub->global)
1095 {
1096 /* We found a match, set the `replaced' flag. */
1097 replaced = true;
1098
1099 line.active += regs.end[0];
1100 line.length -= regs.end[0];
1101 line.alloc -= regs.end[0];
1102 goto post_subst;
1103 }
1104 else if (regs.end[0] == line.length)
1105 {
1106 /* We found a match, set the `replaced' flag. */
1107 replaced = true;
1108
1109 line.length = regs.start[0];
1110 goto post_subst;
1111 }
1112
1113 do
1114 {
1115 enum replacement_types repl_mod = 0;
1116
1117 size_t offset = regs.start[0];
1118 size_t matched = regs.end[0] - regs.start[0];
1119
1120 /* Copy stuff to the left of this match into the output string. */
1121 if (start < offset)
1122 str_append(&s_accum, line.active + start, offset - start);
1123
1124 /* If we're counting up to the Nth match, are we there yet?
1125 And even if we are there, there is another case we have to
1126 skip: are we matching an empty string immediately following
1127 another match?
1128
1129 This latter case avoids that baaaac, when passed through
1130 s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is
1131 unacceptable because it is not consistently applied (for
1132 example, `baaaa' gives `xbx', not `xbxx'). */
1133 if ((matched > 0 || count == 0 || offset > last_end)
1134 && ++count >= sub->numb)
1135 {
1136 /* We found a match, set the `replaced' flag. */
1137 replaced = true;
1138
1139 /* Now expand the replacement string into the output string. */
1140 repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod);
1141 again = sub->global;
1142 }
1143 else
1144 {
1145 /* The match was not replaced. Copy the text until its
1146 end; if it was vacuous, skip over one character and
1147 add that character to the output. */
1148 if (matched == 0)
1149 {
1150 if (start < line.length)
1151 matched = 1;
1152 else
1153 break;
1154 }
1155
1156 str_append(&s_accum, line.active + offset, matched);
1157 }
1158
1159 /* Start after the match. last_end is the real end of the matched
1160 substring, excluding characters that were skipped in case the RE
1161 matched the empty string. */
1162 start = offset + matched;
1163 last_end = regs.end[0];
1164 }
1165 while (again
1166 && start <= line.length
1167 && match_regex(sub->regx, line.active, line.length, start,
1168 ®s, sub->max_id + 1));
1169
1170 /* Copy stuff to the right of the last match into the output string. */
1171 if (start < line.length)
1172 str_append(&s_accum, line.active + start, line.length-start);
1173 s_accum.chomped = line.chomped;
1174
1175 /* Exchange line and s_accum. This can be much cheaper
1176 than copying s_accum.active into line.text (for huge lines). */
1177 line_exchange(&line, &s_accum);
1178
1179 /* Finish up. */
1180 if (count < sub->numb)
1181 return;
1182
1183 post_subst:
1184 if (sub->print & 1)
1185 output_line(line.active, line.length, line.chomped, &output_file);
1186
1187 if (sub->eval)
1188 {
1189 #ifdef HAVE_POPEN
1190 FILE *pipe;
1191 s_accum.length = 0;
1192
1193 str_append (&line, "", 1);
1194 pipe = popen(line.active, "r");
1195
1196 if (pipe != NULL)
1197 {
1198 while (!feof (pipe))
1199 {
1200 char buf[4096];
1201 int n = fread (buf, sizeof(char), 4096, pipe);
1202 if (n > 0)
1203 str_append(&s_accum, buf, n);
1204 }
1205
1206 pclose (pipe);
1207
1208 line_exchange(&line, &s_accum);
1209 if (line.length &&
1210 line.active[line.length - 1] == '\n')
1211 line.length--;
1212 }
1213 else
1214 panic(_("error in subprocess"));
1215 #else
1216 panic(_("option `e' not supported"));
1217 #endif
1218 }
1219
1220 if (sub->print & 2)
1221 output_line(line.active, line.length, line.chomped, &output_file);
1222 if (sub->outf)
1223 output_line(line.active, line.length, line.chomped, sub->outf);
1224 }
1225
1226 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
/* Used to attempt a simple-minded optimization.

   Tally of control-flow commands in the program.  process_files() sets
   it from count_branches(); execute_program() decrements it whenever it
   turns a 'b', 't', 'T' or '}' command into a no-op, and calls
   shrink_program() once the tally has dropped to zero. */

static countT branches;
1230
1231 static countT count_branches P_((struct vector *));
1232 static countT
count_branches(program)1233 count_branches(program)
1234 struct vector *program;
1235 {
1236 struct sed_cmd *cur_cmd = program->v;
1237 countT isn_cnt = program->v_length;
1238 countT cnt = 0;
1239
1240 while (isn_cnt-- > 0)
1241 {
1242 switch (cur_cmd->cmd)
1243 {
1244 case 'b':
1245 case 't':
1246 case 'T':
1247 case '{':
1248 ++cnt;
1249 }
1250 }
1251 return cnt;
1252 }
1253
1254 static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
1255 static struct sed_cmd *
shrink_program(vec,cur_cmd)1256 shrink_program(vec, cur_cmd)
1257 struct vector *vec;
1258 struct sed_cmd *cur_cmd;
1259 {
1260 struct sed_cmd *v = vec->v;
1261 struct sed_cmd *last_cmd = v + vec->v_length;
1262 struct sed_cmd *p;
1263 countT cmd_cnt;
1264
1265 for (p=v; p < cur_cmd; ++p)
1266 if (p->cmd != '#')
1267 MEMCPY(v++, p, sizeof *v);
1268 cmd_cnt = v - vec->v;
1269
1270 for (; p < last_cmd; ++p)
1271 if (p->cmd != '#')
1272 MEMCPY(v++, p, sizeof *v);
1273 vec->v_length = v - vec->v;
1274
1275 return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
1276 }
1277 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1278
1279 /* Execute the program `vec' on the current input line.
1280 Return exit status if caller should quit, -1 otherwise. */
1281 static int execute_program P_((struct vector *, struct input *));
1282 static int
execute_program(vec,input)1283 execute_program(vec, input)
1284 struct vector *vec;
1285 struct input *input;
1286 {
1287 struct sed_cmd *cur_cmd;
1288 struct sed_cmd *end_cmd;
1289
1290 cur_cmd = vec->v;
1291 end_cmd = vec->v + vec->v_length;
1292 while (cur_cmd < end_cmd)
1293 {
1294 if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
1295 {
1296 switch (cur_cmd->cmd)
1297 {
1298 case 'a':
1299 {
1300 struct append_queue *aq = next_append_slot();
1301 aq->text = cur_cmd->x.cmd_txt.text;
1302 aq->textlen = cur_cmd->x.cmd_txt.text_length;
1303 }
1304 break;
1305
1306 case '{':
1307 case 'b':
1308 cur_cmd = vec->v + cur_cmd->x.jump_index;
1309 continue;
1310
1311 case '}':
1312 case '#':
1313 case ':':
1314 /* Executing labels and block-ends are easy. */
1315 break;
1316
1317 case 'c':
1318 if (cur_cmd->range_state != RANGE_ACTIVE)
1319 output_line(cur_cmd->x.cmd_txt.text,
1320 cur_cmd->x.cmd_txt.text_length - 1, true,
1321 &output_file);
1322 /* POSIX.2 is silent about c starting a new cycle,
1323 but it seems to be expected (and make sense). */
1324 /* Fall Through */
1325 case 'd':
1326 return -1;
1327
1328 case 'D':
1329 {
1330 char *p = memchr(line.active, '\n', line.length);
1331 if (!p)
1332 return -1;
1333
1334 ++p;
1335 line.alloc -= p - line.active;
1336 line.length -= p - line.active;
1337 line.active += p - line.active;
1338
1339 /* reset to start next cycle without reading a new line: */
1340 cur_cmd = vec->v;
1341 continue;
1342 }
1343
1344 case 'e': {
1345 #ifdef HAVE_POPEN
1346 FILE *pipe;
1347 int cmd_length = cur_cmd->x.cmd_txt.text_length;
1348 if (s_accum.alloc == 0)
1349 line_init(&s_accum, INITIAL_BUFFER_SIZE);
1350 s_accum.length = 0;
1351
1352 if (!cmd_length)
1353 {
1354 str_append (&line, "", 1);
1355 pipe = popen(line.active, "r");
1356 }
1357 else
1358 {
1359 cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
1360 pipe = popen(cur_cmd->x.cmd_txt.text, "r");
1361 output_missing_newline(&output_file);
1362 }
1363
1364 if (pipe != NULL)
1365 {
1366 while (!feof (pipe))
1367 {
1368 char buf[4096];
1369 int n = fread (buf, sizeof(char), 4096, pipe);
1370 if (n > 0)
1371 if (!cmd_length)
1372 str_append(&s_accum, buf, n);
1373 else
1374 ck_fwrite(buf, 1, n, output_file.fp);
1375 }
1376
1377 pclose (pipe);
1378 if (!cmd_length)
1379 {
1380 /* Store into pattern space for plain `e' commands */
1381 if (s_accum.length &&
1382 s_accum.active[s_accum.length - 1] == '\n')
1383 s_accum.length--;
1384
1385 /* Exchange line and s_accum. This can be much
1386 cheaper than copying s_accum.active into line.text
1387 (for huge lines). */
1388 line_exchange(&line, &s_accum);
1389 }
1390 else
1391 flush_output(output_file.fp);
1392
1393 }
1394 else
1395 panic(_("error in subprocess"));
1396 #else
1397 panic(_("`e' command not supported"));
1398 #endif
1399 break;
1400 }
1401
1402 case 'g':
1403 line_copy(&hold, &line);
1404 break;
1405
1406 case 'G':
1407 line_append(&hold, &line);
1408 break;
1409
1410 case 'h':
1411 line_copy(&line, &hold);
1412 break;
1413
1414 case 'H':
1415 line_append(&line, &hold);
1416 break;
1417
1418 case 'i':
1419 output_line(cur_cmd->x.cmd_txt.text,
1420 cur_cmd->x.cmd_txt.text_length - 1,
1421 true, &output_file);
1422 break;
1423
1424 case 'l':
1425 do_list(cur_cmd->x.int_arg == -1
1426 ? lcmd_out_line_len
1427 : cur_cmd->x.int_arg);
1428 break;
1429
1430 case 'L':
1431 output_missing_newline(&output_file);
1432 fmt(line.active, line.active + line.length,
1433 cur_cmd->x.int_arg == -1
1434 ? lcmd_out_line_len
1435 : cur_cmd->x.int_arg,
1436 output_file.fp);
1437 flush_output(output_file.fp);
1438 break;
1439
1440 case 'n':
1441 if (!no_default_output)
1442 output_line(line.active, line.length, line.chomped, &output_file);
1443 if (test_eof(input) || !read_pattern_space(input, vec, false))
1444 return -1;
1445 break;
1446
1447 case 'N':
1448 str_append(&line, "\n", 1);
1449
1450 if (test_eof(input) || !read_pattern_space(input, vec, true))
1451 {
1452 line.length--;
1453 if (posixicity == POSIXLY_EXTENDED && !no_default_output)
1454 output_line(line.active, line.length, line.chomped,
1455 &output_file);
1456 return -1;
1457 }
1458 break;
1459
1460 case 'p':
1461 output_line(line.active, line.length, line.chomped, &output_file);
1462 break;
1463
1464 case 'P':
1465 {
1466 char *p = memchr(line.active, '\n', line.length);
1467 output_line(line.active, p ? p - line.active : line.length,
1468 p ? true : line.chomped, &output_file);
1469 }
1470 break;
1471
1472 case 'q':
1473 if (!no_default_output)
1474 output_line(line.active, line.length, line.chomped, &output_file);
1475 dump_append_queue();
1476
1477 case 'Q':
1478 return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
1479
1480 case 'r':
1481 if (cur_cmd->x.fname)
1482 {
1483 struct append_queue *aq = next_append_slot();
1484 aq->fname = cur_cmd->x.fname;
1485 }
1486 break;
1487
1488 case 'R':
1489 if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
1490 {
1491 struct append_queue *aq;
1492 size_t buflen;
1493 char *text = NULL;
1494 int result;
1495
1496 result = ck_getline (&text, &buflen, cur_cmd->x.fp);
1497 if (result != EOF)
1498 {
1499 aq = next_append_slot();
1500 aq->free = true;
1501 aq->text = text;
1502 aq->textlen = result;
1503 }
1504 }
1505 break;
1506
1507 case 's':
1508 do_subst(cur_cmd->x.cmd_subst);
1509 break;
1510
1511 case 't':
1512 if (replaced)
1513 {
1514 replaced = false;
1515 cur_cmd = vec->v + cur_cmd->x.jump_index;
1516 continue;
1517 }
1518 break;
1519
1520 case 'T':
1521 if (!replaced)
1522 {
1523 cur_cmd = vec->v + cur_cmd->x.jump_index;
1524 continue;
1525 }
1526 else
1527 replaced = false;
1528 break;
1529
1530 case 'w':
1531 if (cur_cmd->x.fp)
1532 output_line(line.active, line.length,
1533 line.chomped, cur_cmd->x.outf);
1534 break;
1535
1536 case 'W':
1537 if (cur_cmd->x.fp)
1538 {
1539 char *p = memchr(line.active, '\n', line.length);
1540 output_line(line.active, p ? p - line.active : line.length,
1541 p ? true : line.chomped, cur_cmd->x.outf);
1542 }
1543 break;
1544
1545 case 'x':
1546 line_exchange(&line, &hold);
1547 break;
1548
1549 case 'y':
1550 {
1551 #ifdef HAVE_MBRTOWC
1552 if (mb_cur_max > 1)
1553 {
1554 int idx, prev_idx; /* index in the input line. */
1555 char **trans;
1556 mbstate_t mbstate;
1557 memset(&mbstate, 0, sizeof(mbstate_t));
1558 for (idx = 0; idx < line.length;)
1559 {
1560 int mbclen, i;
1561 mbclen = MBRLEN (line.active + idx, line.length - idx,
1562 &mbstate);
1563 /* An invalid sequence, or a truncated multibyte
1564 character. We treat it as a singlebyte character.
1565 */
1566 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1567 || mbclen == 0)
1568 mbclen = 1;
1569
1570 trans = cur_cmd->x.translatemb;
1571 /* `i' indicate i-th translate pair. */
1572 for (i = 0; trans[2*i] != NULL; i++)
1573 {
1574 if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
1575 {
1576 bool move_remain_buffer = false;
1577 int trans_len = strlen(trans[2*i+1]);
1578
1579 if (mbclen < trans_len)
1580 {
1581 int new_len;
1582 new_len = line.length + 1 + trans_len - mbclen;
1583 /* We must extend the line buffer. */
1584 if (line.alloc < new_len)
1585 {
1586 /* And we must resize the buffer. */
1587 resize_line(&line, new_len);
1588 }
1589 move_remain_buffer = true;
1590 }
1591 else if (mbclen > trans_len)
1592 {
1593 /* We must truncate the line buffer. */
1594 move_remain_buffer = true;
1595 }
1596 prev_idx = idx;
1597 if (move_remain_buffer)
1598 {
1599 int move_len, move_offset;
1600 char *move_from, *move_to;
1601 /* Move the remaining with \0. */
1602 move_from = line.active + idx + mbclen;
1603 move_to = line.active + idx + trans_len;
1604 move_len = line.length + 1 - idx - mbclen;
1605 move_offset = trans_len - mbclen;
1606 memmove(move_to, move_from, move_len);
1607 line.length += move_offset;
1608 idx += move_offset;
1609 }
1610 strncpy(line.active + prev_idx, trans[2*i+1],
1611 trans_len);
1612 break;
1613 }
1614 }
1615 idx += mbclen;
1616 }
1617 }
1618 else
1619 #endif /* HAVE_MBRTOWC */
1620 {
1621 unsigned char *p, *e;
1622 p = CAST(unsigned char *)line.active;
1623 for (e=p+line.length; p<e; ++p)
1624 *p = cur_cmd->x.translate[*p];
1625 }
1626 }
1627 break;
1628
1629 case '=':
1630 output_missing_newline(&output_file);
1631 fprintf(output_file.fp, "%lu\n",
1632 CAST(unsigned long)input->line_number);
1633 flush_output(output_file.fp);
1634 break;
1635
1636 default:
1637 panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1638 }
1639 }
1640
1641 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1642 /* If our top-level program consists solely of commands with
1643 ADDR_IS_NUM addresses then once we past the last mentioned
1644 line we should be able to quit if no_default_output is true,
1645 or otherwise quickly copy input to output. Now whether this
1646 optimization is a win or not depends on how cheaply we can
1647 implement this for the cases where it doesn't help, as
1648 compared against how much time is saved. One semantic
1649 difference (which I think is an improvement) is that *this*
1650 version will terminate after printing line two in the script
1651 "yes | sed -n 2p".
1652
1653 Don't use this when in-place editing is active, because line
1654 numbers restart each time then. */
1655 else if (!separate_files)
1656 {
1657 if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1658 && (cur_cmd->a2
1659 ? cur_cmd->range_state == RANGE_CLOSED
1660 : cur_cmd->a1->addr_number < input->line_number))
1661 {
1662 /* Skip this address next time */
1663 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1664 cur_cmd->a1->addr_type = ADDR_IS_NULL;
1665 if (cur_cmd->a2)
1666 cur_cmd->a2->addr_type = ADDR_IS_NULL;
1667
1668 /* can we make an optimization? */
1669 if (cur_cmd->addr_bang)
1670 {
1671 if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
1672 || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
1673 branches--;
1674
1675 cur_cmd->cmd = '#'; /* replace with no-op */
1676 if (branches == 0)
1677 cur_cmd = shrink_program(vec, cur_cmd);
1678 if (!cur_cmd && no_default_output)
1679 return 0;
1680 end_cmd = vec->v + vec->v_length;
1681 if (!cur_cmd)
1682 cur_cmd = end_cmd;
1683 continue;
1684 }
1685 }
1686 }
1687 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1688
1689 /* this is buried down here so that a "continue" statement can skip it */
1690 ++cur_cmd;
1691 }
1692
1693 if (!no_default_output)
1694 output_line(line.active, line.length, line.chomped, &output_file);
1695 return -1;
1696 }
1697
1698
1699
1700 /* Apply the compiled script to all the named files. */
1701 int
process_files(the_program,argv)1702 process_files(the_program, argv)
1703 struct vector *the_program;
1704 char **argv;
1705 {
1706 static char dash[] = "-";
1707 static char *stdin_argv[2] = { dash, NULL };
1708 struct input input;
1709 int status;
1710
1711 line_init(&line, INITIAL_BUFFER_SIZE);
1712 line_init(&hold, 0);
1713 line_init(&buffer, 0);
1714
1715 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1716 branches = count_branches(the_program);
1717 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1718 input.reset_at_next_file = true;
1719 if (argv && *argv)
1720 input.file_list = argv;
1721 else if (in_place_extension)
1722 panic(_("no input files"));
1723 else
1724 input.file_list = stdin_argv;
1725
1726 input.bad_count = 0;
1727 input.line_number = 0;
1728 input.read_fn = read_always_fail;
1729 input.fp = NULL;
1730
1731 status = EXIT_SUCCESS;
1732 while (read_pattern_space(&input, the_program, false))
1733 {
1734 status = execute_program(the_program, &input);
1735 if (status == -1)
1736 status = EXIT_SUCCESS;
1737 else
1738 break;
1739 }
1740 closedown(&input);
1741
1742 #ifdef DEBUG_LEAKS
1743 /* We're about to exit, so these free()s are redundant.
1744 But if we're running under a memory-leak detecting
1745 implementation of malloc(), we want to explicitly
1746 deallocate in order to avoid extraneous noise from
1747 the allocator. */
1748 release_append_queue();
1749 FREE(buffer.text);
1750 FREE(hold.text);
1751 FREE(line.text);
1752 FREE(s_accum.text);
1753 #endif /*DEBUG_LEAKS*/
1754
1755 if (input.bad_count)
1756 status = 2;
1757
1758 return status;
1759 }
1760