xref: /386bsd/usr/src/usr.bin/sed/sed.c (revision a2142627)
1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
3 
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2, or (at your option)
7     any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
17 
18 #ifdef __STDC__
19 #define VOID void
20 #else
21 #define VOID char
22 #endif
23 
24 
25 #define _GNU_SOURCE
26 #include <ctype.h>
27 #ifndef isblank
28 #define isblank(c) ((c) == ' ' || (c) == '\t')
29 #endif
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include "rx.h"
33 #include "getopt.h"
34 #if defined(STDC_HEADERS)
35 #include <stdlib.h>
36 #endif
37 #if HAVE_STRING_H || defined(STDC_HEADERS)
38 #include <string.h>
39 #ifndef bzero
40 #define bzero(s, n)	memset ((s), 0, (n))
41 #endif
42 #if !defined(STDC_HEADERS)
43 #include <memory.h>
44 #endif
45 #else
46 #include <strings.h>
47 #endif
48 
49 #ifdef RX_MEMDBUG
50 #include <malloc.h>
51 #endif
52 
53 #ifndef HAVE_BCOPY
54 #ifdef HAVE_MEMCPY
55 #define bcopy(FROM,TO,LEN)  memcpy(TO,FROM,LEN)
56 #else
/* Fallback bcopy for systems that provide neither bcopy nor memcpy.
   Copy LEN bytes from FROM to TO.  The two regions may overlap: when
   the destination lies above the source the copy runs from the last
   byte down to the first, so no source byte is overwritten before it
   has been read.  A zero or negative LEN copies nothing.  */
void
bcopy (from, to, len)
     char *from;
     char *to;
     int len;
{
  if (len <= 0)
    return;

  if (from < to)
    {
      /* Start one past the end and pre-decrement, so that no pointer
	 before the start of either region is ever formed.  */
      from += len;
      to += len;
      while (len--)
	*--to = *--from;
    }
  else
    while (len--)
      *to++ = *from++;
}
74 
75 #endif
76 #endif
77 
78 char *version_string = "GNU sed version 2.04";
79 
/* A struct vector describes one chunk of a compiled sed program.  The
 * top-level script has one vector, and every { } group has one of its
 * own.  For a { } group, RETURN_V and RETURN_I tell where execution
 * continues once the group's commands are done.
 */

struct vector
{
  struct sed_cmd *v;		/* The commands themselves. */
  int v_length;			/* Number of entries of V in use. */
  int v_allocated;		/* Number of entries V has room for. */
  struct vector *return_v;	/* Enclosing vector; zero at top level. */
  int return_i;			/* Index in RETURN_V of the '{' command. */
};
94 
95 
/* A struct sed_label records either a label definition (':') or a
 * reference to one ('b' or 't').  Two separate lists are kept: 'jumps'
 * holds the references and 'labels' holds the definitions.
 *
 * V is the descriptor of the program chunk in which the command was
 * found, and V_INDEX is the position of that command inside V's
 * command array, counting from zero.  NAME is the null-terminated label
 * name, and NEXT links to the next entry of the same list.
 */

struct sed_label
{
  struct vector *v;
  int v_index;
  char *name;
  struct sed_label *next;
};
114 
/* Values stored in the addr_type field of a struct addr.  They say
 * which of the other fields, if any, hold the address.
 */

enum addr_types
{
  addr_is_null = 0,		/* No address was given. */
  addr_is_num = 1,		/* addr_number holds a line number. */
  addr_is_regex = 2,		/* addr_regex holds a compiled pattern. */
  addr_is_last = 3,		/* The address is '$'. */
  addr_is_mod = 4		/* 'offset~modulo'; see offset and modulo. */
};
130 
/* One address of a sed command.  ADDR_TYPE is one of enum addr_types
   and selects which of the remaining fields are meaningful.  */
struct addr
{
  int addr_type;
  struct re_pattern_buffer *addr_regex;	/* For addr_is_regex. */
  int addr_number;			/* For addr_is_num. */
  int modulo, offset;			/* For addr_is_mod. */
};
138 
139 
140 /* Aflags:  If the low order bit is set, a1 has been
141  * matched; apply this command until a2 matches.
142  * If the next bit is set, apply this command to all
143  * lines that DON'T match the address(es).
144  */
145 
146 #define A1_MATCHED_BIT	01
147 #define ADDR_BANG_BIT	02
148 
149 struct sed_cmd
150 {
151   struct addr a1, a2;
152   int aflags;
153 
154   char cmd;
155 
156   union
157     {
158       /* This structure is used for a, i, and c commands */
159       struct
160 	{
161 	  char *text;
162 	  int text_len;
163 	}
164       cmd_txt;
165 
166       /* This is used for b and t commands */
167       struct sed_cmd *label;
168 
169       /* This for r and w commands */
170       FILE *io_file;
171 
172       /* This for the hairy s command */
173       /* For the flags var:
174 	 low order bit means the 'g' option was given,
175 	 next bit means the 'p' option was given,
176 	 and the next bit means a 'w' option was given,
177 	 and wio_file contains the file to write to. */
178 
179 #define S_GLOBAL_BIT	01
180 #define S_PRINT_BIT	02
181 #define S_WRITE_BIT	04
182 #define S_NUM_BIT	010
183 
184       struct
185 	{
186 	  struct re_pattern_buffer *regx;
187 	  char *replacement;
188 	  int replace_length;
189 	  int flags;
190 	  int numb;
191 	  FILE *wio_file;
192 	}
193       cmd_regex;
194 
195       /* This for the y command */
196       unsigned char *translate;
197 
198       /* For { */
199       struct vector *sub;
200 
201       /* for t and b */
202       struct sed_label *jump;
203     } x;
204 };
205 
/* Sed works on one line at a time; this is a growable line buffer. */
struct line
{
  char *text;			/* The line's bytes, allocated by malloc. */
  int length;			/* How many bytes of TEXT are in use. */
  int alloc;			/* How many bytes TEXT has room for. */
};
213 
/* Table of the files opened by the 'r', 'w', and 's///w' commands.
   Each entry holds the file's name together with the FILE pointers to
   use for reading it and for writing it.  At most NUM_FPS different
   files can be recorded. */

#define NUM_FPS	32
struct
  {
    FILE *for_read;
    FILE *for_write;
    char *name;
  }
file_ptrs[NUM_FPS];
227 
228 
229 #if defined(__STDC__)
230 # define P_(s) s
231 #else
232 # define P_(s) ()
233 #endif
234 
235 void close_files ();
236 void panic P_ ((char *str,...));
237 char *__fp_name P_ ((FILE * fp));
238 FILE *ck_fopen P_ ((char *name, char *mode));
239 void ck_fwrite P_ ((char *ptr, int size, int nmemb, FILE * stream));
240 void ck_fclose P_ ((FILE * stream));
241 VOID *ck_malloc P_ ((int size));
242 VOID *ck_realloc P_ ((VOID * ptr, int size));
243 char *ck_strdup P_ ((char *str));
244 VOID *init_buffer P_ ((void));
245 void flush_buffer P_ ((VOID * bb));
246 int size_buffer P_ ((VOID * b));
247 void add_buffer P_ ((VOID * bb, char *p, int n));
248 void add1_buffer P_ ((VOID * bb, int ch));
249 char *get_buffer P_ ((VOID * bb));
250 
251 void compile_string P_ ((char *str));
252 void compile_file P_ ((char *str));
253 struct vector *compile_program P_ ((struct vector * vector, int));
254 void bad_prog P_ ((char *why));
255 int inchar P_ ((void));
256 void savchar P_ ((int ch));
257 int compile_address P_ ((struct addr * addr));
258 char * last_regex_string = 0;
259 void buffer_regex  P_ ((int slash));
260 void compile_regex P_ ((void));
261 struct sed_label *setup_jump P_ ((struct sed_label * list, struct sed_cmd * cmd, struct vector * vec));
262 FILE *compile_filename P_ ((int readit));
263 void read_file P_ ((char *name));
264 void execute_program P_ ((struct vector * vec));
265 int match_address P_ ((struct addr * addr));
266 int read_pattern_space P_ ((void));
267 void append_pattern_space P_ ((void));
268 void line_copy P_ ((struct line * from, struct line * to));
269 void line_append P_ ((struct line * from, struct line * to));
270 void str_append P_ ((struct line * to, char *string, int length));
271 void usage P_ ((int));
272 
273 extern char *myname;
274 
275 /* If set, don't write out the line unless explictly told to */
276 int no_default_output = 0;
277 
278 /* Current input line # */
279 int input_line_number = 0;
280 
281 /* Are we on the last input file? */
282 int last_input_file = 0;
283 
284 /* Have we hit EOF on the last input file?  This is used to decide if we
285    have hit the '$' address yet. */
286 int input_EOF = 0;
287 
288 /* non-zero if a quit command has been executed. */
289 int quit_cmd = 0;
290 
291 /* Have we done any replacements lately?  This is used by the 't' command. */
292 int replaced = 0;
293 
294 /* How many '{'s are we executing at the moment */
295 int program_depth = 0;
296 
297 /* The complete compiled SED program that we are going to run */
298 struct vector *the_program = 0;
299 
300 /* information about labels and jumps-to-labels.  This is used to do
301    the required backpatching after we have compiled all the scripts. */
302 struct sed_label *jumps = 0;
303 struct sed_label *labels = 0;
304 
305 /* The 'current' input line. */
306 struct line line;
307 
308 /* An input line that's been stored by later use by the program */
309 struct line hold;
310 
311 /* A 'line' to append to the current line when it comes time to write it out */
312 struct line append;
313 
314 
315 /* When we're reading a script command from a string, 'prog_start' and
316    'prog_end' point to the beginning and end of the string.  This
317    would allow us to compile script strings that contain nulls, except
318    that script strings are only read from the command line, which is
319    null-terminated */
320 unsigned char *prog_start;
321 unsigned char *prog_end;
322 
323 /* When we're reading a script command from a string, 'prog_cur' points
324    to the current character in the string */
325 unsigned char *prog_cur;
326 
327 /* This is the name of the current script file.
328    It is used for error messages. */
329 char *prog_name;
330 
331 /* This is the current script file.  If it is zero, we are reading
332    from a string stored in 'prog_start' instead.  If both 'prog_file'
333    and 'prog_start' are zero, we're in trouble! */
334 FILE *prog_file;
335 
336 /* this is the number of the current script line that we're compiling.  It is
337    used to give out useful and informative error messages. */
338 int prog_line = 1;
339 
340 /* This is the file pointer that we're currently reading data from.  It may
341    be stdin */
342 FILE *input_file;
343 
344 /* If this variable is non-zero at exit, one or more of the input
345    files couldn't be opened. */
346 
347 int bad_input = 0;
348 
349 /* 'an empty regular expression is equivalent to the last regular
350    expression read' so we have to keep track of the last regex used.
351    Here's where we store a pointer to it (it is only malloc()'d once) */
352 struct re_pattern_buffer *last_regex;
353 
/* Messages handed to bad_prog when a script is malformed */
static char ONE_ADDR[] = "Command only uses one address";
static char NO_ADDR[] = "Command doesn't take any addresses";
static char LINE_JUNK[] = "Extra characters after command";
static char BAD_EOF[] = "Unexpected End-of-file";
static char NO_REGEX[] = "No previous regular expression";
static char NO_COMMAND[] = "Missing command";

/* Long command-line options.  Each one maps onto the short option
   letter in its last field; the second field is 1 when the option
   takes an argument.  The all-zero entry ends the table. */
static struct option longopts[] =
{
  {"expression", 1, NULL, 'e'},
  {"file", 1, NULL, 'f'},
  {"quiet", 0, NULL, 'n'},
  {"silent", 0, NULL, 'n'},
  {"version", 0, NULL, 'V'},
  {"help", 0, NULL, 'h'},
  {NULL, 0, NULL, 0}
};
372 
373 void
main(argc,argv)374 main (argc, argv)
375      int argc;
376      char **argv;
377 {
378   int opt;
379   char *e_strings = NULL;
380   int compiled = 0;
381   struct sed_label *go, *lbl;
382 
383   /* see regex.h */
384   re_set_syntax (RE_SYNTAX_POSIX_BASIC);
385   rx_cache_bound = 4096;	/* Consume memory rampantly. */
386 
387   myname = argv[0];
388   while ((opt = getopt_long (argc, argv, "hne:f:V", longopts, (int *) 0))
389 	 != EOF)
390     {
391       switch (opt)
392 	{
393 	case 'n':
394 	  no_default_output = 1;
395 	  break;
396 	case 'e':
397 	  if (e_strings == NULL)
398 	    {
399 	      e_strings = ck_malloc (strlen (optarg) + 2);
400 	      strcpy (e_strings, optarg);
401 	    }
402 	  else
403 	    {
404 	      e_strings = ck_realloc (e_strings, strlen (e_strings) + strlen (optarg) + 2);
405 	      strcat (e_strings, optarg);
406 	    }
407 	  strcat (e_strings, "\n");
408 	  compiled = 1;
409 	  break;
410 	case 'f':
411 	  if (e_strings)
412 	    {
413 	      compile_string (e_strings);
414 	      free (e_strings);
415 	      e_strings = 0;
416 	    }
417 	  compile_file (optarg);
418 	  compiled = 1;
419 	  break;
420 	case 'V':
421 	  fprintf (stderr, "%s\n", version_string);
422 	  exit (0);
423 	  break;
424 	case 'h':
425 	  usage (0);
426 	  break;
427 	default:
428 	  usage (4);
429 	  break;
430 	}
431     }
432   if (e_strings)
433     {
434       compile_string (e_strings);
435       free (e_strings);
436     }
437   if (!compiled)
438     {
439       if (optind == argc)
440 	usage (4);
441       compile_string (argv[optind++]);
442     }
443 
444   for (go = jumps; go; go = go->next)
445     {
446       for (lbl = labels; lbl; lbl = lbl->next)
447 	if (!strcmp (lbl->name, go->name))
448 	  break;
449       if (*go->name && !lbl)
450 	panic ("Can't find label for jump to '%s'", go->name);
451       go->v->v[go->v_index].x.jump = lbl;
452     }
453 
454   line.length = 0;
455   line.alloc = 50;
456   line.text = ck_malloc (50);
457 
458   append.length = 0;
459   append.alloc = 50;
460   append.text = ck_malloc (50);
461 
462   hold.length = 1;
463   hold.alloc = 50;
464   hold.text = ck_malloc (50);
465   hold.text[0] = '\n';
466 
467   if (argc <= optind)
468     {
469       last_input_file++;
470       read_file ("-");
471     }
472   else
473     while (optind < argc)
474       {
475 	if (optind == argc - 1)
476 	  last_input_file++;
477 	read_file (argv[optind]);
478 	optind++;
479 	if (quit_cmd)
480 	  break;
481       }
482   close_files ();
483   if (bad_input)
484     exit (2);
485   exit (0);
486 }
487 
488 void
close_files()489 close_files ()
490 {
491   int nf;
492 
493   for (nf = 0; nf < NUM_FPS; nf++)
494     {
495       if (file_ptrs[nf].for_write)
496 	fclose (file_ptrs[nf].for_write);
497       if (file_ptrs[nf].for_read)
498 	fclose (file_ptrs[nf].for_read);
499     }
500 }
501 
502 /* 'str' is a string (from the command line) that contains a sed command.
503    Compile the command, and add it to the end of 'the_program' */
504 void
compile_string(str)505 compile_string (str)
506      char *str;
507 {
508   prog_file = 0;
509   prog_line = 0;
510   prog_start = prog_cur = (unsigned char *)str;
511   prog_end = (unsigned char *)str + strlen (str);
512   the_program = compile_program (the_program, prog_line);
513 }
514 
515 /* 'str' is the name of a file containing sed commands.  Read them in
516    and add them to the end of 'the_program' */
517 void
compile_file(str)518 compile_file (str)
519      char *str;
520 {
521   int ch;
522 
523   prog_start = prog_cur = prog_end = 0;
524   prog_name = str;
525   prog_line = 1;
526   if (str[0] == '-' && str[1] == '\0')
527     prog_file = stdin;
528   else
529     prog_file = ck_fopen (str, "r");
530   ch = getc (prog_file);
531   if (ch == '#')
532     {
533       ch = getc (prog_file);
534       if (ch == 'n')
535 	no_default_output++;
536       while (ch != EOF && ch != '\n')
537 	{
538 	  ch = getc (prog_file);
539 	  if (ch == '\\')
540 	    ch = getc (prog_file);
541 	}
542       ++prog_line;
543     }
544   else if (ch != EOF)
545     ungetc (ch, prog_file);
546   the_program = compile_program (the_program, prog_line);
547 }
548 
549 #define MORE_CMDS 40
550 
551 /* Read a program (or a subprogram within '{' '}' pairs) in and store
552    the compiled form in *'vector'  Return a pointer to the new vector.  */
553 struct vector *
compile_program(vector,open_line)554 compile_program (vector, open_line)
555      struct vector *vector;
556      int open_line;
557 {
558   struct sed_cmd *cur_cmd;
559   int ch = 0;
560   int pch;
561   int slash;
562   VOID *b;
563   unsigned char *string;
564   int num;
565 
566   if (!vector)
567     {
568       vector = (struct vector *) ck_malloc (sizeof (struct vector));
569       vector->v = (struct sed_cmd *) ck_malloc (MORE_CMDS * sizeof (struct sed_cmd));
570       vector->v_allocated = MORE_CMDS;
571       vector->v_length = 0;
572       vector->return_v = 0;
573       vector->return_i = 0;
574     }
575   for (;;)
576     {
577     skip_comment:
578       do
579 	{
580 	  pch = ch;
581 	  ch = inchar ();
582 	  if ((pch == '\\') && (ch == '\n'))
583 	    ch = inchar ();
584 	}
585       while (ch != EOF && (isblank (ch) || ch == '\n' || ch == ';'));
586       if (ch == EOF)
587 	break;
588       savchar (ch);
589 
590       if (vector->v_length == vector->v_allocated)
591 	{
592 	  vector->v = ((struct sed_cmd *)
593 		       ck_realloc ((VOID *) vector->v,
594 				   ((vector->v_length + MORE_CMDS)
595 				    * sizeof (struct sed_cmd))));
596 	  vector->v_allocated += MORE_CMDS;
597 	}
598       cur_cmd = vector->v + vector->v_length;
599       vector->v_length++;
600 
601       cur_cmd->a1.addr_type = 0;
602       cur_cmd->a2.addr_type = 0;
603       cur_cmd->aflags = 0;
604       cur_cmd->cmd = 0;
605 
606       if (compile_address (&(cur_cmd->a1)))
607 	{
608 	  ch = inchar ();
609 	  if (ch == ',')
610 	    {
611 	      do
612 		ch = inchar ();
613 	      while (ch != EOF && isblank (ch));
614 	      savchar (ch);
615 	      if (compile_address (&(cur_cmd->a2)))
616 		;
617 	      else
618 		bad_prog ("Unexpected ','");
619 	    }
620 	  else
621 	    savchar (ch);
622 	}
623       if (cur_cmd->a1.addr_type == addr_is_num
624 	  && cur_cmd->a2.addr_type == addr_is_num
625 	  && cur_cmd->a2.addr_number < cur_cmd->a1.addr_number)
626 	cur_cmd->a2.addr_number = cur_cmd->a1.addr_number;
627 
628       ch = inchar ();
629       if (ch == EOF)
630 	bad_prog (NO_COMMAND);
631     new_cmd:
632       switch (ch)
633 	{
634 	case '#':
635 	  if (cur_cmd->a1.addr_type != 0)
636 	    bad_prog (NO_ADDR);
637 	  do
638 	    {
639 	      ch = inchar ();
640 	      if (ch == '\\')
641 		ch = inchar ();
642 	    }
643 	  while (ch != EOF && ch != '\n');
644 	  vector->v_length--;
645 	  goto skip_comment;
646 	case '!':
647 	  if (cur_cmd->aflags & ADDR_BANG_BIT)
648 	    bad_prog ("Multiple '!'s");
649 	  cur_cmd->aflags |= ADDR_BANG_BIT;
650 	  do
651 	    ch = inchar ();
652 	  while (ch != EOF && isblank (ch));
653 	  if (ch == EOF)
654 	    bad_prog (NO_COMMAND);
655 #if 0
656 	  savchar (ch);
657 #endif
658 	  goto new_cmd;
659 	case 'a':
660 	case 'i':
661 	  if (cur_cmd->a2.addr_type != 0)
662 	    bad_prog (ONE_ADDR);
663 	  /* Fall Through */
664 	case 'c':
665 	  cur_cmd->cmd = ch;
666 	  if (inchar () != '\\' || inchar () != '\n')
667 	    bad_prog (LINE_JUNK);
668 	  b = init_buffer ();
669 	  while ((ch = inchar ()) != EOF && ch != '\n')
670 	    {
671 	      if (ch == '\\')
672 		ch = inchar ();
673 	      add1_buffer (b, ch);
674 	    }
675 	  if (ch != EOF)
676 	    add1_buffer (b, ch);
677 	  num = size_buffer (b);
678 	  string = (unsigned char *) ck_malloc (num);
679 	  bcopy (get_buffer (b), string, num);
680 	  flush_buffer (b);
681 	  cur_cmd->x.cmd_txt.text_len = num;
682 	  cur_cmd->x.cmd_txt.text = (char *) string;
683 	  break;
684 	case '{':
685 	  cur_cmd->cmd = ch;
686 	  program_depth++;
687 #if 0
688 	  while ((ch = inchar ()) != EOF && ch != '\n')
689 	    if (!isblank (ch))
690 	      bad_prog (LINE_JUNK);
691 #endif
692 	  cur_cmd->x.sub = compile_program ((struct vector *) 0, prog_line);
693 	  /* FOO JF is this the right thing to do?
694 			   almost.  don't forget a return addr.  -t */
695 	  cur_cmd->x.sub->return_v = vector;
696 	  cur_cmd->x.sub->return_i = vector->v_length - 1;
697 	  break;
698 	case '}':
699 	  if (!program_depth)
700 	    bad_prog ("Unexpected '}'");
701 	  --program_depth;
702 	  /* a return insn for subprograms -t */
703 	  cur_cmd->cmd = ch;
704 	  if (cur_cmd->a1.addr_type != 0)
705 	    bad_prog ("} doesn't want any addresses");
706 	  while ((ch = inchar ()) != EOF && ch != '\n' && ch != ';')
707 	    if (!isblank (ch))
708 	      bad_prog (LINE_JUNK);
709 	  return vector;
710 	case ':':
711 	  cur_cmd->cmd = ch;
712 	  if (cur_cmd->a1.addr_type != 0)
713 	    bad_prog (": doesn't want any addresses");
714 	  labels = setup_jump (labels, cur_cmd, vector);
715 	  break;
716 	case 'b':
717 	case 't':
718 	  cur_cmd->cmd = ch;
719 	  jumps = setup_jump (jumps, cur_cmd, vector);
720 	  break;
721 	case 'q':
722 	case '=':
723 	  if (cur_cmd->a2.addr_type)
724 	    bad_prog (ONE_ADDR);
725 	  /* Fall Through */
726 	case 'd':
727 	case 'D':
728 	case 'g':
729 	case 'G':
730 	case 'h':
731 	case 'H':
732 	case 'l':
733 	case 'n':
734 	case 'N':
735 	case 'p':
736 	case 'P':
737 	case 'x':
738 	  cur_cmd->cmd = ch;
739 	  do
740 	    ch = inchar ();
741 	  while (ch != EOF && isblank (ch) && ch != '\n' && ch != ';');
742 	  if (ch != '\n' && ch != ';' && ch != EOF)
743 	    bad_prog (LINE_JUNK);
744 	  break;
745 
746 	case 'r':
747 	  if (cur_cmd->a2.addr_type != 0)
748 	    bad_prog (ONE_ADDR);
749 	  /* FALL THROUGH */
750 	case 'w':
751 	  cur_cmd->cmd = ch;
752 	  cur_cmd->x.io_file = compile_filename (ch == 'r');
753 	  break;
754 
755 	case 's':
756 	  cur_cmd->cmd = ch;
757 	  slash = inchar ();
758 	  buffer_regex (slash);
759 	  compile_regex ();
760 
761 	  cur_cmd->x.cmd_regex.regx = last_regex;
762 
763 	  b = init_buffer ();
764 	  while (((ch = inchar ()) != EOF) && (ch != slash) && (ch != '\n'))
765 	    {
766 	      if (ch == '\\')
767 		{
768 		  int ci;
769 
770 		  ci = inchar ();
771 		  if (ci != EOF)
772 		    {
773 		      if (ci != '\n')
774 			add1_buffer (b, ch);
775 		      add1_buffer (b, ci);
776 		    }
777 		}
778 	      else
779 		add1_buffer (b, ch);
780 	    }
781 	  if (ch != slash)
782 	    {
783 	      if (ch == '\n' && prog_line > 1)
784 		--prog_line;
785 	      bad_prog ("Unterminated `s' command");
786 	    }
787 	  cur_cmd->x.cmd_regex.replace_length = size_buffer (b);
788 	  cur_cmd->x.cmd_regex.replacement = ck_malloc (cur_cmd->x.cmd_regex.replace_length);
789 	  bcopy (get_buffer (b), cur_cmd->x.cmd_regex.replacement, cur_cmd->x.cmd_regex.replace_length);
790 	  flush_buffer (b);
791 
792 	  cur_cmd->x.cmd_regex.flags = 0;
793 	  cur_cmd->x.cmd_regex.numb = 0;
794 
795 	  if (ch == EOF)
796 	    break;
797 	  do
798 	    {
799 	      ch = inchar ();
800 	      switch (ch)
801 		{
802 		case 'p':
803 		  if (cur_cmd->x.cmd_regex.flags & S_PRINT_BIT)
804 		    bad_prog ("multiple 'p' options to 's' command");
805 		  cur_cmd->x.cmd_regex.flags |= S_PRINT_BIT;
806 		  break;
807 		case 'g':
808 		  if (cur_cmd->x.cmd_regex.flags & S_NUM_BIT)
809 		    cur_cmd->x.cmd_regex.flags &= ~S_NUM_BIT;
810 		  if (cur_cmd->x.cmd_regex.flags & S_GLOBAL_BIT)
811 		    bad_prog ("multiple 'g' options to 's' command");
812 		  cur_cmd->x.cmd_regex.flags |= S_GLOBAL_BIT;
813 		  break;
814 		case 'w':
815 		  cur_cmd->x.cmd_regex.flags |= S_WRITE_BIT;
816 		  cur_cmd->x.cmd_regex.wio_file = compile_filename (0);
817 		  ch = '\n';
818 		  break;
819 		case '0':
820 		case '1':
821 		case '2':
822 		case '3':
823 		case '4':
824 		case '5':
825 		case '6':
826 		case '7':
827 		case '8':
828 		case '9':
829 		  if (cur_cmd->x.cmd_regex.flags & S_NUM_BIT)
830 		    bad_prog ("multiple number options to 's' command");
831 		  if ((cur_cmd->x.cmd_regex.flags & S_GLOBAL_BIT) == 0)
832 		    cur_cmd->x.cmd_regex.flags |= S_NUM_BIT;
833 		  num = 0;
834 		  while (isdigit (ch))
835 		    {
836 		      num = num * 10 + ch - '0';
837 		      ch = inchar ();
838 		    }
839 		  savchar (ch);
840 		  cur_cmd->x.cmd_regex.numb = num;
841 		  break;
842 		case '\n':
843 		case ';':
844 		case EOF:
845 		  break;
846 		default:
847 		  bad_prog ("Unknown option to 's'");
848 		  break;
849 		}
850 	    }
851 	  while (ch != EOF && ch != '\n' && ch != ';');
852 	  if (ch == EOF)
853 	    break;
854 	  break;
855 
856 	case 'y':
857 	  cur_cmd->cmd = ch;
858 	  string = (unsigned char *) ck_malloc (256);
859 	  for (num = 0; num < 256; num++)
860 	    string[num] = num;
861 	  b = init_buffer ();
862 	  slash = inchar ();
863 	  while ((ch = inchar ()) != EOF && ch != slash)
864 	    add1_buffer (b, ch);
865 	  cur_cmd->x.translate = string;
866 	  string = (unsigned char *) get_buffer (b);
867 	  for (num = size_buffer (b); num; --num)
868 	    {
869 	      ch = inchar ();
870 	      if (ch == EOF)
871 		bad_prog (BAD_EOF);
872 	      if (ch == slash)
873 		bad_prog ("strings for y command are different lengths");
874 	      cur_cmd->x.translate[*string++] = ch;
875 	    }
876 	  flush_buffer (b);
877 	  if (inchar () != slash || ((ch = inchar ()) != EOF && ch != '\n' && ch != ';'))
878 	    bad_prog (LINE_JUNK);
879 	  break;
880 
881 	default:
882 	  bad_prog ("Unknown command");
883 	}
884     }
885   if (program_depth)
886     {
887       prog_line = open_line;
888       bad_prog ("Unmatched `{'");
889     }
890   return vector;
891 }
892 
893 /* Complain about a programming error and exit. */
894 void
bad_prog(why)895 bad_prog (why)
896      char *why;
897 {
898   if (prog_line > 0)
899     fprintf (stderr, "%s: file %s line %d: %s\n",
900 	     myname, prog_name, prog_line, why);
901   else
902     fprintf (stderr, "%s: %s\n", myname, why);
903   exit (1);
904 }
905 
906 /* Read the next character from the program.  Return EOF if there isn't
907    anything to read.  Keep prog_line up to date, so error messages can
908    be meaningful. */
909 int
inchar()910 inchar ()
911 {
912   int ch;
913   if (prog_file)
914     {
915       if (feof (prog_file))
916 	return EOF;
917       else
918 	ch = getc (prog_file);
919     }
920   else
921     {
922       if (!prog_cur)
923 	return EOF;
924       else if (prog_cur == prog_end)
925 	{
926 	  ch = EOF;
927 	  prog_cur = 0;
928 	}
929       else
930 	ch = *prog_cur++;
931     }
932   if ((ch == '\n') && prog_line)
933     prog_line++;
934   return ch;
935 }
936 
937 /* unget 'ch' so the next call to inchar will return it.  'ch' must not be
938    EOF or anything nasty like that. */
939 void
savchar(ch)940 savchar (ch)
941      int ch;
942 {
943   if (ch == EOF)
944     return;
945   if (ch == '\n' && prog_line > 1)
946     --prog_line;
947   if (prog_file)
948     ungetc (ch, prog_file);
949   else
950     *--prog_cur = ch;
951 }
952 
953 
954 /* Try to read an address for a sed command.  If it succeeeds,
955    return non-zero and store the resulting address in *'addr'.
956    If the input doesn't look like an address read nothing
957    and return zero. */
958 int
compile_address(addr)959 compile_address (addr)
960      struct addr *addr;
961 {
962   int ch;
963   int num;
964 
965   ch = inchar ();
966 
967   if (isdigit (ch))
968     {
969       num = ch - '0';
970       while ((ch = inchar ()) != EOF && isdigit (ch))
971       if (ch == '~')
972 	{
973 	  addr->addr_type = addr_is_mod;
974 	  addr->offset = num;
975 	  ch = inchar();
976 	  num=0;
977 	  if (isdigit(ch)) {
978 	    num = ch - '0';
979 	    while ((ch = inchar ()) != EOF && isdigit (ch))
980 	      num = num * 10 + ch - '0';
981 	    addr->modulo = num;
982 	  }
983 	  addr->modulo += (addr->modulo==0);
984 	} else {
985 	  addr->addr_type = addr_is_num;
986 	  addr->addr_number = num;
987 	}
988 	num = num * 10 + ch - '0';
989       while (ch != EOF && isblank (ch))
990 	ch = inchar ();
991       savchar (ch);
992       return 1;
993     }
994   else if (ch == '/' || ch == '\\')
995     {
996       addr->addr_type = addr_is_regex;
997       if (ch == '\\')
998 	ch = inchar ();
999       buffer_regex (ch);
1000       compile_regex ();
1001       addr->addr_regex = last_regex;
1002       do
1003 	ch = inchar ();
1004       while (ch != EOF && isblank (ch));
1005       savchar (ch);
1006       return 1;
1007     }
1008   else if (ch == '$')
1009     {
1010       addr->addr_type = addr_is_last;
1011       do
1012 	ch = inchar ();
1013       while (ch != EOF && isblank (ch));
1014       savchar (ch);
1015       return 1;
1016     }
1017   else
1018     savchar (ch);
1019   return 0;
1020 }
1021 
1022 void
buffer_regex(slash)1023 buffer_regex (slash)
1024      int slash;
1025 {
1026   VOID *b;
1027   int ch;
1028   int char_class_pos = -1;
1029 
1030   b = init_buffer ();
1031   while ((ch = inchar ()) != EOF && (ch != slash || (char_class_pos >= 0)))
1032     {
1033       if (ch == '^')
1034 	{
1035 	  if (size_buffer (b) == 0)
1036 	    {
1037 	      add1_buffer (b, '\\');
1038 	      add1_buffer (b, '`');
1039 	    }
1040 	  else
1041 	    add1_buffer (b, ch);
1042 	  continue;
1043 	}
1044       else if (ch == '$')
1045 	{
1046 	  ch = inchar ();
1047 	  savchar (ch);
1048 	  if (ch == slash)
1049 	    {
1050 	      add1_buffer (b, '\\');
1051 	      add1_buffer (b, '\'');
1052 	    }
1053 	  else
1054 	    add1_buffer (b, '$');
1055 	  continue;
1056 	}
1057       else if (ch == '[')
1058 	{
1059 	  if (char_class_pos < 0)
1060 	    char_class_pos = size_buffer (b);
1061 	  add1_buffer (b, ch);
1062 	  continue;
1063 	}
1064       else if (ch == ']')
1065 	{
1066 	  add1_buffer (b, ch);
1067 	  {
1068 	    char * regexp = get_buffer (b);
1069 	    int pos = size_buffer (b) - 1;
1070 	    if (!(   (char_class_pos >= 0)
1071 		  && (   (pos == char_class_pos + 1)
1072 		      || (   (pos == char_class_pos + 2)
1073 			  && (regexp[char_class_pos + 1] == '^')))))
1074 	      char_class_pos = -1;
1075 	    continue;
1076 	  }
1077 	}
1078       else if (ch != '\\' || (char_class_pos >= 0))
1079 	{
1080 	  add1_buffer (b, ch);
1081 	  continue;
1082 	}
1083       ch = inchar ();
1084       switch (ch)
1085 	{
1086 	case 'n':
1087 	  add1_buffer (b, '\n');
1088 	  break;
1089 #if 0
1090 	case 'b':
1091 	  add1_buffer (b, '\b');
1092 	  break;
1093 	case 'f':
1094 	  add1_buffer (b, '\f');
1095 	  break;
1096 	case 'r':
1097 	  add1_buffer (b, '\r');
1098 	  break;
1099 	case 't':
1100 	  add1_buffer (b, '\t');
1101 	  break;
1102 #endif /* 0 */
1103 	case EOF:
1104 	  break;
1105 	default:
1106 	  add1_buffer (b, '\\');
1107 	  add1_buffer (b, ch);
1108 	  break;
1109 	}
1110     }
1111   if (ch == EOF)
1112     bad_prog (BAD_EOF);
1113   if (size_buffer (b))
1114     {
1115       if (last_regex_string)
1116 	last_regex_string = (char *)ck_realloc (last_regex_string,
1117 						size_buffer (b) + 1);
1118       else
1119 	last_regex_string = (char *)ck_malloc (size_buffer (b) + 1);
1120       bcopy (get_buffer (b), last_regex_string, size_buffer (b));
1121       last_regex_string [size_buffer (b)] = 0;
1122     }
1123   else if (!last_regex)
1124     bad_prog (NO_REGEX);
1125   flush_buffer (b);
1126 }
1127 
1128 void
compile_regex()1129 compile_regex ()
1130 {
1131   const char * error;
1132   last_regex = ((struct re_pattern_buffer *)
1133 		ck_malloc (sizeof (struct re_pattern_buffer)));
1134   bzero (last_regex, sizeof (*last_regex));
1135   last_regex->fastmap = ck_malloc (256);
1136   error = re_compile_pattern (last_regex_string,
1137 			      strlen (last_regex_string), last_regex);
1138   if (error)
1139     bad_prog ((char *)error);
1140 }
1141 
1142 /* Store a label (or label reference) created by a ':', 'b', or 't'
1143    comand so that the jump to/from the lable can be backpatched after
1144    compilation is complete */
1145 struct sed_label *
setup_jump(list,cmd,vec)1146 setup_jump (list, cmd, vec)
1147      struct sed_label *list;
1148      struct sed_cmd *cmd;
1149      struct vector *vec;
1150 {
1151   struct sed_label *tmp;
1152   VOID *b;
1153   int ch;
1154 
1155   b = init_buffer ();
1156   while ((ch = inchar ()) != EOF && isblank (ch))
1157     ;
1158   /* Possible non posixicity. */
1159   while (ch != EOF && ch != '\n' && (!isblank (ch)) && ch != ';' && ch != '}')
1160     {
1161       add1_buffer (b, ch);
1162       ch = inchar ();
1163     }
1164   savchar (ch);
1165   add1_buffer (b, '\0');
1166   tmp = (struct sed_label *) ck_malloc (sizeof (struct sed_label));
1167   tmp->v = vec;
1168   tmp->v_index = cmd - vec->v;
1169   tmp->name = ck_strdup (get_buffer (b));
1170   tmp->next = list;
1171   flush_buffer (b);
1172   return tmp;
1173 }
1174 
1175 /* read in a filename for a 'r', 'w', or 's///w' command, and
1176    update the internal structure about files.  The file is
1177    opened if it isn't already open. */
1178 FILE *
compile_filename(readit)1179 compile_filename (readit)
1180      int readit;
1181 {
1182   char *file_name;
1183   int n;
1184   VOID *b;
1185   int ch;
1186 
1187   if (inchar () != ' ')
1188     bad_prog ("missing ' ' before filename");
1189   b = init_buffer ();
1190   while ((ch = inchar ()) != EOF && ch != '\n')
1191     add1_buffer (b, ch);
1192   add1_buffer (b, '\0');
1193   file_name = get_buffer (b);
1194   for (n = 0; n < NUM_FPS; n++)
1195     {
1196       if (!file_ptrs[n].name)
1197 	break;
1198     }
1199   if (n < NUM_FPS)
1200     {
1201       file_ptrs[n].name = ck_strdup (file_name);
1202       if (!readit)
1203 	{
1204 	  if (!file_ptrs[n].for_write)
1205 	    file_ptrs[n].for_write = ck_fopen (file_name, "w");
1206 	}
1207       else
1208 	{
1209 	  if (!file_ptrs[n].for_read)
1210 	    file_ptrs[n].for_read = fopen (file_name, "r");
1211 	}
1212       flush_buffer (b);
1213       return readit ? file_ptrs[n].for_read : file_ptrs[n].for_write;
1214     }
1215   else
1216     {
1217       bad_prog ("Hopelessely evil compiled in limit on number of open files.  re-compile sed");
1218       return 0;
1219     }
1220 }
1221 
1222 /* Read a file and apply the compiled script to it. */
1223 void
read_file(name)1224 read_file (name)
1225      char *name;
1226 {
1227   if (*name == '-' && name[1] == '\0')
1228     input_file = stdin;
1229   else
1230     {
1231       input_file = fopen (name, "r");
1232       if (input_file == 0)
1233 	{
1234 	  extern int errno;
1235 	  extern char *sys_errlist[];
1236 	  extern int sys_nerr;
1237 
1238 	  char *ptr;
1239 
1240 	  ptr = ((errno >= 0 && errno < sys_nerr)
1241 		 ? sys_errlist[errno] : "Unknown error code");
1242 	  bad_input++;
1243 	  fprintf (stderr, "%s: can't read %s: %s\n", myname, name, ptr);
1244 	  return;
1245 	}
1246     }
1247 
1248   while (read_pattern_space ())
1249     {
1250       execute_program (the_program);
1251       if (!no_default_output)
1252 	ck_fwrite (line.text, 1, line.length, stdout);
1253       if (append.length)
1254 	{
1255 	  ck_fwrite (append.text, 1, append.length, stdout);
1256 	  append.length = 0;
1257 	}
1258       if (quit_cmd)
1259 	break;
1260     }
1261   ck_fclose (input_file);
1262 }
1263 
/* Return a pointer to the first newline within the LEN bytes starting at
   STR.  If there is none, return a pointer to the last of those bytes
   (that is, STR + LEN - 1). */
static char *
eol_pos (str, len)
     char *str;
     int len;
{
  char *end = str + len;
  char *p;

  for (p = str; p < end; p++)
    {
      if (*p == '\n')
        return p;
    }
  return end - 1;
}
1274 
/* Copy LEN bytes from SRC to DEST, one byte at a time from the lowest
   address upward.  Because of that order the copy is also correct when
   the regions overlap with DEST below SRC, which is how the 'D' command
   uses it to shift the pattern space down. */
static void
chr_copy (dest, src, len)
     char *dest;
     char *src;
     int len;
{
  int i;

  for (i = 0; i < len; i++)
    dest[i] = src[i];
}
1284 
1285 /* Execute the program 'vec' on the current input line. */
1286 static struct re_registers regs =
1287 {0, 0, 0};
1288 
1289 void
execute_program(vec)1290 execute_program (vec)
1291      struct vector *vec;
1292 {
1293   struct sed_cmd *cur_cmd;
1294   int n;
1295   int addr_matched;
1296   static int end_cycle;
1297 
1298   int start;
1299   int remain;
1300   int offset;
1301 
1302   static struct line tmp;
1303   struct line t;
1304   char *rep, *rep_end, *rep_next, *rep_cur;
1305 
1306   int count;
1307   struct vector *restart_vec = vec;
1308 
1309 restart:
1310   vec = restart_vec;
1311   count = 0;
1312 
1313   end_cycle = 0;
1314 
1315   for (cur_cmd = vec->v, n = vec->v_length; n; cur_cmd++, n--)
1316     {
1317     exe_loop:
1318       addr_matched = 0;
1319       if (cur_cmd->aflags & A1_MATCHED_BIT)
1320 	{
1321 	  addr_matched = 1;
1322 	  if (match_address (&(cur_cmd->a2)))
1323 	    cur_cmd->aflags &= ~A1_MATCHED_BIT;
1324 	}
1325       else if (match_address (&(cur_cmd->a1)))
1326 	{
1327 	  addr_matched = 1;
1328 	  if (cur_cmd->a2.addr_type != addr_is_null)
1329 	    if (   (cur_cmd->a2.addr_type == addr_is_regex)
1330 		|| !match_address (&(cur_cmd->a2)))
1331 	      cur_cmd->aflags |= A1_MATCHED_BIT;
1332 
1333 	}
1334       if (cur_cmd->aflags & ADDR_BANG_BIT)
1335 	addr_matched = !addr_matched;
1336       if (!addr_matched)
1337 	continue;
1338       switch (cur_cmd->cmd)
1339 	{
1340 	case '{':		/* Execute sub-program */
1341 	  if (cur_cmd->x.sub->v_length)
1342 	    {
1343 	      vec = cur_cmd->x.sub;
1344 	      cur_cmd = vec->v;
1345 	      n = vec->v_length;
1346 	      goto exe_loop;
1347 	    }
1348 	  break;
1349 
1350 	case '}':
1351 	  cur_cmd = vec->return_v->v + vec->return_i;
1352 	  n = vec->return_v->v_length - vec->return_i;
1353 	  vec = vec->return_v;
1354 	  break;
1355 
1356 	case ':':		/* Executing labels is easy. */
1357 	  break;
1358 
1359 	case '=':
1360 	  printf ("%d\n", input_line_number);
1361 	  break;
1362 
1363 	case 'a':
1364 	  while (append.alloc - append.length < cur_cmd->x.cmd_txt.text_len)
1365 	    {
1366 	      append.alloc *= 2;
1367 	      append.text = ck_realloc (append.text, append.alloc);
1368 	    }
1369 	  bcopy (cur_cmd->x.cmd_txt.text,
1370 		 append.text + append.length, cur_cmd->x.cmd_txt.text_len);
1371 	  append.length += cur_cmd->x.cmd_txt.text_len;
1372 	  break;
1373 
1374 	case 'b':
1375 	  if (!cur_cmd->x.jump)
1376 	    end_cycle++;
1377 	  else
1378 	    {
1379 	      struct sed_label *j = cur_cmd->x.jump;
1380 
1381 	      n = j->v->v_length - j->v_index;
1382 	      cur_cmd = j->v->v + j->v_index;
1383 	      vec = j->v;
1384 	      goto exe_loop;
1385 	    }
1386 	  break;
1387 
1388 	case 'c':
1389 	  line.length = 0;
1390 	  if (!((cur_cmd->aflags & A1_MATCHED_BIT)))
1391 	    ck_fwrite (cur_cmd->x.cmd_txt.text,
1392 		       1, cur_cmd->x.cmd_txt.text_len, stdout);
1393 	  end_cycle++;
1394 	  break;
1395 
1396 	case 'd':
1397 	  line.length = 0;
1398 	  end_cycle++;
1399 	  break;
1400 
1401 	case 'D':
1402 	  {
1403 	    char *tmp;
1404 	    int newlength;
1405 
1406 	    tmp = eol_pos (line.text, line.length);
1407 	    newlength = line.length - (tmp - line.text) - 1;
1408 	    if (newlength)
1409 	      {
1410 		chr_copy (line.text, tmp + 1, newlength);
1411 		line.length = newlength;
1412 		goto restart;
1413 	      }
1414 	    line.length = 0;
1415 	    end_cycle++;
1416 	  }
1417 	  break;
1418 
1419 	case 'g':
1420 	  line_copy (&hold, &line);
1421 	  break;
1422 
1423 	case 'G':
1424 	  line_append (&hold, &line);
1425 	  break;
1426 
1427 	case 'h':
1428 	  line_copy (&line, &hold);
1429 	  break;
1430 
1431 	case 'H':
1432 	  line_append (&line, &hold);
1433 	  break;
1434 
1435 	case 'i':
1436 	  ck_fwrite (cur_cmd->x.cmd_txt.text, 1,
1437 		     cur_cmd->x.cmd_txt.text_len, stdout);
1438 	  break;
1439 
1440 	case 'l':
1441 	  {
1442 	    char *tmp;
1443 	    int n;
1444 	    int width = 0;
1445 
1446 	    n = line.length;
1447 	    tmp = line.text;
1448 	    while (n--)
1449 	      {
1450 		/* Skip the trailing newline, if there is one */
1451 		if (!n && (*tmp == '\n'))
1452 		  break;
1453 		if (width > 77)
1454 		  {
1455 		    width = 0;
1456 		    putchar ('\n');
1457 		  }
1458 		if (*tmp == '\\')
1459 		  {
1460 		    printf ("\\\\");
1461 		    width += 2;
1462 		  }
1463 		else if (isprint (*tmp))
1464 		  {
1465 		    putchar (*tmp);
1466 		    width++;
1467 		  }
1468 		else
1469 		  switch (*tmp)
1470 		    {
1471 #if 0
1472 		      /* Should print \00 instead of \0 because (a) POSIX */
1473 		      /* requires it, and (b) this way \01 is unambiguous.  */
1474 		    case '\0':
1475 		      printf ("\\0");
1476 		      width += 2;
1477 		      break;
1478 #endif
1479 		    case 007:
1480 		      printf ("\\a");
1481 		      width += 2;
1482 		      break;
1483 		    case '\b':
1484 		      printf ("\\b");
1485 		      width += 2;
1486 		      break;
1487 		    case '\f':
1488 		      printf ("\\f");
1489 		      width += 2;
1490 		      break;
1491 		    case '\n':
1492 		      printf ("\\n");
1493 		      width += 2;
1494 		      break;
1495 		    case '\r':
1496 		      printf ("\\r");
1497 		      width += 2;
1498 		      break;
1499 		    case '\t':
1500 		      printf ("\\t");
1501 		      width += 2;
1502 		      break;
1503 		    case '\v':
1504 		      printf ("\\v");
1505 		      width += 2;
1506 		      break;
1507 		    default:
1508 		      printf ("\\%02x", (*tmp) & 0xFF);
1509 		      width += 2;
1510 		      break;
1511 		    }
1512 		tmp++;
1513 	      }
1514 	    putchar ('\n');
1515 	  }
1516 	  break;
1517 
1518 	case 'n':
1519 	  if (feof (input_file))
1520 	    goto quit;
1521 	  if (!no_default_output)
1522 	    ck_fwrite (line.text, 1, line.length, stdout);
1523 	  read_pattern_space ();
1524 	  break;
1525 
1526 	case 'N':
1527 	  if (feof (input_file))
1528 	    {
1529 	      line.length = 0;
1530 	      goto quit;
1531 	    }
1532 	  append_pattern_space ();
1533 	  break;
1534 
1535 	case 'p':
1536 	  ck_fwrite (line.text, 1, line.length, stdout);
1537 	  break;
1538 
1539 	case 'P':
1540 	  {
1541 	    char *tmp;
1542 
1543 	    tmp = eol_pos (line.text, line.length);
1544 	    ck_fwrite (line.text, 1,
1545 		       tmp ? tmp - line.text + 1
1546 		       : line.length, stdout);
1547 	  }
1548 	  break;
1549 
1550 	case 'q':
1551 	quit:
1552 	  quit_cmd++;
1553 	  end_cycle++;
1554 	  break;
1555 
1556 	case 'r':
1557 	  {
1558 	    int n = 0;
1559 
1560 	    if (cur_cmd->x.io_file)
1561 	      {
1562 		rewind (cur_cmd->x.io_file);
1563 		do
1564 		  {
1565 		    append.length += n;
1566 		    if (append.length == append.alloc)
1567 		      {
1568 			append.alloc *= 2;
1569 			append.text = ck_realloc (append.text, append.alloc);
1570 		      }
1571 		    n = fread (append.text + append.length, sizeof (char),
1572 			       append.alloc - append.length,
1573 			       cur_cmd->x.io_file);
1574 		  }
1575 		while (n > 0);
1576 		if (ferror (cur_cmd->x.io_file))
1577 		  panic ("Read error on input file to 'r' command");
1578 	      }
1579 	  }
1580 	  break;
1581 
1582 	case 's':
1583 	  {
1584 	    int trail_nl_p = line.text [line.length - 1] == '\n';
1585 	    if (!tmp.alloc)
1586 	      {
1587 		tmp.alloc = 50;
1588 		tmp.text = ck_malloc (50);
1589 	      }
1590 	    count = 0;
1591 	    start = 0;
1592 	    remain = line.length - trail_nl_p;
1593 	    tmp.length = 0;
1594 	    rep = cur_cmd->x.cmd_regex.replacement;
1595 	    rep_end = rep + cur_cmd->x.cmd_regex.replace_length;
1596 
1597 	    while ((offset = re_search (cur_cmd->x.cmd_regex.regx,
1598 					line.text,
1599 					line.length - trail_nl_p,
1600 					start,
1601 					remain,
1602 					&regs)) >= 0)
1603 	      {
1604 		count++;
1605 		if (offset - start)
1606 		  str_append (&tmp, line.text + start, offset - start);
1607 
1608 		if (cur_cmd->x.cmd_regex.flags & S_NUM_BIT)
1609 		  {
1610 		    if (count != cur_cmd->x.cmd_regex.numb)
1611 		      {
1612 			int matched = regs.end[0] - regs.start[0];
1613 			if (!matched) matched = 1;
1614 			str_append (&tmp, line.text + regs.start[0], matched);
1615 			start = (offset == regs.end[0]
1616 				 ? offset + 1 : regs.end[0]);
1617 			remain = (line.length - trail_nl_p) - start;
1618 			continue;
1619 		      }
1620 		  }
1621 
1622 		for (rep_next = rep_cur = rep; rep_next < rep_end; rep_next++)
1623 		  {
1624 		    if (*rep_next == '&')
1625 		      {
1626 			if (rep_next - rep_cur)
1627 			  str_append (&tmp, rep_cur, rep_next - rep_cur);
1628 			str_append (&tmp, line.text + regs.start[0], regs.end[0] - regs.start[0]);
1629 			rep_cur = rep_next + 1;
1630 		      }
1631 		    else if (*rep_next == '\\')
1632 		      {
1633 			if (rep_next - rep_cur)
1634 			  str_append (&tmp, rep_cur, rep_next - rep_cur);
1635 			rep_next++;
1636 			if (rep_next != rep_end)
1637 			  {
1638 			    int n;
1639 
1640 			    if (*rep_next >= '0' && *rep_next <= '9')
1641 			      {
1642 				n = *rep_next - '0';
1643 				str_append (&tmp, line.text + regs.start[n], regs.end[n] - regs.start[n]);
1644 			      }
1645 			    else
1646 			      str_append (&tmp, rep_next, 1);
1647 			  }
1648 			rep_cur = rep_next + 1;
1649 		      }
1650 		  }
1651 		if (rep_next - rep_cur)
1652 		  str_append (&tmp, rep_cur, rep_next - rep_cur);
1653 		if (offset == regs.end[0])
1654 		  {
1655 		    str_append (&tmp, line.text + offset, 1);
1656 		    ++regs.end[0];
1657 		  }
1658 		start = regs.end[0];
1659 
1660 		remain = (line.length - trail_nl_p) - start;
1661 		if (remain < 0)
1662 		  break;
1663 		if (!(cur_cmd->x.cmd_regex.flags & S_GLOBAL_BIT))
1664 		  break;
1665 	      }
1666 	    if (!count)
1667 	      break;
1668 	    replaced = 1;
1669 	    str_append (&tmp, line.text + start, remain + trail_nl_p);
1670 	    t.text = line.text;
1671 	    t.length = line.length;
1672 	    t.alloc = line.alloc;
1673 	    line.text = tmp.text;
1674 	    line.length = tmp.length;
1675 	    line.alloc = tmp.alloc;
1676 	    tmp.text = t.text;
1677 	    tmp.length = t.length;
1678 	    tmp.alloc = t.alloc;
1679 	    if ((cur_cmd->x.cmd_regex.flags & S_WRITE_BIT)
1680 		&& cur_cmd->x.cmd_regex.wio_file)
1681 	      ck_fwrite (line.text, 1, line.length,
1682 			 cur_cmd->x.cmd_regex.wio_file);
1683 	    if (cur_cmd->x.cmd_regex.flags & S_PRINT_BIT)
1684 	      ck_fwrite (line.text, 1, line.length, stdout);
1685 	    break;
1686 	  }
1687 
1688 	case 't':
1689 	  if (replaced)
1690 	    {
1691 	      replaced = 0;
1692 	      if (!cur_cmd->x.jump)
1693 		end_cycle++;
1694 	      else
1695 		{
1696 		  struct sed_label *j = cur_cmd->x.jump;
1697 
1698 		  n = j->v->v_length - j->v_index;
1699 		  cur_cmd = j->v->v + j->v_index;
1700 		  vec = j->v;
1701 		  goto exe_loop;
1702 		}
1703 	    }
1704 	  break;
1705 
1706 	case 'w':
1707 	  if (cur_cmd->x.io_file)
1708 	    {
1709 	      ck_fwrite (line.text, 1, line.length, cur_cmd->x.io_file);
1710 	      fflush (cur_cmd->x.io_file);
1711 	    }
1712 	  break;
1713 
1714 	case 'x':
1715 	  {
1716 	    struct line tmp;
1717 
1718 	    tmp = line;
1719 	    line = hold;
1720 	    hold = tmp;
1721 	  }
1722 	  break;
1723 
1724 	case 'y':
1725 	  {
1726 	    unsigned char *p, *e;
1727 
1728 	    for (p = (unsigned char *) (line.text), e = p + line.length;
1729 		 p < e;
1730 		 p++)
1731 	      *p = cur_cmd->x.translate[*p];
1732 	  }
1733 	  break;
1734 
1735 	default:
1736 	  panic ("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1737 	}
1738       if (end_cycle)
1739 	break;
1740     }
1741 }
1742 
1743 
1744 /* Return non-zero if the current line matches the address
1745    pointed to by 'addr'. */
1746 int
match_address(addr)1747 match_address (addr)
1748      struct addr *addr;
1749 {
1750   switch (addr->addr_type)
1751     {
1752     case addr_is_null:
1753       return 1;
1754     case addr_is_num:
1755       return (input_line_number == addr->addr_number);
1756     case addr_is_mod:
1757       return ((input_line_number%addr->modulo) == addr->offset);
1758 
1759 
1760     case addr_is_regex:
1761       {
1762 	int trail_nl_p = line.text [line.length - 1] == '\n';
1763 	return (re_search (addr->addr_regex,
1764 			   line.text,
1765 			   line.length - trail_nl_p,
1766 			   0,
1767 			   line.length - trail_nl_p,
1768 			   (struct re_registers *) 0) >= 0) ? 1 : 0;
1769       }
1770     case addr_is_last:
1771       return (input_EOF) ? 1 : 0;
1772 
1773     default:
1774       panic ("INTERNAL ERROR: bad address type");
1775       break;
1776     }
1777   return -1;
1778 }
1779 
1780 /* Read in the next line of input, and store it in the
1781    pattern space.  Return non-zero if this is the last line of input */
1782 
1783 int
read_pattern_space()1784 read_pattern_space ()
1785 {
1786   int n;
1787   char *p;
1788   int ch;
1789 
1790   p = line.text;
1791   n = line.alloc;
1792 
1793   if (feof (input_file))
1794     return 0;
1795   input_line_number++;
1796   replaced = 0;
1797   for (;;)
1798     {
1799       if (n == 0)
1800 	{
1801 	  line.text = ck_realloc (line.text, line.alloc * 2);
1802 	  p = line.text + line.alloc;
1803 	  n = line.alloc;
1804 	  line.alloc *= 2;
1805 	}
1806       ch = getc (input_file);
1807       if (ch == EOF)
1808 	{
1809 	  if (n == line.alloc)
1810 	    return 0;
1811 	  /* *p++ = '\n'; */
1812 	  /* --n; */
1813 	  line.length = line.alloc - n;
1814 	  if (last_input_file)
1815 	    input_EOF++;
1816 	  return 1;
1817 	}
1818       *p++ = ch;
1819       --n;
1820       if (ch == '\n')
1821 	{
1822 	  line.length = line.alloc - n;
1823 	  break;
1824 	}
1825     }
1826   ch = getc (input_file);
1827   if (ch != EOF)
1828     ungetc (ch, input_file);
1829   else if (last_input_file)
1830     input_EOF++;
1831   return 1;
1832 }
1833 
1834 /* Inplement the 'N' command, which appends the next line of input to
1835    the pattern space. */
1836 void
append_pattern_space()1837 append_pattern_space ()
1838 {
1839   char *p;
1840   int n;
1841   int ch;
1842 
1843   p = line.text + line.length;
1844   n = line.alloc - line.length;
1845 
1846   input_line_number++;
1847   replaced = 0;
1848   for (;;)
1849     {
1850       ch = getc (input_file);
1851       if (ch == EOF)
1852 	{
1853 	  if (n == line.alloc)
1854 	    return;
1855 	  /* *p++ = '\n'; */
1856 	  /* --n; */
1857 	  line.length = line.alloc - n;
1858 	  if (last_input_file)
1859 	    input_EOF++;
1860 	  return;
1861 	}
1862       if (n == 0)
1863 	{
1864 	  line.text = ck_realloc (line.text, line.alloc * 2);
1865 	  p = line.text + line.alloc;
1866 	  n = line.alloc;
1867 	  line.alloc *= 2;
1868 	}
1869       *p++ = ch;
1870       --n;
1871       if (ch == '\n')
1872 	{
1873 	  line.length = line.alloc - n;
1874 	  break;
1875 	}
1876     }
1877   ch = getc (input_file);
1878   if (ch != EOF)
1879     ungetc (ch, input_file);
1880   else if (last_input_file)
1881     input_EOF++;
1882 }
1883 
1884 /* Copy the contents of the line 'from' into the line 'to'.
1885    This destroys the old contents of 'to'.  It will still work
1886    if the line 'from' contains nulls. */
1887 void
line_copy(from,to)1888 line_copy (from, to)
1889      struct line *from, *to;
1890 {
1891   if (from->length > to->alloc)
1892     {
1893       to->alloc = from->length;
1894       to->text = ck_realloc (to->text, to->alloc);
1895     }
1896   bcopy (from->text, to->text, from->length);
1897   to->length = from->length;
1898 }
1899 
1900 /* Append the contents of the line 'from' to the line 'to'.
1901    This routine will work even if the line 'from' contains nulls */
1902 void
line_append(from,to)1903 line_append (from, to)
1904      struct line *from, *to;
1905 {
1906   if (from->length > (to->alloc - to->length))
1907     {
1908       to->alloc += from->length;
1909       to->text = ck_realloc (to->text, to->alloc);
1910     }
1911   bcopy (from->text, to->text + to->length, from->length);
1912   to->length += from->length;
1913 }
1914 
1915 /* Append 'length' bytes from 'string' to the line 'to'
1916    This routine *will* append bytes with nulls in them, without
1917    failing. */
1918 void
str_append(to,string,length)1919 str_append (to, string, length)
1920      struct line *to;
1921      char *string;
1922      int length;
1923 {
1924   if (length > to->alloc - to->length)
1925     {
1926       to->alloc += length;
1927       to->text = ck_realloc (to->text, to->alloc);
1928     }
1929   bcopy (string, to->text + to->length, length);
1930   to->length += length;
1931 }
1932 
1933 void
usage(status)1934 usage (status)
1935      int status;
1936 {
1937   fprintf (status ? stderr : stdout, "\
1938 Usage: %s [-nV] [--quiet] [--silent] [--version] [-e script]\n\
1939         [-f script-file] [--expression=script] [--file=script-file] [file...]\n",
1940 	   myname);
1941   exit (status);
1942 }
1943