1 /*
 * sed -- stream editor
3  *
4  *
5  */
6 #include <u.h>
7 #include <libc.h>
8 #include <bio.h>
9 #include <regexp.h>
10 
/*
 * Compile-time limits.  Each one sizes a static table or buffer
 * declared further down in this file.
 */
enum {
	DEPTH		= 20,		/* max nesting depth of {} */
	MAXCMDS		= 512,		/* max sed commands */
	ADDSIZE		= 10000,	/* size of add & read buffer */
	MAXADDS		= 20,		/* max pending adds and reads */
	LBSIZE		= 8192,		/* input line size */
	LABSIZE		= 50,		/* max number of labels (size of ltab), not the name length */
	MAXSUB		= 10,		/* max number of sub reg exp */
	MAXFILES	= 120		/* max output files */
};
	/* An address is a line #, a R.E., "$", a reference to the last
	 * R.E., or nothing.  The type tag says which member of u, if
	 * any, is meaningful.
	 */
typedef struct	{
	enum {
		A_NONE,			/* no address present */
		A_DOL,			/* "$" */
		A_LINE,			/* line number, held in u.line */
		A_RE,			/* regular expression, held in u.rp */
		A_LAST			/* empty R.E.; stands for the last one compiled */
	}type;
	union {
		long line;		/* Line # */
		Reprog *rp;		/* Compiled R.E. */
	} u;
} Addr;
37 
/*
 * One compiled sed command.  The meaning of u depends on command:
 * re1 for s, text for a/c/i/r (and the translation table built by
 * ycomp for y), lb1 for branches and for the {} groups compiled as
 * branches.
 */
typedef struct	SEDCOM {
	Addr	ad1;			/* optional start address */
	Addr	ad2;			/* optional end address */
	union {
		Reprog	*re1;		/* compiled R.E. */
		Rune	*text;		/* added text or file name */
		struct	SEDCOM	*lb1;	/* destination command of branch */
	} u;
	Rune	*rhs;			/* Right-hand side of substitution */
	Biobuf*	fcode;			/* File ID for read and write */
	char	command;		/* command code -see below */
	char	gfl;			/* 'Global' flag for substitutions */
	char	pfl;			/* 'print' flag for substitutions: 1 for p, 2 for P */
	char	active;			/* 1 => data between start and end; 2 once past the first line */
	char	negfl;			/* negation flag */
} SedCom;
54 
	/* Command Codes for field SedCom.command.
	 * No code is 0: execute() stops at the first command whose
	 * code is 0.  The letters noted are those fcomp() maps to each
	 * code; codes without a note are not assigned by fcomp().
	 */
#define ACOM	01	/* a */
#define BCOM	020	/* b, and { */
#define CCOM	02	/* c */
#define	CDCOM	025	/* D */
#define	CNCOM	022	/* N */
#define COCOM	017
#define	CPCOM	023	/* P */
#define DCOM	03	/* d */
#define ECOM	015
#define EQCOM	013	/* = */
#define FCOM	016
#define GCOM	027	/* g */
#define CGCOM	030	/* G */
#define HCOM	031	/* h */
#define CHCOM	032	/* H */
#define ICOM	04	/* i */
#define LCOM	05	/* l */
#define NCOM	012	/* n */
#define PCOM	010	/* p */
#define QCOM	011	/* q */
#define RCOM	06	/* r */
#define SCOM	07	/* s */
#define TCOM	021	/* t */
#define WCOM	014	/* w */
#define	CWCOM	024
#define	YCOM	026	/* y */
#define XCOM	033	/* x */
83 
84 
/*
 * A branch label.  Until the label is defined, address is 0 and
 * chain heads a list (linked through SedCom.u.lb1) of the branch
 * commands that refer to it; dechain() later points them all at
 * address.
 */
typedef struct label {			/* Label symbol table */
	Rune	asc[9];			/* Label name */
	SedCom	*chain;			/* Branches waiting for this label */
	SedCom	*address;		/* Command associated with label */
} Label;
90 
/* One queued input file; a name of 0 stands for standard input. */
typedef	struct	FILE_CACHE {		/* Data file control block */
	struct FILE_CACHE *next;	/* Forward Link */
	char 		  *name;	/* Name of file */
} FileCache;
95 
SedCom pspace[MAXCMDS];			/* Command storage */
SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
SedCom *rep = pspace;			/* Current fill point */

Reprog	*lastre = 0;			/* Last regular expression */
Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/

Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
Rune	*addend = addspace+ADDSIZE;

SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
SedCom	**aptr = abuf;			/* Next free slot in abuf; the queue is 0-terminated */

struct {				/* Sed program input control block */
	enum PTYPE 			/* Either on command line or in file */
		{ P_ARG,
		  P_FILE
		} type;
	union PCTL {			/* Pointer to data */
		Biobuf	*bp;
		char	*curr;
	} pctl;
} prog;

Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */

FileCache	*fhead = 0;		/* Head of File Cache Chain */
FileCache	*ftail = 0;		/* Tail of File Cache Chain */

Rune	*loc1;				/* Start of pattern match */
Rune	*loc2;				/* End of pattern match */
Rune	seof;				/* Pattern delimiter char */

Rune	linebuf[LBSIZE+1];		/* Input data buffer */
Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
Rune	*spend = linebuf;			/* End of input data */
Rune	*cp;				/* Current scan point in linebuf */

Rune	holdsp[LBSIZE+1];		/* Hold buffer */
Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
Rune	*hspend = holdsp;		/* End of hold data */

int	nflag;				/* Command line flags */
int	gflag;
int	lflag;

int	dolflag;			/* Set when at true EOF */
int	sflag;				/* Set when substitution done */
int	jflag;				/* Set when jump required */
int	delflag;			/* Delete current line when set */

long	lnum = 0;			/* Input line count */

/* NOTE(review): names are written here by text(), which takes no length; 40 bytes is the only room */
char	fname[MAXFILES][40];		/* File name cache */
Biobuf	*fcode[MAXFILES];		/* File ID cache */
int	nfiles = 0;			/* Cache fill point */

Biobuf	fout;				/* Output stream */
Biobuf	bstdin;				/* Default input */
Biobuf*	f = 0;				/* Input data */

/* ltab[0] is reserved for branches with no label; main() sets its address to the end of the script */
Label	ltab[LABSIZE];			/* Label name symbol table */
Label	*labend = ltab+LABSIZE;		/* End of label table */
Label	*lab = ltab+1;			/* Current Fill point */

int	depth = 0;			/* {} stack pointer */

Rune	bad;				/* Dummy err ptr reference */
Rune	*badp = &bad;


/* Diagnostics handed to quit(); %S is given the current script line (linebuf) */
char	CGMES[]	 = 	"Command garbled: %S";
char	TMMES[]	 = 	"Too much text: %S";
char	LTL[]	 = 	"Label too long: %S";
char	AD0MES[] =	"No addresses allowed: %S";
char	AD1MES[] =	"Only one address allowed: %S";
172 
/* Forward declarations for every function defined in this file. */
void	address(Addr *);
void	arout(void);
int	cmp(char *, char *);
int	rcmp(Rune *, Rune *);
void	command(SedCom *);
Reprog	*compile(void);
Rune	*compsub(Rune *, Rune *);
void	dechain(void);
void	dosub(Rune *);
int	ecmp(Rune *, Rune *, int);
void	enroll(char *);
void	errexit(void);
int	executable(SedCom *);
void	execute(void);
void	fcomp(void);
long	getrune(void);
Rune	*gline(Rune *);
int	match(Reprog *, Rune *);
void	newfile(enum PTYPE, char *);
int 	opendata(void);
Biobuf	*open_file(char *);
Rune	*place(Rune *, Rune *, Rune *);
void	quit(char *, char *);
int	rline(Rune *, Rune *);
Label	*search(Label *);
int	substitute(SedCom *);
char	*text(char *);
Rune	*stext(Rune *, Rune *);
int	ycomp(SedCom *);
char *	trans(int c);
void	putline(Biobuf *bp, Rune *buf, int n);
204 
205 void
main(int argc,char ** argv)206 main(int argc, char **argv)
207 {
208 	int	compfl;
209 
210 	lnum = 0;
211 	Binit(&fout, 1, OWRITE);
212 	fcode[nfiles++] = &fout;
213 	compfl = 0;
214 
215 	if(argc == 1)
216 		exits(0);
217 	ARGBEGIN{
218 		case 'n':
219 			nflag++;
220 			continue;
221 		case 'f':
222 			if(argc <= 1)
223 				quit("no pattern-file", 0);
224 			newfile(P_FILE, ARGF());
225 			fcomp();
226 			compfl = 1;
227 			continue;
228 		case 'e':
229 			if (argc <= 1)
230 				quit("missing pattern", 0);
231 			newfile(P_ARG, ARGF());
232 			fcomp();
233 			compfl = 1;
234 			continue;
235 		case 'g':
236 			gflag++;
237 			continue;
238 		case 'l':
239 			lflag++;
240 			continue;
241 		default:
242 			fprint(2, "sed: Unknown flag: %c\n", ARGC());
243 			continue;
244 	} ARGEND
245 
246 	if(compfl == 0) {
247 		if (--argc < 0)
248 			quit("missing pattern", 0);
249 		newfile(P_ARG, *argv++);
250 		fcomp();
251 	}
252 
253 	if(depth)
254 		quit("Too many {'s", 0);
255 
256 	ltab[0].address = rep;
257 
258 	dechain();
259 
260 	if(argc <= 0)
261 		enroll(0);		/* Add stdin to cache */
262 	else while(--argc >= 0) {
263 		enroll(*argv++);
264 	}
265 	execute();
266 	exits(0);
267 }
268 void
fcomp(void)269 fcomp(void)
270 {
271 	Rune	*tp;
272 	SedCom	*pt, *pt1;
273 	int	i;
274 	Label	*lpt;
275 
276 	static Rune	*p = addspace;
277 	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
278 
279 	while (rline(linebuf, lbend) >= 0) {
280 		cp = linebuf;
281 comploop:
282 		while(*cp == ' ' || *cp == '\t')
283 			cp++;
284 		if(*cp == '\0' || *cp == '#')
285 			continue;
286 		if(*cp == ';') {
287 			cp++;
288 			goto comploop;
289 		}
290 
291 		address(&rep->ad1);
292 		if (rep->ad1.type != A_NONE) {
293 			if (rep->ad1.type == A_LAST) {
294 				if (!lastre)
295 					quit("First RE may not be null", 0);
296 				rep->ad1.type = A_RE;
297 				rep->ad1.u.rp = lastre;
298 			}
299 			if(*cp == ',' || *cp == ';') {
300 				cp++;
301 				address(&rep->ad2);
302 				if (rep->ad2.type == A_LAST) {
303 					rep->ad1.type = A_RE;
304 					rep->ad2.u.rp = lastre;
305 				}
306 			} else
307 				rep->ad2.type = A_NONE;
308 		}
309 		while(*cp == ' ' || *cp == '\t')
310 			cp++;
311 
312 swit:
313 		switch(*cp++) {
314 
315 			default:
316 				quit("Unrecognized command: %S", (char *)linebuf);
317 
318 			case '!':
319 				rep->negfl = 1;
320 				goto swit;
321 
322 			case '{':
323 				rep->command = BCOM;
324 				rep->negfl = !(rep->negfl);
325 				cmpend[depth++] = &rep->u.lb1;
326 				if(++rep >= pend)
327 					quit("Too many commands: %S", (char *) linebuf);
328 				if(*cp == '\0')	continue;
329 				goto comploop;
330 
331 			case '}':
332 				if(rep->ad1.type != A_NONE)
333 					quit(AD0MES, (char *) linebuf);
334 				if(--depth < 0)
335 					quit("Too many }'s", 0);
336 				*cmpend[depth] = rep;
337 				if(*cp == 0)	continue;
338 				goto comploop;
339 
340 			case '=':
341 				rep->command = EQCOM;
342 				if(rep->ad2.type != A_NONE)
343 					quit(AD1MES, (char *) linebuf);
344 				break;
345 
346 			case ':':
347 				if(rep->ad1.type != A_NONE)
348 					quit(AD0MES, (char *) linebuf);
349 
350 				while(*cp == ' ')
351 					cp++;
352 				tp = lab->asc;
353 				while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
354 					*tp++ = *cp++;
355 					if(tp >= &(lab->asc[8]))
356 						quit(LTL, (char *) linebuf);
357 				}
358 				*tp = '\0';
359 
360 				if(lpt = search(lab)) {
361 					if(lpt->address)
362 						quit("Duplicate labels: %S", (char *) linebuf);
363 				} else {
364 					lab->chain = 0;
365 					lpt = lab;
366 					if(++lab >= labend)
367 						quit("Too many labels: %S", (char *) linebuf);
368 				}
369 				lpt->address = rep;
370 				if (*cp == '#')
371 					continue;
372 				rep--;			/* reuse this slot */
373 				break;
374 
375 			case 'a':
376 				rep->command = ACOM;
377 				if(rep->ad2.type != A_NONE)
378 					quit(AD1MES, (char *) linebuf);
379 				if(*cp == '\\')	cp++;
380 				if(*cp++ != '\n')
381 					quit(CGMES, (char *) linebuf);
382 				rep->u.text = p;
383 				p = stext(p, addend);
384 				break;
385 			case 'c':
386 				rep->command = CCOM;
387 				if(*cp == '\\')	cp++;
388 				if(*cp++ != '\n')
389 					quit(CGMES, (char *) linebuf);
390 				rep->u.text = p;
391 				p = stext(p, addend);
392 				break;
393 			case 'i':
394 				rep->command = ICOM;
395 				if(rep->ad2.type != A_NONE)
396 					quit(AD1MES, (char *) linebuf);
397 				if(*cp == '\\')	cp++;
398 				if(*cp++ != '\n')
399 					quit(CGMES, (char *) linebuf);
400 				rep->u.text = p;
401 				p = stext(p, addend);
402 				break;
403 
404 			case 'g':
405 				rep->command = GCOM;
406 				break;
407 
408 			case 'G':
409 				rep->command = CGCOM;
410 				break;
411 
412 			case 'h':
413 				rep->command = HCOM;
414 				break;
415 
416 			case 'H':
417 				rep->command = CHCOM;
418 				break;
419 
420 			case 't':
421 				rep->command = TCOM;
422 				goto jtcommon;
423 
424 			case 'b':
425 				rep->command = BCOM;
426 jtcommon:
427 				while(*cp == ' ')cp++;
428 				if(*cp == '\0') {
429 					if(pt = ltab[0].chain) {
430 						while(pt1 = pt->u.lb1)
431 							pt = pt1;
432 						pt->u.lb1 = rep;
433 					} else
434 						ltab[0].chain = rep;
435 					break;
436 				}
437 				tp = lab->asc;
438 				while((*tp++ = *cp++))
439 					if(tp >= &(lab->asc[8]))
440 						quit(LTL, (char *) linebuf);
441 				cp--;
442 				tp[-1] = '\0';
443 
444 				if(lpt = search(lab)) {
445 					if(lpt->address) {
446 						rep->u.lb1 = lpt->address;
447 					} else {
448 						pt = lpt->chain;
449 						while(pt1 = pt->u.lb1)
450 							pt = pt1;
451 						pt->u.lb1 = rep;
452 					}
453 				} else {
454 					lab->chain = rep;
455 					lab->address = 0;
456 					if(++lab >= labend)
457 						quit("Too many labels: %S",
458 							(char *) linebuf);
459 				}
460 				break;
461 
462 			case 'n':
463 				rep->command = NCOM;
464 				break;
465 
466 			case 'N':
467 				rep->command = CNCOM;
468 				break;
469 
470 			case 'p':
471 				rep->command = PCOM;
472 				break;
473 
474 			case 'P':
475 				rep->command = CPCOM;
476 				break;
477 
478 			case 'r':
479 				rep->command = RCOM;
480 				if(rep->ad2.type != A_NONE)
481 					quit(AD1MES, (char *) linebuf);
482 				if(*cp++ != ' ')
483 					quit(CGMES, (char *) linebuf);
484 				rep->u.text = p;
485 				p = stext(p, addend);
486 				break;
487 
488 			case 'd':
489 				rep->command = DCOM;
490 				break;
491 
492 			case 'D':
493 				rep->command = CDCOM;
494 				rep->u.lb1 = pspace;
495 				break;
496 
497 			case 'q':
498 				rep->command = QCOM;
499 				if(rep->ad2.type != A_NONE)
500 					quit(AD1MES, (char *) linebuf);
501 				break;
502 
503 			case 'l':
504 				rep->command = LCOM;
505 				break;
506 
507 			case 's':
508 				rep->command = SCOM;
509 				seof = *cp++;
510 				if ((rep->u.re1 = compile()) == 0) {
511 					if(!lastre)
512 						quit("First RE may not be null.", 0);
513 					rep->u.re1 = lastre;
514 				}
515 				rep->rhs = p;
516 				if((p = compsub(p, addend)) == 0)
517 					quit(CGMES, (char *) linebuf);
518 				if(*cp == 'g') {
519 					cp++;
520 					rep->gfl++;
521 				} else if(gflag)
522 					rep->gfl++;
523 
524 				if(*cp == 'p') {
525 					cp++;
526 					rep->pfl = 1;
527 				}
528 
529 				if(*cp == 'P') {
530 					cp++;
531 					rep->pfl = 2;
532 				}
533 
534 				if(*cp == 'w') {
535 					cp++;
536 					if(*cp++ !=  ' ')
537 						quit(CGMES, (char *) linebuf);
538 					text(fname[nfiles]);
539 					for(i = nfiles - 1; i >= 0; i--)
540 						if(cmp(fname[nfiles],fname[i]) == 0) {
541 							rep->fcode = fcode[i];
542 							goto done;
543 						}
544 					if(nfiles >= MAXFILES)
545 						quit("Too many files in w commands 1", 0);
546 					rep->fcode = open_file(fname[nfiles]);
547 				}
548 				break;
549 
550 			case 'w':
551 				rep->command = WCOM;
552 				if(*cp++ != ' ')
553 					quit(CGMES, (char *) linebuf);
554 				text(fname[nfiles]);
555 				for(i = nfiles - 1; i >= 0; i--)
556 					if(cmp(fname[nfiles], fname[i]) == 0) {
557 						rep->fcode = fcode[i];
558 						goto done;
559 					}
560 				if(nfiles >= MAXFILES){
561 					fprint(2, "sed: Too many files in w commands 2 \n");
562 					fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
563 					errexit();
564 				}
565 				rep->fcode = open_file(fname[nfiles]);
566 				break;
567 
568 			case 'x':
569 				rep->command = XCOM;
570 				break;
571 
572 			case 'y':
573 				rep->command = YCOM;
574 				seof = *cp++;
575 				if (ycomp(rep) == 0)
576 					quit(CGMES, (char *) linebuf);
577 				break;
578 
579 		}
580 done:
581 		if(++rep >= pend)
582 			quit("Too many commands, last: %S", (char *) linebuf);
583 
584 		if(*cp++ != '\0') {
585 			if(cp[-1] == ';')
586 				goto comploop;
587 			quit(CGMES, (char *) linebuf);
588 		}
589 
590 	}
591 }
592 
593 Biobuf *
open_file(char * name)594 open_file(char *name)
595 {
596 	Biobuf *bp;
597 	int fd;
598 
599 	if ((bp = malloc(sizeof(Biobuf))) == 0)
600 		quit("Out of memory", 0);
601 	if ((fd = open(name, OWRITE)) < 0 &&
602 		(fd = create(name, OWRITE, 0666)) < 0)
603 			quit("Cannot create %s", name);
604 	Binit(bp, fd, OWRITE);
605 	Bseek(bp, 0, 2);
606 	fcode[nfiles++] = bp;
607 	return bp;
608 }
609 
610 Rune	*
compsub(Rune * rhs,Rune * end)611 compsub(Rune *rhs, Rune *end)
612 {
613 	Rune	r;
614 
615 	while ((r = *cp++) != '\0') {
616 		if(r == '\\') {
617 			if (rhs < end)
618 				*rhs++ = Runemax;
619 			else
620 				return 0;
621 			r = *cp++;
622 			if(r == 'n')
623 				r = '\n';
624 		} else {
625 			if(r == seof) {
626 				if (rhs < end)
627 					*rhs++ = '\0';
628 				else
629 					return 0;
630 				return rhs;
631 			}
632 		}
633 		if (rhs < end)
634 			*rhs++ = r;
635 		else
636 			return 0;
637 
638 	}
639 	return 0;
640 }
641 
642 Reprog *
compile(void)643 compile(void)
644 {
645 	Rune c;
646 	char *ep;
647 	char expbuf[512];
648 
649 	if((c = *cp++) == seof)		/* '//' */
650 		return 0;
651 	ep = expbuf;
652 	do {
653 		if (c == 0 || c == '\n')
654 			quit(TMMES, (char *) linebuf);
655 		if (c == '\\') {
656 			if (ep >= expbuf+sizeof(expbuf))
657 				quit(TMMES, (char *) linebuf);
658 			ep += runetochar(ep, &c);
659 			if ((c = *cp++) == 'n')
660 				c = '\n';
661 		}
662 		if (ep >= expbuf+sizeof(expbuf))
663 			quit(TMMES, (char *) linebuf);
664 		ep += runetochar(ep, &c);
665 	} while ((c = *cp++) != seof);
666 	*ep = 0;
667 	return lastre = regcomp(expbuf);
668 }
669 
670 void
regerror(char * s)671 regerror(char *s)
672 {
673 	USED(s);
674 	quit(CGMES, (char *) linebuf);
675 }
676 
677 void
newfile(enum PTYPE type,char * name)678 newfile(enum PTYPE type, char *name)
679 {
680 	if (type == P_ARG)
681 		prog.pctl.curr = name;
682 	else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
683 		quit("Cannot open pattern-file: %s\n", name);
684 	prog.type = type;
685 }
686 
687 int
rline(Rune * buf,Rune * end)688 rline(Rune *buf, Rune *end)
689 {
690 	long c;
691 	Rune r;
692 
693 	while ((c = getrune()) >= 0) {
694 		r = c;
695 		if (r == '\\') {
696 			if (buf <= end)
697 				*buf++ = r;
698 			if ((c = getrune()) < 0)
699 				break;
700 			r = c;
701 		} else if (r == '\n') {
702 			*buf = '\0';
703 			return(1);
704 		}
705 		if (buf <= end)
706 			*buf++ = r;
707 	}
708 	*buf = '\0';
709 	return(-1);
710 }
711 
712 long
getrune(void)713 getrune(void)
714 {
715 	char *p;
716 	long c;
717 	Rune r;
718 
719 	if (prog.type == P_ARG) {
720 		if ((p = prog.pctl.curr) != 0) {
721 			if (*p) {
722 				prog.pctl.curr += chartorune(&r, p);
723 				c = r;
724 			} else {
725 				c = '\n';	/* fake an end-of-line */
726 				prog.pctl.curr = 0;
727 			}
728 		} else
729 			c = -1;
730 	} else if ((c = Bgetrune(prog.pctl.bp)) < 0)
731 			Bterm(prog.pctl.bp);
732 	return c;
733 }
734 
735 void
address(Addr * ap)736 address(Addr *ap)
737 {
738 	int c;
739 	long	lno;
740 
741 	if((c = *cp++) == '$')
742 		ap->type = A_DOL;
743 	else if(c == '/') {
744 		seof = c;
745 		if (ap->u.rp = compile())
746 			ap->type = A_RE;
747 		else
748 			ap->type = A_LAST;
749 	}
750 	else if (c >= '0' && c <= '9') {
751 		lno = c-'0';
752 		while ((c = *cp) >= '0' && c <= '9')
753 			lno = lno*10 + *cp++-'0';
754 		if(!lno)
755 			quit("line number 0 is illegal",0);
756 		ap->type = A_LINE;
757 		ap->u.line = lno;
758 	}
759 	else {
760 		cp--;
761 		ap->type = A_NONE;
762 	}
763 }
764 
/*
 * Compare two NUL-terminated byte strings.
 * Returns 0 when they are identical, 1 otherwise.
 */
int
cmp(char *a, char *b)
{
	for(;; a++, b++) {
		if(*a != *b)
			return(1);
		if(*a == '\0')
			return(0);
	}
}
774 
775 int
rcmp(Rune * a,Rune * b)776 rcmp(Rune *a, Rune *b)		/* compare runes */
777 {
778 	while(*a == *b++)
779 		if (*a == '\0')
780 			return(0);
781 		else a++;
782 	return(1);
783 }
784 
785 char *
text(char * p)786 text(char *p)		/* extract character string */
787 {
788 	Rune	r;
789 
790 	while(*cp == '\t' || *cp == ' ')
791 			cp++;
792 	while (*cp) {
793 		if ((r = *cp++) == '\\')
794 			if ((r = *cp++) == 0)
795 				break;;
796 		if (r == '\n')
797 			while (*cp == '\t' || *cp == ' ')
798 					cp++;
799 		p += runetochar(p, &r);
800 	}
801 	*p++ = '\0';
802 	return p;
803 }
804 
805 Rune *
stext(Rune * p,Rune * end)806 stext(Rune *p, Rune *end)		/* extract rune string */
807 {
808 	while(*cp == '\t' || *cp == ' ')
809 		cp++;
810 	while (*cp) {
811 		if (*cp == '\\')
812 			if (*++cp == 0)
813 				break;
814 		if (p >= end-1)
815 			quit(TMMES, (char *) linebuf);
816 		if ((*p++ = *cp++) == '\n')
817 			while(*cp == '\t' || *cp == ' ')
818 					cp++;
819 	}
820 	*p++ = 0;
821 	return p;
822 }
823 
824 
825 Label *
search(Label * ptr)826 search (Label *ptr)
827 {
828 	Label	*rp;
829 
830 	for (rp = ltab; rp < ptr; rp++)
831 		if(rcmp(rp->asc, ptr->asc) == 0)
832 			return(rp);
833 	return(0);
834 }
835 
836 void
dechain(void)837 dechain(void)
838 {
839 	Label	*lptr;
840 	SedCom	*rptr, *trptr;
841 
842 	for(lptr = ltab; lptr < lab; lptr++) {
843 
844 		if(lptr->address == 0)
845 			quit("Undefined label: %S", (char *) lptr->asc);
846 
847 		if(lptr->chain) {
848 			rptr = lptr->chain;
849 			while(trptr = rptr->u.lb1) {
850 				rptr->u.lb1 = lptr->address;
851 				rptr = trptr;
852 			}
853 			rptr->u.lb1 = lptr->address;
854 		}
855 	}
856 }
857 
858 int
ycomp(SedCom * r)859 ycomp(SedCom *r)
860 {
861 	int 	i;
862 	Rune	*rp;
863 	Rune	c, *tsp, highc;
864 	Rune	*sp;
865 
866 	highc = 0;
867 	for(tsp = cp; *tsp != seof; tsp++) {
868 		if(*tsp == '\\')
869 			tsp++;
870 		if(*tsp == '\n' || *tsp == '\0')
871 			return(0);
872 		if (*tsp > highc) highc = *tsp;
873 	}
874 	tsp++;
875 	if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
876 		quit("Out of memory", 0);
877 	*rp++ = highc;				/* save upper bound */
878 	for (i = 0; i <= highc; i++)
879 		rp[i] = i;
880 	sp = cp;
881 	while((c = *sp++) != seof) {
882 		if(c == '\\' && *sp == 'n') {
883 			sp++;
884 			c = '\n';
885 		}
886 		if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
887 			rp[c] = '\n';
888 			tsp++;
889 		}
890 		if(rp[c] == seof || rp[c] == '\0') {
891 			free(r->u.re1);
892 			r->u.re1 = 0;
893 			return(0);
894 		}
895 	}
896 	if(*tsp != seof) {
897 		free(r->u.re1);
898 		r->u.re1 = 0;
899 		return(0);
900 	}
901 	cp = tsp+1;
902 	return(1);
903 }
904 
905 void
execute(void)906 execute(void)
907 {
908 	SedCom	*ipc;
909 
910 	while (spend = gline(linebuf)){
911 		for(ipc = pspace; ipc->command; ) {
912 			if (!executable(ipc)) {
913 				ipc++;
914 				continue;
915 			}
916 			command(ipc);
917 
918 			if(delflag)
919 				break;
920 			if(jflag) {
921 				jflag = 0;
922 				if((ipc = ipc->u.lb1) == 0)
923 					break;
924 			} else
925 				ipc++;
926 
927 		}
928 		if(!nflag && !delflag)
929 			putline(&fout, linebuf, spend-linebuf);
930 		if(aptr > abuf) {
931 			arout();
932 		}
933 		delflag = 0;
934 	}
935 }
936 	/* determine if a statement should be applied to an input line */
937 int
executable(SedCom * ipc)938 executable(SedCom *ipc)
939 {
940 	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
941 		if (ipc->active == 1)		/* Second line */
942 			ipc->active = 2;
943 		switch(ipc->ad2.type) {
944 			case A_NONE:	/* No second addr; use first */
945 				ipc->active = 0;
946 				break;
947 			case A_DOL:	/* Accept everything */
948 				return !ipc->negfl;
949 			case A_LINE:	/* Line at end of range? */
950 				if (lnum <= ipc->ad2.u.line) {
951 					if (ipc->ad2.u.line == lnum)
952 						ipc->active = 0;
953 					return !ipc->negfl;
954 				}
955 				ipc->active = 0;	/* out of range */
956 				return ipc->negfl;
957 			case A_RE:	/* Check for matching R.E. */
958 				if (match(ipc->ad2.u.rp, linebuf))
959 					ipc->active = 0;
960 				return !ipc->negfl;
961 			default:		/* internal error */
962 				quit("Internal error", 0);
963 		}
964 	}
965 	switch (ipc->ad1.type) {	/* Check first address */
966 		case A_NONE:			/* Everything matches */
967 			return !ipc->negfl;
968 		case A_DOL:			/* Only last line */
969 			if (dolflag)
970 				return !ipc->negfl;
971 			break;
972 		case A_LINE:			/* Check line number */
973 			if (ipc->ad1.u.line == lnum) {
974 				ipc->active = 1;	/* In range */
975 				return !ipc->negfl;
976 			}
977 			break;
978 		case A_RE:			/* Check R.E. */
979 			if (match(ipc->ad1.u.rp, linebuf)) {
980 				ipc->active = 1;	/* In range */
981 				return !ipc->negfl;
982 			}
983 			break;
984 		default:
985 			quit("Internal error", 0);
986 	}
987 	return ipc->negfl;
988 }
989 
990 int
match(Reprog * pattern,Rune * buf)991 match(Reprog *pattern, Rune *buf)
992 {
993 	if (!pattern)
994 		return 0;
995 	subexp[0].s.rsp = buf;
996 	subexp[0].e.rep = 0;
997 	if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
998 		loc1 = subexp[0].s.rsp;
999 		loc2 = subexp[0].e.rep;
1000 		return 1;
1001 	}
1002 	loc1 = loc2 = 0;
1003 	return 0;
1004 }
1005 
1006 int
substitute(SedCom * ipc)1007 substitute(SedCom *ipc)
1008 {
1009 	int len;
1010 
1011 	if(!match(ipc->u.re1, linebuf))
1012 		return 0;
1013 
1014 	/*
1015 	 * we have at least one match.  some patterns, e.g. '$' or '^', can
1016 	 * produce zero-length matches, so during a global substitute we
1017 	 * must bump to the character after a zero-length match to keep from looping.
1018 	 */
1019 	sflag = 1;
1020 	if(ipc->gfl == 0)		/* single substitution */
1021 		dosub(ipc->rhs);
1022 	else
1023 	do{				/* global substitution */
1024 		len = loc2-loc1;	/* length of match */
1025 		dosub(ipc->rhs);	/* dosub moves loc2 */
1026 		if(*loc2 == 0)		/* end of string */
1027 			break;
1028 		if(len == 0)		/* zero-length R.E. match */
1029 			loc2++;		/* bump over zero-length match */
1030 		if(*loc2 == 0)		/* end of string */
1031 			break;
1032 	} while(match(ipc->u.re1, loc2));
1033 	return 1;
1034 }
1035 
1036 void
dosub(Rune * rhsbuf)1037 dosub(Rune *rhsbuf)
1038 {
1039 	Rune *lp, *sp;
1040 	Rune *rp;
1041 	int c, n;
1042 
1043 	lp = linebuf;
1044 	sp = genbuf;
1045 	rp = rhsbuf;
1046 	while (lp < loc1)
1047 		*sp++ = *lp++;
1048 	while(c = *rp++) {
1049 		if (c == '&') {
1050 			sp = place(sp, loc1, loc2);
1051 			continue;
1052 		}
1053 		if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
1054 			n = c-'0';
1055 			if (subexp[n].s.rsp && subexp[n].e.rep) {
1056 				sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1057 				continue;
1058 			}
1059 			else {
1060 				fprint(2, "sed: Invalid back reference \\%d\n",n);
1061 				errexit();
1062 			}
1063 		}
1064 		*sp++ = c;
1065 		if (sp >= &genbuf[LBSIZE])
1066 			fprint(2, "sed: Output line too long.\n");
1067 	}
1068 	lp = loc2;
1069 	loc2 = sp - genbuf + linebuf;
1070 	while (*sp++ = *lp++)
1071 		if (sp >= &genbuf[LBSIZE])
1072 			fprint(2, "sed: Output line too long.\n");
1073 	lp = linebuf;
1074 	sp = genbuf;
1075 	while (*lp++ = *sp++)
1076 		;
1077 	spend = lp-1;
1078 }
1079 
1080 Rune *
place(Rune * sp,Rune * l1,Rune * l2)1081 place(Rune *sp, Rune *l1, Rune *l2)
1082 {
1083 	while (l1 < l2) {
1084 		*sp++ = *l1++;
1085 		if (sp >= &genbuf[LBSIZE])
1086 			fprint(2, "sed: Output line too long.\n");
1087 	}
1088 	return(sp);
1089 }
1090 
/*
 * Return the printable form of c used by the l command: a C-style
 * escape for backspace, newline, carriage return, tab and backslash,
 * otherwise "\x" followed by the low 16 bits of c as four hex
 * digits.  The hex form lives in a static buffer that the next call
 * overwrites.
 */
char *
trans(int c)
{
	static char buf[] = "\\x0000";
	static char hex[] = "0123456789abcdef";
	int i;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	/* most significant nibble first, after the "\x" prefix */
	for(i = 0; i < 4; i++)
		buf[2+i] = hex[(c >> (12 - 4*i)) & 0xF];
	return buf;
}
1115 
1116 void
command(SedCom * ipc)1117 command(SedCom *ipc)
1118 {
1119 	int	i, c;
1120 	Rune	*p1, *p2;
1121 	char	*ucp;
1122 	Rune	*rp;
1123 	Rune	*execp;
1124 
1125 	switch(ipc->command) {
1126 
1127 		case ACOM:
1128 			*aptr++ = ipc;
1129 			if(aptr >= abuf+MAXADDS) {
1130 				quit("sed: Too many appends after line %ld\n",
1131 					(char *) lnum);
1132 			}
1133 			*aptr = 0;
1134 			break;
1135 		case CCOM:
1136 			delflag = 1;
1137 			if(ipc->active == 1) {
1138 				for(rp = ipc->u.text; *rp; rp++)
1139 					Bputrune(&fout, *rp);
1140 				Bputc(&fout, '\n');
1141 			}
1142 			break;
1143 		case DCOM:
1144 			delflag++;
1145 			break;
1146 		case CDCOM:
1147 			p1 = p2 = linebuf;
1148 			while(*p1 != '\n') {
1149 				if(*p1++ == 0) {
1150 					delflag++;
1151 					return;
1152 				}
1153 			}
1154 			p1++;
1155 			while(*p2++ = *p1++)
1156 				;
1157 			spend = p2-1;
1158 			jflag++;
1159 			break;
1160 		case EQCOM:
1161 			Bprint(&fout, "%ld\n", lnum);
1162 			break;
1163 		case GCOM:
1164 			p1 = linebuf;
1165 			p2 = holdsp;
1166 			while(*p1++ = *p2++)
1167 				;
1168 			spend = p1-1;
1169 			break;
1170 		case CGCOM:
1171 			*spend++ = '\n';
1172 			p1 = spend;
1173 			p2 = holdsp;
1174 			while(*p1++ = *p2++)
1175 				if(p1 >= lbend)
1176 					break;
1177 			spend = p1-1;
1178 			break;
1179 		case HCOM:
1180 			p1 = holdsp;
1181 			p2 = linebuf;
1182 			while(*p1++ = *p2++);
1183 			hspend = p1-1;
1184 			break;
1185 		case CHCOM:
1186 			*hspend++ = '\n';
1187 			p1 = hspend;
1188 			p2 = linebuf;
1189 			while(*p1++ = *p2++)
1190 				if(p1 >= hend)
1191 					break;
1192 			hspend = p1-1;
1193 			break;
1194 		case ICOM:
1195 			for(rp = ipc->u.text; *rp; rp++)
1196 				Bputrune(&fout, *rp);
1197 			Bputc(&fout, '\n');
1198 			break;
1199 		case BCOM:
1200 			jflag = 1;
1201 			break;
1202 		case LCOM:
1203 			c = 0;
1204 			for (i = 0, rp = linebuf; *rp; rp++) {
1205 				c = *rp;
1206 				if(c >= 0x20 && c < 0x7F && c != '\\') {
1207 					Bputc(&fout, c);
1208 					if(i++ > 71) {
1209 						Bprint(&fout, "\\\n");
1210 						i = 0;
1211 					}
1212 				} else {
1213 					for (ucp = trans(*rp); *ucp; ucp++){
1214 						c = *ucp;
1215 						Bputc(&fout, c);
1216 						if(i++ > 71) {
1217 							Bprint(&fout, "\\\n");
1218 							i = 0;
1219 						}
1220 					}
1221 				}
1222 			}
1223 			if(c == ' ')
1224 				Bprint(&fout, "\\n");
1225 			Bputc(&fout, '\n');
1226 			break;
1227 		case NCOM:
1228 			if(!nflag)
1229 				putline(&fout, linebuf, spend-linebuf);
1230 
1231 			if(aptr > abuf)
1232 				arout();
1233 			if((execp = gline(linebuf)) == 0) {
1234 				delflag = 1;
1235 				break;
1236 			}
1237 			spend = execp;
1238 			break;
1239 		case CNCOM:
1240 			if(aptr > abuf)
1241 				arout();
1242 			*spend++ = '\n';
1243 			if((execp = gline(spend)) == 0) {
1244 				delflag = 1;
1245 				break;
1246 			}
1247 			spend = execp;
1248 			break;
1249 		case PCOM:
1250 			putline(&fout, linebuf, spend-linebuf);
1251 			break;
1252 		case CPCOM:
1253 	cpcom:
1254 			for(rp = linebuf; *rp && *rp != '\n'; rp++)
1255 				Bputc(&fout, *rp);
1256 			Bputc(&fout, '\n');
1257 			break;
1258 		case QCOM:
1259 			if(!nflag)
1260 				putline(&fout, linebuf, spend-linebuf);
1261 			if(aptr > abuf)
1262 				arout();
1263 			exits(0);
1264 		case RCOM:
1265 			*aptr++ = ipc;
1266 			if(aptr >= &abuf[MAXADDS])
1267 				quit("sed: Too many reads after line %ld\n",
1268 					(char *) lnum);
1269 			*aptr = 0;
1270 			break;
1271 		case SCOM:
1272 			i = substitute(ipc);
1273 			if(i && ipc->pfl)
1274 				if(ipc->pfl == 1)
1275 					putline(&fout, linebuf, spend-linebuf);
1276 				else
1277 					goto cpcom;
1278 			if(i && ipc->fcode)
1279 				goto wcom;
1280 			break;
1281 
1282 		case TCOM:
1283 			if(sflag == 0)	break;
1284 			sflag = 0;
1285 			jflag = 1;
1286 			break;
1287 
1288 		wcom:
1289 		case WCOM:
1290 			putline(ipc->fcode,linebuf, spend-linebuf);
1291 			break;
1292 		case XCOM:
1293 			p1 = linebuf;
1294 			p2 = genbuf;
1295 			while(*p2++ = *p1++);
1296 			p1 = holdsp;
1297 			p2 = linebuf;
1298 			while(*p2++ = *p1++);
1299 			spend = p2 - 1;
1300 			p1 = genbuf;
1301 			p2 = holdsp;
1302 			while(*p2++ = *p1++);
1303 			hspend = p2 - 1;
1304 			break;
1305 		case YCOM:
1306 			p1 = linebuf;
1307 			p2 = ipc->u.text;
1308 			for (i = *p2++;	*p1; p1++){
1309 				if (*p1 <= i) *p1 = p2[*p1];
1310 			}
1311 			break;
1312 	}
1313 
1314 }
1315 
1316 void
putline(Biobuf * bp,Rune * buf,int n)1317 putline(Biobuf *bp, Rune *buf, int n)
1318 {
1319 	while (n--)
1320 		Bputrune(bp, *buf++);
1321 	Bputc(bp, '\n');
1322 	if(lflag)
1323 		Bflush(bp);
1324 }
1325 
1326 int
ecmp(Rune * a,Rune * b,int count)1327 ecmp(Rune *a, Rune *b, int count)
1328 {
1329 	while(count--)
1330 		if(*a++ != *b++)	return(0);
1331 	return(1);
1332 }
1333 
1334 void
arout(void)1335 arout(void)
1336 {
1337 	Rune	*p1;
1338 	Biobuf	*fi;
1339 	int	c;
1340 	char	*s;
1341 	char	buf[128];
1342 
1343 	for (aptr = abuf; *aptr; aptr++) {
1344 		if((*aptr)->command == ACOM) {
1345 			for(p1 = (*aptr)->u.text; *p1; p1++ )
1346 				Bputrune(&fout, *p1);
1347 			Bputc(&fout, '\n');
1348 		} else {
1349 			for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1350 					s += runetochar(s, p1);
1351 			*s = '\0';
1352 			if((fi = Bopen(buf, OREAD)) == 0)
1353 				continue;
1354 			while((c = Bgetc(fi)) >= 0)
1355 				Bputc(&fout, c);
1356 			Bterm(fi);
1357 		}
1358 	}
1359 	aptr = abuf;
1360 	*aptr = 0;
1361 }
1362 
/* Leave the program with the exit status string "error". */
void
errexit(void)
{
	exits("error");
}
1368 
/*
 * Print "sed: ", then msg formatted with arg, then a newline on file
 * descriptor 2, and exit through errexit().  msg is used as the
 * format string, so it may contain at most one conversion, matching
 * what arg really points to (callers pass rune strings for %S).
 */
void
quit(char *msg, char *arg)
{
	fprint(2, "sed: ");
	fprint(2, msg, arg);
	fprint(2, "\n");
	errexit();
}
1377 
1378 Rune *
gline(Rune * addr)1379 gline(Rune *addr)
1380 {
1381 	long	c;
1382 	Rune *p;
1383 
1384 	static long peekc = 0;
1385 
1386 	if (f == 0 && opendata() < 0)
1387 		return 0;
1388 	sflag = 0;
1389 	lnum++;
1390 /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
1391 	do {
1392 		p = addr;
1393 		for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1394 			if (c == '\n') {
1395 				if ((peekc = Bgetrune(f)) < 0) {
1396 					if (fhead == 0)
1397 						dolflag = 1;
1398 				}
1399 				*p = '\0';
1400 				return p;
1401 			}
1402 			if (c && p < lbend)
1403 				*p++ = c;
1404 		}
1405 		/* return partial final line, adding implicit newline */
1406 		if(p != addr) {
1407 			*p = '\0';
1408 			peekc = -1;
1409 			if (fhead == 0)
1410 				dolflag = 1;
1411 			return p;
1412 		}
1413 		peekc = 0;
1414 		Bterm(f);
1415 	} while (opendata() > 0);	/* Switch to next stream */
1416 	f = 0;
1417 	return 0;
1418 }
1419 
1420 	/* Data file input section - the intent is to transparently
1421 	 *	catenate all data input streams.
1422 	 */
1423 void
enroll(char * filename)1424 enroll(char *filename)		/* Add a file to the input file cache */
1425 {
1426 	FileCache *fp;
1427 
1428 	if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1429 		quit("Out of memory", 0);
1430 	if (ftail == 0)
1431 		fhead = fp;
1432 	else
1433 		ftail->next = fp;
1434 	ftail = fp;
1435 	fp->next = 0;
1436 	fp->name = filename;	/* 0 => stdin */
1437 }
1438 
1439 int
opendata(void)1440 opendata(void)
1441 {
1442 	if (fhead == 0)
1443 		return -1;
1444 	if (fhead->name) {
1445 		if ((f = Bopen(fhead->name, OREAD)) == 0)
1446 			quit("Can't open %s", fhead->name);
1447 	} else {
1448 		Binit(&bstdin, 0, OREAD);
1449 		f = &bstdin;
1450 	}
1451 	fhead = fhead->next;
1452 	return 1;
1453 }
1454