xref: /original-bsd/usr.bin/sed/process.c (revision 3d4d7284)
1 /*-
2  * Copyright (c) 1992 Diomidis Spinellis.
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Diomidis Spinellis of Imperial College, University of London.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 04/20/94";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "defs.h"
32 #include "extern.h"
33 
34 static SPACE HS, PS, SS;
35 #define	pd		PS.deleted
36 #define	ps		PS.space
37 #define	psl		PS.len
38 #define	hs		HS.space
39 #define	hsl		HS.len
40 
41 static inline int	 applies __P((struct s_command *));
42 static void		 flush_appends __P((void));
43 static void		 lputs __P((char *));
44 static inline int	 regexec_e __P((regex_t *, const char *, int, int, size_t));
45 static void		 regsub __P((SPACE *, char *, char *));
46 static int		 substitute __P((struct s_command *));
47 
48 struct s_appends *appends;	/* Array of pointers to strings to append. */
49 static int appendx;		/* Index into appends array. */
50 int appendnum;			/* Size of appends array. */
51 
52 static int lastaddr;		/* Set by applies if last address of a range. */
53 static int sdone;		/* If any substitutes since last line input. */
54 				/* Iov structure for 'w' commands. */
55 static regex_t *defpreg;
56 size_t maxnsub;
57 regmatch_t *match;
58 
59 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
60 
61 void
62 process()
63 {
64 	struct s_command *cp;
65 	SPACE tspace;
66 	size_t len;
67 	char oldc, *p;
68 
69 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
70 		pd = 0;
71 		cp = prog;
72 redirect:
73 		while (cp != NULL) {
74 			if (!applies(cp)) {
75 				cp = cp->next;
76 				continue;
77 			}
78 			switch (cp->code) {
79 			case '{':
80 				cp = cp->u.c;
81 				goto redirect;
82 			case 'a':
83 				if (appendx >= appendnum)
84 					appends = xrealloc(appends,
85 					    sizeof(struct s_appends) *
86 					    (appendnum *= 2));
87 				appends[appendx].type = AP_STRING;
88 				appends[appendx].s = cp->t;
89 				appends[appendx].len = strlen(cp->t);
90 				appendx++;
91 				break;
92 			case 'b':
93 				cp = cp->u.c;
94 				goto redirect;
95 			case 'c':
96 				pd = 1;
97 				psl = 0;
98 				if (cp->a2 == NULL || lastaddr)
99 					(void)printf("%s", cp->t);
100 				break;
101 			case 'd':
102 				pd = 1;
103 				goto new;
104 			case 'D':
105 				if (pd)
106 					goto new;
107 				if ((p = memchr(ps, '\n', psl)) == NULL)
108 					pd = 1;
109 				else {
110 					psl -= (p - ps) + 1;
111 					memmove(ps, p + 1, psl);
112 				}
113 				goto new;
114 			case 'g':
115 				cspace(&PS, hs, hsl, REPLACE);
116 				break;
117 			case 'G':
118 				cspace(&PS, hs, hsl, 0);
119 				break;
120 			case 'h':
121 				cspace(&HS, ps, psl, REPLACE);
122 				break;
123 			case 'H':
124 				cspace(&HS, ps, psl, 0);
125 				break;
126 			case 'i':
127 				(void)printf("%s", cp->t);
128 				break;
129 			case 'l':
130 				lputs(ps);
131 				break;
132 			case 'n':
133 				if (!nflag && !pd)
134 					OUT(ps)
135 				flush_appends();
136 				if (!mf_fgets(&PS, REPLACE))
137 					exit(0);
138 				pd = 0;
139 				break;
140 			case 'N':
141 				flush_appends();
142 				if (!mf_fgets(&PS, 0)) {
143 					if (!nflag && !pd)
144 						OUT(ps)
145 					exit(0);
146 				}
147 				break;
148 			case 'p':
149 				if (pd)
150 					break;
151 				OUT(ps)
152 				break;
153 			case 'P':
154 				if (pd)
155 					break;
156 				if ((p = memchr(ps, '\n', psl)) != NULL) {
157 					oldc = *p;
158 					*p = '\0';
159 				}
160 				OUT(ps)
161 				if (p != NULL)
162 					*p = oldc;
163 				break;
164 			case 'q':
165 				if (!nflag && !pd)
166 					OUT(ps)
167 				flush_appends();
168 				exit(0);
169 			case 'r':
170 				if (appendx >= appendnum)
171 					appends = xrealloc(appends,
172 					    sizeof(struct s_appends) *
173 					    (appendnum *= 2));
174 				appends[appendx].type = AP_FILE;
175 				appends[appendx].s = cp->t;
176 				appends[appendx].len = strlen(cp->t);
177 				appendx++;
178 				break;
179 			case 's':
180 				sdone |= substitute(cp);
181 				break;
182 			case 't':
183 				if (sdone) {
184 					sdone = 0;
185 					cp = cp->u.c;
186 					goto redirect;
187 				}
188 				break;
189 			case 'w':
190 				if (pd)
191 					break;
192 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
193 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
194 				    DEFFILEMODE)) == -1)
195 					err(FATAL, "%s: %s\n",
196 					    cp->t, strerror(errno));
197 				if (write(cp->u.fd, ps, psl) != psl)
198 					err(FATAL, "%s: %s\n",
199 					    cp->t, strerror(errno));
200 				break;
201 			case 'x':
202 				if (hs == NULL)
203 					cspace(&HS, "", 0, REPLACE);
204 				tspace = PS;
205 				PS = HS;
206 				HS = tspace;
207 				break;
208 			case 'y':
209 				if (pd)
210 					break;
211 				for (p = ps, len = psl; --len; ++p)
212 					*p = cp->u.y[*p];
213 				break;
214 			case ':':
215 			case '}':
216 				break;
217 			case '=':
218 				(void)printf("%lu\n", linenum);
219 			}
220 			cp = cp->next;
221 		} /* for all cp */
222 
223 new:		if (!nflag && !pd)
224 			OUT(ps)
225 		flush_appends();
226 	} /* for all lines */
227 }
228 
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps):
 *	AT_RE	the address RE is run against the pattern space;
 *	AT_LINE	the address line number is compared with linenum;
 *	any other type yields the lastline flag.
 *
 * The whole expansion is parenthesized so that the macro can be combined
 * with other operators (for example ``!MATCH(a)'') without the conditional
 * expression being split by operator precedence.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
236 
237 /*
238  * Return TRUE if the command applies to the current line.  Sets the inrange
239  * flag to process ranges.  Interprets the non-select (``!'') flag.
240  */
241 static inline int
242 applies(cp)
243 	struct s_command *cp;
244 {
245 	int r;
246 
247 	lastaddr = 0;
248 	if (cp->a1 == NULL && cp->a2 == NULL)
249 		r = 1;
250 	else if (cp->a2)
251 		if (cp->inrange) {
252 			if (MATCH(cp->a2)) {
253 				cp->inrange = 0;
254 				lastaddr = 1;
255 			}
256 			r = 1;
257 		} else if (MATCH(cp->a1)) {
258 			/*
259 			 * If the second address is a number less than or
260 			 * equal to the line number first selected, only
261 			 * one line shall be selected.
262 			 *	-- POSIX 1003.2
263 			 */
264 			if (cp->a2->type == AT_LINE &&
265 			    linenum >= cp->a2->u.l)
266 				lastaddr = 1;
267 			else
268 				cp->inrange = 1;
269 			r = 1;
270 		} else
271 			r = 0;
272 	else
273 		r = MATCH(cp->a1);
274 	return (cp->nonsel ? ! r : r);
275 }
276 
277 /*
278  * substitute --
279  *	Do substitutions in the pattern space.  Currently, we build a
280  *	copy of the new pattern space in the substitute space structure
281  *	and then swap them.
282  */
283 static int
284 substitute(cp)
285 	struct s_command *cp;
286 {
287 	SPACE tspace;
288 	regex_t *re;
289 	size_t re_off, slen;
290 	int lastempty, n;
291 	char *s;
292 
293 	s = ps;
294 	re = cp->u.s->re;
295 	if (re == NULL) {
296 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
297 			linenum = cp->u.s->linenum;
298 			err(COMPILE, "\\%d not defined in the RE",
299 			    cp->u.s->maxbref);
300 		}
301 	}
302 	if (!regexec_e(re, s, 0, 0, psl))
303 		return (0);
304 
305   	SS.len = 0;				/* Clean substitute space. */
306   	slen = psl;
307   	n = cp->u.s->n;
308 	lastempty = 1;
309 
310   	switch (n) {
311   	case 0:					/* Global */
312   		do {
313 			if (lastempty || match[0].rm_so != match[0].rm_eo) {
314 				/* Locate start of replaced string. */
315 				re_off = match[0].rm_so;
316 				/* Copy leading retained string. */
317 				cspace(&SS, s, re_off, APPEND);
318 				/* Add in regular expression. */
319 				regsub(&SS, s, cp->u.s->new);
320 			}
321 
322   			/* Move past this match. */
323 			if (match[0].rm_so != match[0].rm_eo) {
324 				s += match[0].rm_eo;
325 				slen -= match[0].rm_eo;
326 				lastempty = 0;
327 			} else {
328 				if (match[0].rm_so == 0)
329 					cspace(&SS,
330 					    s, match[0].rm_so + 1, APPEND);
331 				else
332 					cspace(&SS,
333 					    s + match[0].rm_so, 1, APPEND);
334 				s += match[0].rm_so + 1;
335 				slen -= match[0].rm_so + 1;
336 				lastempty = 1;
337 			}
338 		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
339 		/* Copy trailing retained string. */
340 		if (slen > 0)
341 			cspace(&SS, s, slen, APPEND);
342   		break;
343 	default:				/* Nth occurrence */
344 		while (--n) {
345 			s += match[0].rm_eo;
346 			slen -= match[0].rm_eo;
347 			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
348 				return (0);
349 		}
350 		/* FALLTHROUGH */
351 	case 1:					/* 1st occurrence */
352 		/* Locate start of replaced string. */
353 		re_off = match[0].rm_so + (s - ps);
354 		/* Copy leading retained string. */
355 		cspace(&SS, ps, re_off, APPEND);
356 		/* Add in regular expression. */
357 		regsub(&SS, s, cp->u.s->new);
358 		/* Copy trailing retained string. */
359 		s += match[0].rm_eo;
360 		slen -= match[0].rm_eo;
361 		cspace(&SS, s, slen, APPEND);
362 		break;
363 	}
364 
365 	/*
366 	 * Swap the substitute space and the pattern space, and make sure
367 	 * that any leftover pointers into stdio memory get lost.
368 	 */
369 	tspace = PS;
370 	PS = SS;
371 	SS = tspace;
372 	SS.space = SS.back;
373 
374 	/* Handle the 'p' flag. */
375 	if (cp->u.s->p)
376 		OUT(ps)
377 
378 	/* Handle the 'w' flag. */
379 	if (cp->u.s->wfile && !pd) {
380 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
381 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
382 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
383 		if (write(cp->u.s->wfd, ps, psl) != psl)
384 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
385 	}
386 	return (1);
387 }
388 
389 /*
390  * Flush append requests.  Always called before reading a line,
391  * therefore it also resets the substitution done (sdone) flag.
392  */
393 static void
394 flush_appends()
395 {
396 	FILE *f;
397 	int count, i;
398 	char buf[8 * 1024];
399 
400 	for (i = 0; i < appendx; i++)
401 		switch (appends[i].type) {
402 		case AP_STRING:
403 			fwrite(appends[i].s, sizeof(char), appends[i].len,
404 			    stdout);
405 			break;
406 		case AP_FILE:
407 			/*
408 			 * Read files probably shouldn't be cached.  Since
409 			 * it's not an error to read a non-existent file,
410 			 * it's possible that another program is interacting
411 			 * with the sed script through the file system.  It
412 			 * would be truly bizarre, but possible.  It's probably
413 			 * not that big a performance win, anyhow.
414 			 */
415 			if ((f = fopen(appends[i].s, "r")) == NULL)
416 				break;
417 			while (count = fread(buf, sizeof(char), sizeof(buf), f))
418 				(void)fwrite(buf, sizeof(char), count, stdout);
419 			(void)fclose(f);
420 			break;
421 		}
422 	if (ferror(stdout))
423 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
424 	appendx = sdone = 0;
425 }
426 
427 static void
428 lputs(s)
429 	register char *s;
430 {
431 	register int count;
432 	register char *escapes, *p;
433 	struct winsize win;
434 	static int termwidth = -1;
435 
436 	if (termwidth == -1)
437 		if (p = getenv("COLUMNS"))
438 			termwidth = atoi(p);
439 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
440 		    win.ws_col > 0)
441 			termwidth = win.ws_col;
442 		else
443 			termwidth = 60;
444 
445 	for (count = 0; *s; ++s) {
446 		if (count >= termwidth) {
447 			(void)printf("\\\n");
448 			count = 0;
449 		}
450 		if (isascii(*s) && isprint(*s) && *s != '\\') {
451 			(void)putchar(*s);
452 			count++;
453 		} else {
454 			escapes = "\\\a\b\f\n\r\t\v";
455 			(void)putchar('\\');
456 			if (p = strchr(escapes, *s)) {
457 				(void)putchar("\\abfnrtv"[p - escapes]);
458 				count += 2;
459 			} else {
460 				(void)printf("%03o", *(u_char *)s);
461 				count += 4;
462 			}
463 		}
464 	}
465 	(void)putchar('$');
466 	(void)putchar('\n');
467 	if (ferror(stdout))
468 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
469 }
470 
471 static inline int
472 regexec_e(preg, string, eflags, nomatch, slen)
473 	regex_t *preg;
474 	const char *string;
475 	int eflags, nomatch;
476 	size_t slen;
477 {
478 	int eval;
479 
480 	if (preg == NULL) {
481 		if (defpreg == NULL)
482 			err(FATAL, "first RE may not be empty");
483 	} else
484 		defpreg = preg;
485 
486 	/* Set anchors, discounting trailing newline (if any). */
487 	if (slen > 0 && string[slen - 1] == '\n')
488 		slen--;
489 	match[0].rm_so = 0;
490 	match[0].rm_eo = slen;
491 
492 	eval = regexec(defpreg, string,
493 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
494 	switch(eval) {
495 	case 0:
496 		return (1);
497 	case REG_NOMATCH:
498 		return (0);
499 	}
500 	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
501 	/* NOTREACHED */
502 }
503 
504 /*
505  * regsub - perform substitutions after a regexp match
506  * Based on a routine by Henry Spencer
507  */
508 static void
509 regsub(sp, string, src)
510 	SPACE *sp;
511 	char *string, *src;
512 {
513 	register int len, no;
514 	register char c, *dst;
515 
516 #define	NEEDSP(reqlen)							\
517 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
518 		sp->blen += (reqlen) + 1024;				\
519 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
520 		dst = sp->space + sp->len;				\
521 	}
522 
523 	dst = sp->space + sp->len;
524 	while ((c = *src++) != '\0') {
525 		if (c == '&')
526 			no = 0;
527 		else if (c == '\\' && isdigit(*src))
528 			no = *src++ - '0';
529 		else
530 			no = -1;
531 		if (no < 0) {		/* Ordinary character. */
532  			if (c == '\\' && (*src == '\\' || *src == '&'))
533  				c = *src++;
534 			NEEDSP(1);
535  			*dst++ = c;
536 			++sp->len;
537  		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
538 			len = match[no].rm_eo - match[no].rm_so;
539 			NEEDSP(len);
540 			memmove(dst, string + match[no].rm_so, len);
541 			dst += len;
542 			sp->len += len;
543 		}
544 	}
545 	NEEDSP(1);
546 	*dst = '\0';
547 }
548 
549 /*
550  * aspace --
551  *	Append the source space to the destination space, allocating new
552  *	space as necessary.
553  */
554 void
555 cspace(sp, p, len, spflag)
556 	SPACE *sp;
557 	char *p;
558 	size_t len;
559 	enum e_spflag spflag;
560 {
561 	size_t tlen;
562 
563 	/* Make sure SPACE has enough memory and ramp up quickly. */
564 	tlen = sp->len + len + 1;
565 	if (tlen > sp->blen) {
566 		sp->blen = tlen + 1024;
567 		sp->space = sp->back = xrealloc(sp->back, sp->blen);
568 	}
569 
570 	if (spflag == REPLACE)
571 		sp->len = 0;
572 
573 	memmove(sp->space + sp->len, p, len);
574 
575 	sp->space[sp->len += len] = '\0';
576 }
577 
578 /*
579  * Close all cached opened files and report any errors
580  */
581 void
582 cfclose(cp, end)
583 	register struct s_command *cp, *end;
584 {
585 
586 	for (; cp != end; cp = cp->next)
587 		switch(cp->code) {
588 		case 's':
589 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
590 				err(FATAL,
591 				    "%s: %s", cp->u.s->wfile, strerror(errno));
592 			cp->u.s->wfd = -1;
593 			break;
594 		case 'w':
595 			if (cp->u.fd != -1 && close(cp->u.fd))
596 				err(FATAL, "%s: %s", cp->t, strerror(errno));
597 			cp->u.fd = -1;
598 			break;
599 		case '{':
600 			cfclose(cp->u.c, cp->next);
601 			break;
602 		}
603 }
604