xref: /openbsd/usr.bin/sed/process.c (revision 4ad58405)
1 /*	$OpenBSD: process.c,v 1.35 2022/01/12 15:13:36 martijn Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Diomidis Spinellis.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis of Imperial College, University of London.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 
40 #include <ctype.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <regex.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "defs.h"
51 #include "extern.h"
52 
/*
 * Working buffers used while executing the script: HS is the hold space
 * (target of the 'h'/'H' commands), PS is the pattern space (the line being
 * edited) and SS is the scratch space in which substitute() builds its
 * result before swapping it with PS.
 */
static SPACE HS, PS, SS;
/* Shorthands for the pattern-space and hold-space fields used below. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	psanl		PS.append_newline
#define	hs		HS.space
#define	hsl		HS.len

/* Helpers private to this file. */
static inline int	 applies(struct s_command *);
static void		 flush_appends(void);
static void		 lputs(char *, size_t);
static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
			     size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Queue of pending 'a' texts and 'r' files. */
static size_t appendx;		/* Number of queued entries in appends. */
size_t appendnum;		/* Allocated size of the appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/* Last RE used; regexec_e reuses it for an empty RE. */
static regex_t *defpreg;
/*
 * regexec_e() asks regexec() for maxnsub + 1 entries of match[].  Neither is
 * sized in this file -- presumably both are set up when the script is
 * compiled; confirm against the compiler.
 */
size_t maxnsub;
regmatch_t *match;

/*
 * Write the pattern space to outfile, followed by a newline when the
 * pattern space's append_newline flag is set.
 */
#define OUT() do {\
	fwrite(ps, 1, psl, outfile);\
	if (psanl) fputc('\n', outfile);\
} while (0)
84 
85 void
process(void)86 process(void)
87 {
88 	struct s_command *cp;
89 	SPACE tspace;
90 	size_t len, oldpsl;
91 	char *p;
92 
93 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
94 		pd = 0;
95 top:
96 		cp = prog;
97 redirect:
98 		while (cp != NULL) {
99 			if (!applies(cp)) {
100 				cp = cp->next;
101 				continue;
102 			}
103 			switch (cp->code) {
104 			case '{':
105 				cp = cp->u.c;
106 				goto redirect;
107 			case 'a':
108 				if (appendx >= appendnum) {
109 					appends = xreallocarray(appends,
110 					    appendnum,
111 					    2 * sizeof(struct s_appends));
112 					appendnum *= 2;
113 				}
114 				appends[appendx].type = AP_STRING;
115 				appends[appendx].s = cp->t;
116 				appends[appendx].len = strlen(cp->t);
117 				appendx++;
118 				break;
119 			case 'b':
120 				cp = cp->u.c;
121 				goto redirect;
122 			case 'c':
123 				pd = 1;
124 				psl = 0;
125 				if (cp->a2 == NULL || lastaddr || lastline())
126 					(void)fprintf(outfile, "%s", cp->t);
127 				break;
128 			case 'd':
129 				pd = 1;
130 				goto new;
131 			case 'D':
132 				if (pd)
133 					goto new;
134 				if (psl == 0 ||
135 				    (p = memchr(ps, '\n', psl)) == NULL) {
136 					pd = 1;
137 					goto new;
138 				} else {
139 					psl -= (p + 1) - ps;
140 					memmove(ps, p + 1, psl);
141 					goto top;
142 				}
143 			case 'g':
144 				cspace(&PS, hs, hsl, REPLACE);
145 				break;
146 			case 'G':
147 				cspace(&PS, "\n", 1, 0);
148 				cspace(&PS, hs, hsl, 0);
149 				break;
150 			case 'h':
151 				cspace(&HS, ps, psl, REPLACE);
152 				break;
153 			case 'H':
154 				cspace(&HS, "\n", 1, 0);
155 				cspace(&HS, ps, psl, 0);
156 				break;
157 			case 'i':
158 				(void)fprintf(outfile, "%s", cp->t);
159 				break;
160 			case 'l':
161 				lputs(ps, psl);
162 				break;
163 			case 'n':
164 				if (!nflag && !pd)
165 					OUT();
166 				flush_appends();
167 				if (!mf_fgets(&PS, REPLACE))
168 					exit(0);
169 				pd = 0;
170 				break;
171 			case 'N':
172 				flush_appends();
173 				cspace(&PS, "\n", 1, 0);
174 				if (!mf_fgets(&PS, 0))
175 					exit(0);
176 				break;
177 			case 'p':
178 				if (pd)
179 					break;
180 				OUT();
181 				break;
182 			case 'P':
183 				if (pd)
184 					break;
185 				if ((p = memchr(ps, '\n', psl)) != NULL) {
186 					oldpsl = psl;
187 					psl = p - ps;
188 					psanl = 1;
189 					OUT();
190 					psl = oldpsl;
191 				} else {
192 					OUT();
193 				}
194 				break;
195 			case 'q':
196 				if (!nflag && !pd)
197 					OUT();
198 				flush_appends();
199 				finish_file();
200 				exit(0);
201 			case 'r':
202 				if (appendx >= appendnum) {
203 					appends = xreallocarray(appends,
204 					    appendnum,
205 					    2 * sizeof(struct s_appends));
206 					appendnum *= 2;
207 				}
208 				appends[appendx].type = AP_FILE;
209 				appends[appendx].s = cp->t;
210 				appends[appendx].len = strlen(cp->t);
211 				appendx++;
212 				break;
213 			case 's':
214 				sdone |= substitute(cp);
215 				break;
216 			case 't':
217 				if (sdone) {
218 					sdone = 0;
219 					cp = cp->u.c;
220 					goto redirect;
221 				}
222 				break;
223 			case 'w':
224 				if (pd)
225 					break;
226 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
227 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
228 				    DEFFILEMODE)) == -1)
229 					error(FATAL, "%s: %s",
230 					    cp->t, strerror(errno));
231 				if ((size_t)write(cp->u.fd, ps, psl) != psl ||
232 				    write(cp->u.fd, "\n", 1) != 1)
233 					error(FATAL, "%s: %s",
234 					    cp->t, strerror(errno));
235 				break;
236 			case 'x':
237 				if (hs == NULL)
238 					cspace(&HS, "", 0, REPLACE);
239 				tspace = PS;
240 				PS = HS;
241 				psanl = tspace.append_newline;
242 				HS = tspace;
243 				break;
244 			case 'y':
245 				if (pd || psl == 0)
246 					break;
247 				for (p = ps, len = psl; len--; ++p)
248 					*p = cp->u.y[(unsigned char)*p];
249 				break;
250 			case ':':
251 			case '}':
252 				break;
253 			case '=':
254 				(void)fprintf(outfile, "%lu\n", linenum);
255 			}
256 			cp = cp->next;
257 		} /* for all cp */
258 
259 new:		if (!nflag && !pd)
260 			OUT();
261 		flush_appends();
262 	} /* for all lines */
263 }
264 
265 /*
266  * TRUE if the address passed matches the current program state
267  * (lastline, linenumber, ps).
268  */
269 #define	MATCH(a)						\
270 	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
271 	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
272 
273 /*
274  * Return TRUE if the command applies to the current line.  Sets the inrange
275  * flag to process ranges.  Interprets the non-select (``!'') flag.
276  */
277 static inline int
applies(struct s_command * cp)278 applies(struct s_command *cp)
279 {
280 	int r;
281 
282 	lastaddr = 0;
283 	if (cp->a1 == NULL && cp->a2 == NULL)
284 		r = 1;
285 	else if (cp->a2)
286 		if (cp->inrange) {
287 			if (MATCH(cp->a2)) {
288 				cp->inrange = 0;
289 				lastaddr = 1;
290 			}
291 			r = 1;
292 		} else if (MATCH(cp->a1)) {
293 			/*
294 			 * If the second address is a number less than or
295 			 * equal to the line number first selected, only
296 			 * one line shall be selected.
297 			 *	-- POSIX 1003.2
298 			 */
299 			if (cp->a2->type == AT_LINE &&
300 			    linenum >= cp->a2->u.l)
301 				lastaddr = 1;
302 			else
303 				cp->inrange = 1;
304 			r = 1;
305 		} else
306 			r = 0;
307 	else
308 		r = MATCH(cp->a1);
309 	return (cp->nonsel ? !r : r);
310 }
311 
312 /*
313  * Reset all inrange markers.
314  */
315 void
resetstate(void)316 resetstate(void)
317 {
318 	struct s_command *cp;
319 
320 	free(HS.back);
321 	memset(&HS, 0, sizeof(HS));
322 
323 	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
324 		if (cp->a2)
325 			cp->inrange = 0;
326 }
327 
328 /*
329  * substitute --
330  *	Do substitutions in the pattern space.  Currently, we build a
331  *	copy of the new pattern space in the substitute space structure
332  *	and then swap them.
333  */
334 static int
substitute(struct s_command * cp)335 substitute(struct s_command *cp)
336 {
337 	SPACE tspace;
338 	regex_t *re;
339 	regoff_t slen;
340 	int n, lastempty;
341 	regoff_t le = 0;
342 	char *s;
343 
344 	s = ps;
345 	re = cp->u.s->re;
346 	if (re == NULL) {
347 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
348 			linenum = cp->u.s->linenum;
349 			error(COMPILE, "\\%d not defined in the RE",
350 			    cp->u.s->maxbref);
351 		}
352 	}
353 	if (!regexec_e(re, ps, 0, 0, 0, psl))
354 		return (0);
355 
356 	SS.len = 0;				/* Clean substitute space. */
357 	slen = psl;
358 	n = cp->u.s->n;
359 	lastempty = 1;
360 
361 	do {
362 		/* Copy the leading retained string. */
363 		if (n <= 1 && (match[0].rm_so > le))
364 			cspace(&SS, s, match[0].rm_so - le, APPEND);
365 
366 		/* Skip zero-length matches right after other matches. */
367 		if (lastempty || (match[0].rm_so - le) ||
368 		    match[0].rm_so != match[0].rm_eo) {
369 			if (n <= 1) {
370 				/* Want this match: append replacement. */
371 				regsub(&SS, ps, cp->u.s->new);
372 				if (n == 1)
373 					n = -1;
374 			} else {
375 				/* Want a later match: append original. */
376 				if (match[0].rm_eo - le)
377 					cspace(&SS, s, match[0].rm_eo - le,
378 					    APPEND);
379 				n--;
380 			}
381 		}
382 
383 		/* Move past this match. */
384 		s = ps + match[0].rm_eo;
385 		slen = psl - match[0].rm_eo;
386 		le = match[0].rm_eo;
387 
388 		/*
389 		 * After a zero-length match, advance one byte,
390 		 * and at the end of the line, terminate.
391 		 */
392 		if (match[0].rm_so == match[0].rm_eo) {
393 			if (*s == '\0' || *s == '\n')
394 				slen = -1;
395 			else
396 				slen--;
397 			if (*s != '\0') {
398 				cspace(&SS, s++, 1, APPEND);
399 				le++;
400 			}
401 			lastempty = 1;
402 		} else
403 			lastempty = 0;
404 
405 	} while (n >= 0 && slen >= 0 &&
406 	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
407 
408 	/* Did not find the requested number of matches. */
409 	if (n > 0)
410 		return (0);
411 
412 	/* Copy the trailing retained string. */
413 	if (slen > 0)
414 		cspace(&SS, s, slen, APPEND);
415 
416 	/*
417 	 * Swap the substitute space and the pattern space, and make sure
418 	 * that any leftover pointers into stdio memory get lost.
419 	 */
420 	tspace = PS;
421 	PS = SS;
422 	psanl = tspace.append_newline;
423 	SS = tspace;
424 	SS.space = SS.back;
425 
426 	/* Handle the 'p' flag. */
427 	if (cp->u.s->p)
428 		OUT();
429 
430 	/* Handle the 'w' flag. */
431 	if (cp->u.s->wfile && !pd) {
432 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
433 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
434 			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
435 		if ((size_t)write(cp->u.s->wfd, ps, psl) != psl ||
436 		    write(cp->u.s->wfd, "\n", 1) != 1)
437 			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
438 	}
439 	return (1);
440 }
441 
442 /*
443  * Flush append requests.  Always called before reading a line,
444  * therefore it also resets the substitution done (sdone) flag.
445  */
446 static void
flush_appends(void)447 flush_appends(void)
448 {
449 	FILE *f;
450 	size_t count, idx;
451 	char buf[8 * 1024];
452 
453 	for (idx = 0; idx < appendx; idx++)
454 		switch (appends[idx].type) {
455 		case AP_STRING:
456 			fwrite(appends[idx].s, sizeof(char), appends[idx].len,
457 			    outfile);
458 			break;
459 		case AP_FILE:
460 			/*
461 			 * Read files probably shouldn't be cached.  Since
462 			 * it's not an error to read a non-existent file,
463 			 * it's possible that another program is interacting
464 			 * with the sed script through the file system.  It
465 			 * would be truly bizarre, but possible.  It's probably
466 			 * not that big a performance win, anyhow.
467 			 */
468 			if ((f = fopen(appends[idx].s, "r")) == NULL)
469 				break;
470 			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
471 				(void)fwrite(buf, sizeof(char), count, outfile);
472 			(void)fclose(f);
473 			break;
474 		}
475 	if (ferror(outfile))
476 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
477 	appendx = sdone = 0;
478 }
479 
480 static void
lputs(char * s,size_t len)481 lputs(char *s, size_t len)
482 {
483 	int count;
484 	extern int termwidth;
485 	const char *escapes;
486 	char *p;
487 
488 	for (count = 0; len > 0; len--, s++) {
489 		if (count >= termwidth) {
490 			(void)fprintf(outfile, "\\\n");
491 			count = 0;
492 		}
493 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
494 		    && *s != '\\') {
495 			(void)fputc(*s, outfile);
496 			count++;
497 		} else if (*s == '\n') {
498 			(void)fputc('$', outfile);
499 			(void)fputc('\n', outfile);
500 			count = 0;
501 		} else {
502 			escapes = "\\\a\b\f\r\t\v";
503 			(void)fputc('\\', outfile);
504 			if ((p = strchr(escapes, *s)) && *s != '\0') {
505 				(void)fputc("\\abfrtv"[p - escapes], outfile);
506 				count += 2;
507 			} else {
508 				(void)fprintf(outfile, "%03o", *(u_char *)s);
509 				count += 4;
510 			}
511 		}
512 	}
513 	(void)fputc('$', outfile);
514 	(void)fputc('\n', outfile);
515 	if (ferror(outfile))
516 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
517 }
518 
519 static inline int
regexec_e(regex_t * preg,const char * string,int eflags,int nomatch,size_t start,size_t stop)520 regexec_e(regex_t *preg, const char *string, int eflags,
521     int nomatch, size_t start, size_t stop)
522 {
523 	int eval;
524 
525 	if (preg == NULL) {
526 		if (defpreg == NULL)
527 			error(FATAL, "first RE may not be empty");
528 	} else
529 		defpreg = preg;
530 
531 	/* Set anchors */
532 	match[0].rm_so = start;
533 	match[0].rm_eo = stop;
534 
535 	eval = regexec(defpreg, string,
536 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
537 	switch (eval) {
538 	case 0:
539 		return (1);
540 	case REG_NOMATCH:
541 		return (0);
542 	}
543 	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
544 }
545 
546 /*
547  * regsub - perform substitutions after a regexp match
548  * Based on a routine by Henry Spencer
549  */
550 static void
regsub(SPACE * sp,char * string,char * src)551 regsub(SPACE *sp, char *string, char *src)
552 {
553 	int len, no;
554 	char c, *dst;
555 
556 #define	NEEDSP(reqlen)							\
557 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
558 		size_t newlen = sp->blen + (reqlen) + 1024;		\
559 		sp->space = sp->back = xrealloc(sp->back, newlen);	\
560 		sp->blen = newlen;					\
561 		dst = sp->space + sp->len;				\
562 	}
563 
564 	dst = sp->space + sp->len;
565 	while ((c = *src++) != '\0') {
566 		if (c == '&')
567 			no = 0;
568 		else if (c == '\\' && isdigit((unsigned char)*src))
569 			no = *src++ - '0';
570 		else
571 			no = -1;
572 		if (no < 0) {		/* Ordinary character. */
573 			if (c == '\\' && (*src == '\\' || *src == '&'))
574 				c = *src++;
575 			NEEDSP(1);
576 			*dst++ = c;
577 			++sp->len;
578 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
579 			len = match[no].rm_eo - match[no].rm_so;
580 			NEEDSP(len);
581 			memmove(dst, string + match[no].rm_so, len);
582 			dst += len;
583 			sp->len += len;
584 		}
585 	}
586 	NEEDSP(1);
587 	*dst = '\0';
588 }
589 
590 /*
591  * aspace --
592  *	Append the source space to the destination space, allocating new
593  *	space as necessary.
594  */
595 void
cspace(SPACE * sp,const char * p,size_t len,enum e_spflag spflag)596 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
597 {
598 	size_t tlen;
599 
600 	/* Make sure SPACE has enough memory and ramp up quickly. */
601 	tlen = sp->len + len + 1;
602 	if (tlen > sp->blen) {
603 		size_t newlen = tlen + 1024;
604 		sp->space = sp->back = xrealloc(sp->back, newlen);
605 		sp->blen = newlen;
606 	}
607 
608 	if (spflag == REPLACE)
609 		sp->len = 0;
610 
611 	memmove(sp->space + sp->len, p, len);
612 
613 	sp->space[sp->len += len] = '\0';
614 }
615 
616 /*
617  * Close all cached opened files and report any errors
618  */
619 void
cfclose(struct s_command * cp,struct s_command * end)620 cfclose(struct s_command *cp, struct s_command *end)
621 {
622 
623 	for (; cp != end; cp = cp->next)
624 		switch (cp->code) {
625 		case 's':
626 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
627 				error(FATAL,
628 				    "%s: %s", cp->u.s->wfile, strerror(errno));
629 			cp->u.s->wfd = -1;
630 			break;
631 		case 'w':
632 			if (cp->u.fd != -1 && close(cp->u.fd))
633 				error(FATAL, "%s: %s", cp->t, strerror(errno));
634 			cp->u.fd = -1;
635 			break;
636 		case '{':
637 			cfclose(cp->u.c, cp->next);
638 			break;
639 		}
640 }
641