xref: /openbsd/usr.bin/sed/process.c (revision 8a7444b3)
1 /*	$OpenBSD: process.c,v 1.39 2024/12/10 23:49:55 millert Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Diomidis Spinellis.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis of Imperial College, University of London.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <regex.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "defs.h"
51 #include "extern.h"
52 
53 static SPACE HS, PS, SS;
54 #define	pd		PS.deleted
55 #define	ps		PS.space
56 #define	psl		PS.len
57 #define	psanl		PS.append_newline
58 #define	hs		HS.space
59 #define	hsl		HS.len
60 
61 static inline int	 applies(struct s_command *);
62 static void		 flush_appends(void);
63 static void		 lputs(char *, size_t);
64 static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
65 			     size_t);
66 static void		 regsub(SPACE *, char *, char *);
67 static int		 substitute(struct s_command *);
68 
69 struct s_appends *appends;	/* Array of pointers to strings to append. */
70 static size_t appendx;		/* Index into appends array. */
71 size_t appendnum;		/* Size of appends array. */
72 
73 static int lastaddr;		/* Set by applies if last address of a range. */
74 static int sdone;		/* If any substitutes since last line input. */
				/* Last RE executed; reused when an RE is empty. */
76 static regex_t *defpreg;
77 size_t maxnsub;
78 regmatch_t *match;
79 
80 #define OUT() do {\
81 	fwrite(ps, 1, psl, outfile);\
82 	if (psanl) fputc('\n', outfile);\
83 } while (0)
84 
85 void
process(void)86 process(void)
87 {
88 	struct s_command *cp;
89 	SPACE tspace;
90 	size_t len, oldpsl;
91 	char *p;
92 
93 	for (linenum = 0; mf_getline(&PS, REPLACE);) {
94 		pd = 0;
95 top:
96 		cp = prog;
97 redirect:
98 		while (cp != NULL) {
99 			if (!applies(cp)) {
100 				cp = cp->next;
101 				continue;
102 			}
103 			switch (cp->code) {
104 			case '{':
105 				cp = cp->u.c;
106 				goto redirect;
107 			case 'a':
108 				if (appendx >= appendnum) {
109 					appends = xreallocarray(appends,
110 					    appendnum,
111 					    2 * sizeof(struct s_appends));
112 					appendnum *= 2;
113 				}
114 				appends[appendx].type = AP_STRING;
115 				appends[appendx].s = cp->t;
116 				appends[appendx].len = strlen(cp->t);
117 				appendx++;
118 				break;
119 			case 'b':
120 				cp = cp->u.c;
121 				goto redirect;
122 			case 'c':
123 				pd = 1;
124 				psl = 0;
125 				if (cp->a2 == NULL || lastaddr || lastline())
126 					(void)fprintf(outfile, "%s", cp->t);
127 				goto new;
128 			case 'd':
129 				pd = 1;
130 				goto new;
131 			case 'D':
132 				if (pd)
133 					goto new;
134 				if (psl == 0 ||
135 				    (p = memchr(ps, '\n', psl)) == NULL) {
136 					pd = 1;
137 					goto new;
138 				} else {
139 					psl -= (p + 1) - ps;
140 					memmove(ps, p + 1, psl);
141 					goto top;
142 				}
143 			case 'g':
144 				cspace(&PS, hs, hsl, REPLACE);
145 				break;
146 			case 'G':
147 				cspace(&PS, "\n", 1, 0);
148 				cspace(&PS, hs, hsl, 0);
149 				break;
150 			case 'h':
151 				cspace(&HS, ps, psl, REPLACE);
152 				break;
153 			case 'H':
154 				cspace(&HS, "\n", 1, 0);
155 				cspace(&HS, ps, psl, 0);
156 				break;
157 			case 'i':
158 				(void)fprintf(outfile, "%s", cp->t);
159 				break;
160 			case 'l':
161 				lputs(ps, psl);
162 				break;
163 			case 'n':
164 				if (!nflag && !pd)
165 					OUT();
166 				flush_appends();
167 				if (!mf_getline(&PS, REPLACE))
168 					exit(0);
169 				pd = 0;
170 				break;
171 			case 'N':
172 				flush_appends();
173 				cspace(&PS, "\n", 1, 0);
174 				if (!mf_getline(&PS, 0))
175 					exit(0);
176 				break;
177 			case 'p':
178 				if (pd)
179 					break;
180 				OUT();
181 				break;
182 			case 'P':
183 				if (pd)
184 					break;
185 				if ((p = memchr(ps, '\n', psl)) != NULL) {
186 					oldpsl = psl;
187 					psl = p - ps;
188 					psanl = 1;
189 					OUT();
190 					psl = oldpsl;
191 				} else {
192 					OUT();
193 				}
194 				break;
195 			case 'q':
196 				if (!nflag && !pd)
197 					OUT();
198 				flush_appends();
199 				finish_file();
200 				exit(0);
201 			case 'r':
202 				if (appendx >= appendnum) {
203 					appends = xreallocarray(appends,
204 					    appendnum,
205 					    2 * sizeof(struct s_appends));
206 					appendnum *= 2;
207 				}
208 				appends[appendx].type = AP_FILE;
209 				appends[appendx].s = cp->t;
210 				appends[appendx].len = strlen(cp->t);
211 				appendx++;
212 				break;
213 			case 's':
214 				sdone |= substitute(cp);
215 				break;
216 			case 't':
217 				if (sdone) {
218 					sdone = 0;
219 					cp = cp->u.c;
220 					goto redirect;
221 				}
222 				break;
223 			case 'w':
224 				if (pd)
225 					break;
226 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
227 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
228 				    DEFFILEMODE)) == -1)
229 					err(1, "%s", cp->t);
230 				if ((size_t)write(cp->u.fd, ps, psl) != psl ||
231 				    write(cp->u.fd, "\n", 1) != 1)
232 					err(1, "%s", cp->t);
233 				break;
234 			case 'x':
235 				if (hs == NULL)
236 					cspace(&HS, "", 0, REPLACE);
237 				tspace = PS;
238 				PS = HS;
239 				psanl = tspace.append_newline;
240 				HS = tspace;
241 				break;
242 			case 'y':
243 				if (pd || psl == 0)
244 					break;
245 				for (p = ps, len = psl; len--; ++p)
246 					*p = cp->u.y[(unsigned char)*p];
247 				break;
248 			case ':':
249 			case '}':
250 				break;
251 			case '=':
252 				(void)fprintf(outfile, "%lu\n", linenum);
253 			}
254 			cp = cp->next;
255 		} /* for all cp */
256 
257 new:		if (!nflag && !pd)
258 			OUT();
259 		flush_appends();
260 	} /* for all lines */
261 }
262 
263 /*
264  * TRUE if the address passed matches the current program state
265  * (lastline, linenumber, ps).
266  */
267 #define	MATCH(a)						\
268 	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
269 	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
270 
271 /*
272  * Return TRUE if the command applies to the current line.  Sets the inrange
273  * flag to process ranges.  Interprets the non-select (``!'') flag.
274  */
275 static inline int
applies(struct s_command * cp)276 applies(struct s_command *cp)
277 {
278 	int r;
279 
280 	lastaddr = 0;
281 	if (cp->a1 == NULL && cp->a2 == NULL)
282 		r = 1;
283 	else if (cp->a2)
284 		if (cp->inrange) {
285 			if (MATCH(cp->a2)) {
286 				cp->inrange = 0;
287 				lastaddr = 1;
288 			}
289 			r = 1;
290 		} else if (MATCH(cp->a1)) {
291 			/*
292 			 * If the second address is a number less than or
293 			 * equal to the line number first selected, only
294 			 * one line shall be selected.
295 			 *	-- POSIX 1003.2
296 			 */
297 			if (cp->a2->type == AT_LINE &&
298 			    linenum >= cp->a2->u.l)
299 				lastaddr = 1;
300 			else
301 				cp->inrange = 1;
302 			r = 1;
303 		} else
304 			r = 0;
305 	else
306 		r = MATCH(cp->a1);
307 	return (cp->nonsel ? !r : r);
308 }
309 
310 /*
311  * Reset all inrange markers.
312  */
313 void
resetstate(void)314 resetstate(void)
315 {
316 	struct s_command *cp;
317 
318 	free(HS.back);
319 	memset(&HS, 0, sizeof(HS));
320 
321 	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
322 		if (cp->a2)
323 			cp->inrange = 0;
324 }
325 
326 /*
327  * substitute --
328  *	Do substitutions in the pattern space.  Currently, we build a
329  *	copy of the new pattern space in the substitute space structure
330  *	and then swap them.
331  */
332 static int
substitute(struct s_command * cp)333 substitute(struct s_command *cp)
334 {
335 	SPACE tspace;
336 	regex_t *re;
337 	regoff_t slen;
338 	int n, lastempty;
339 	regoff_t le = 0;
340 	char *s;
341 
342 	s = ps;
343 	re = cp->u.s->re;
344 	if (re == NULL) {
345 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
346 			linenum = cp->u.s->linenum;
347 			error("\\%d not defined in the RE", cp->u.s->maxbref);
348 		}
349 	}
350 	if (!regexec_e(re, ps, 0, 0, 0, psl))
351 		return (0);
352 
353 	SS.len = 0;				/* Clean substitute space. */
354 	slen = psl;
355 	n = cp->u.s->n;
356 	lastempty = 1;
357 
358 	do {
359 		/* Copy the leading retained string. */
360 		if (n <= 1 && (match[0].rm_so > le))
361 			cspace(&SS, s, match[0].rm_so - le, APPEND);
362 
363 		/* Skip zero-length matches right after other matches. */
364 		if (lastempty || (match[0].rm_so - le) ||
365 		    match[0].rm_so != match[0].rm_eo) {
366 			if (n <= 1) {
367 				/* Want this match: append replacement. */
368 				regsub(&SS, ps, cp->u.s->new);
369 				if (n == 1)
370 					n = -1;
371 			} else {
372 				/* Want a later match: append original. */
373 				if (match[0].rm_eo - le)
374 					cspace(&SS, s, match[0].rm_eo - le,
375 					    APPEND);
376 				n--;
377 			}
378 		}
379 
380 		/* Move past this match. */
381 		s = ps + match[0].rm_eo;
382 		slen = psl - match[0].rm_eo;
383 		le = match[0].rm_eo;
384 
385 		/*
386 		 * After a zero-length match, advance one byte,
387 		 * and at the end of the line, terminate.
388 		 */
389 		if (match[0].rm_so == match[0].rm_eo) {
390 			if (slen > 0) {
391 				cspace(&SS, s++, 1, APPEND);
392 				slen--;
393 				le++;
394 			} else
395 				slen = -1;
396 			lastempty = 1;
397 		} else
398 			lastempty = 0;
399 
400 	} while (n >= 0 && slen >= 0 &&
401 	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
402 
403 	/* Did not find the requested number of matches. */
404 	if (n > 0)
405 		return (0);
406 
407 	/* Copy the trailing retained string. */
408 	if (slen > 0)
409 		cspace(&SS, s, slen, APPEND);
410 
411 	/*
412 	 * Swap the substitute space and the pattern space, and make sure
413 	 * that any leftover pointers into stdio memory get lost.
414 	 */
415 	tspace = PS;
416 	PS = SS;
417 	psanl = tspace.append_newline;
418 	SS = tspace;
419 	SS.space = SS.back;
420 
421 	/* Handle the 'p' flag. */
422 	if (cp->u.s->p)
423 		OUT();
424 
425 	/* Handle the 'w' flag. */
426 	if (cp->u.s->wfile && !pd) {
427 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
428 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
429 			err(1, "%s", cp->u.s->wfile);
430 		if ((size_t)write(cp->u.s->wfd, ps, psl) != psl ||
431 		    write(cp->u.s->wfd, "\n", 1) != 1)
432 			err(1, "%s", cp->u.s->wfile);
433 	}
434 	return (1);
435 }
436 
437 /*
438  * Flush append requests.  Always called before reading a line,
439  * therefore it also resets the substitution done (sdone) flag.
440  */
441 static void
flush_appends(void)442 flush_appends(void)
443 {
444 	FILE *f;
445 	size_t count, idx;
446 	char buf[8 * 1024];
447 
448 	for (idx = 0; idx < appendx; idx++)
449 		switch (appends[idx].type) {
450 		case AP_STRING:
451 			fwrite(appends[idx].s, sizeof(char), appends[idx].len,
452 			    outfile);
453 			break;
454 		case AP_FILE:
455 			/*
456 			 * Read files probably shouldn't be cached.  Since
457 			 * it's not an error to read a non-existent file,
458 			 * it's possible that another program is interacting
459 			 * with the sed script through the file system.  It
460 			 * would be truly bizarre, but possible.  It's probably
461 			 * not that big a performance win, anyhow.
462 			 */
463 			if ((f = fopen(appends[idx].s, "r")) == NULL)
464 				break;
465 			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
466 				(void)fwrite(buf, sizeof(char), count, outfile);
467 			(void)fclose(f);
468 			break;
469 		}
470 	if (ferror(outfile))
471 		err(1, "%s", outfname);
472 	appendx = sdone = 0;
473 }
474 
475 static void
lputs(char * s,size_t len)476 lputs(char *s, size_t len)
477 {
478 	int count;
479 	extern int termwidth;
480 	const char *escapes;
481 	char *p;
482 
483 	for (count = 0; len > 0; len--, s++) {
484 		if (count >= termwidth) {
485 			(void)fprintf(outfile, "\\\n");
486 			count = 0;
487 		}
488 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
489 		    && *s != '\\') {
490 			(void)fputc(*s, outfile);
491 			count++;
492 		} else if (*s == '\n') {
493 			(void)fputc('$', outfile);
494 			(void)fputc('\n', outfile);
495 			count = 0;
496 		} else {
497 			escapes = "\\\a\b\f\r\t\v";
498 			(void)fputc('\\', outfile);
499 			if ((p = strchr(escapes, *s)) && *s != '\0') {
500 				(void)fputc("\\abfrtv"[p - escapes], outfile);
501 				count += 2;
502 			} else {
503 				(void)fprintf(outfile, "%03o", *(u_char *)s);
504 				count += 4;
505 			}
506 		}
507 	}
508 	(void)fputc('$', outfile);
509 	(void)fputc('\n', outfile);
510 	if (ferror(outfile))
511 		err(1, "%s", outfname);
512 }
513 
514 static inline int
regexec_e(regex_t * preg,const char * string,int eflags,int nomatch,size_t start,size_t stop)515 regexec_e(regex_t *preg, const char *string, int eflags,
516     int nomatch, size_t start, size_t stop)
517 {
518 	int eval;
519 
520 	if (preg == NULL) {
521 		if (defpreg == NULL)
522 			errx(1, "first RE may not be empty");
523 	} else
524 		defpreg = preg;
525 
526 	/* Set anchors */
527 	match[0].rm_so = start;
528 	match[0].rm_eo = stop;
529 
530 	eval = regexec(defpreg, string,
531 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
532 	switch (eval) {
533 	case 0:
534 		return (1);
535 	case REG_NOMATCH:
536 		return (0);
537 	}
538 	errx(1, "RE error: %s", strregerror(eval, defpreg));
539 }
540 
541 /*
542  * regsub - perform substitutions after a regexp match
543  * Based on a routine by Henry Spencer
544  */
545 static void
regsub(SPACE * sp,char * string,char * src)546 regsub(SPACE *sp, char *string, char *src)
547 {
548 	int len, no;
549 	char c, *dst;
550 
551 #define	NEEDSP(reqlen)							\
552 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
553 		size_t newlen = sp->blen + (reqlen) + 1024;		\
554 		sp->space = sp->back = xrealloc(sp->back, newlen);	\
555 		sp->blen = newlen;					\
556 		dst = sp->space + sp->len;				\
557 	}
558 
559 	dst = sp->space + sp->len;
560 	while ((c = *src++) != '\0') {
561 		if (c == '&')
562 			no = 0;
563 		else if (c == '\\' && isdigit((unsigned char)*src))
564 			no = *src++ - '0';
565 		else
566 			no = -1;
567 		if (no < 0) {		/* Ordinary character. */
568 			if (c == '\\' && (*src == '\\' || *src == '&'))
569 				c = *src++;
570 			NEEDSP(1);
571 			*dst++ = c;
572 			++sp->len;
573 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
574 			len = match[no].rm_eo - match[no].rm_so;
575 			NEEDSP(len);
576 			memmove(dst, string + match[no].rm_so, len);
577 			dst += len;
578 			sp->len += len;
579 		}
580 	}
581 	NEEDSP(1);
582 	*dst = '\0';
583 }
584 
585 /*
586  * aspace --
587  *	Append the source space to the destination space, allocating new
588  *	space as necessary.
589  */
590 void
cspace(SPACE * sp,const char * p,size_t len,enum e_spflag spflag)591 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
592 {
593 	size_t tlen;
594 
595 	/* Make sure SPACE has enough memory and ramp up quickly. */
596 	tlen = sp->len + len + 1;
597 	if (tlen > sp->blen) {
598 		size_t newlen = tlen + 1024;
599 		sp->space = sp->back = xrealloc(sp->back, newlen);
600 		sp->blen = newlen;
601 	}
602 
603 	if (spflag == REPLACE)
604 		sp->len = 0;
605 
606 	memmove(sp->space + sp->len, p, len);
607 
608 	sp->space[sp->len += len] = '\0';
609 }
610 
611 /*
612  * Close all cached opened files and report any errors
613  */
614 void
cfclose(struct s_command * cp,struct s_command * end)615 cfclose(struct s_command *cp, struct s_command *end)
616 {
617 
618 	for (; cp != end; cp = cp->next)
619 		switch (cp->code) {
620 		case 's':
621 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
622 				err(1, "%s", cp->u.s->wfile);
623 			cp->u.s->wfd = -1;
624 			break;
625 		case 'w':
626 			if (cp->u.fd != -1 && close(cp->u.fd))
627 				err(1, "%s", cp->t);
628 			cp->u.fd = -1;
629 			break;
630 		case '{':
631 			cfclose(cp->u.c, cp->next);
632 			break;
633 		}
634 }
635