xref: /openbsd/usr.bin/m4/gnum4.c (revision d67fc904)
1 /* $OpenBSD: gnum4.c,v 1.54 2022/07/04 10:41:21 espie Exp $ */
2 
3 /*
4  * Copyright (c) 1999-2022 Marc Espie <espie@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * functions needed to support gnu-m4 extensions, including a fake freezing
21  */
22 
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <paths.h>
28 #include <regex.h>
29 #include <stdarg.h>
30 #include <stddef.h>
31 #include <stdlib.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include "mdef.h"
39 #include "stdd.h"
40 #include "extern.h"
41 
42 
43 int mimic_gnu = 0;
44 
45 /*
46  * Support for include path search
47  * First search in the current directory.
48  * If not found, and the path is not absolute, include path kicks in.
49  * First, -I options, in the order found on the command line.
50  * Then M4PATH env variable
51  */
52 
/* One directory of the include search path; entries form a singly-linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, or NULL at the end */
};

/* Head and tail of the search list (both NULL until something is added). */
struct path_entry *first, *last;

static struct path_entry *new_path_entry(const char *);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *, const char *);
61 
62 static struct path_entry *
new_path_entry(const char * dirname)63 new_path_entry(const char *dirname)
64 {
65 	struct path_entry *n;
66 
67 	n = malloc(sizeof(struct path_entry));
68 	if (!n)
69 		errx(1, "out of memory");
70 	n->name = xstrdup(dirname);
71 	n->next = 0;
72 	return n;
73 }
74 
75 void
addtoincludepath(const char * dirname)76 addtoincludepath(const char *dirname)
77 {
78 	struct path_entry *n;
79 
80 	n = new_path_entry(dirname);
81 
82 	if (last) {
83 		last->next = n;
84 		last = n;
85 	}
86 	else
87 		last = first = n;
88 }
89 
90 static void
ensure_m4path()91 ensure_m4path()
92 {
93 	static int envpathdone = 0;
94 	char *envpath;
95 	char *sweep;
96 	char *path;
97 
98 	if (envpathdone)
99 		return;
100 	envpathdone = TRUE;
101 	envpath = getenv("M4PATH");
102 	if (!envpath)
103 		return;
104 	/* for portability: getenv result is read-only */
105 	envpath = xstrdup(envpath);
106 	for (sweep = envpath;
107 	    (path = strsep(&sweep, ":")) != NULL;)
108 	    addtoincludepath(path);
109 	free(envpath);
110 }
111 
112 static
113 struct input_file *
dopath(struct input_file * i,const char * filename)114 dopath(struct input_file *i, const char *filename)
115 {
116 	char path[PATH_MAX];
117 	struct path_entry *pe;
118 	FILE *f;
119 
120 	for (pe = first; pe; pe = pe->next) {
121 		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
122 		if ((f = fopen(path, "r")) != 0) {
123 			set_input(i, f, path);
124 			return i;
125 		}
126 	}
127 	return NULL;
128 }
129 
/*
 * Open filename for reading and install it on i.  The name is tried as
 * given first; when that fails and the name does not start with '/', the
 * include search list (with the M4PATH directories appended on first use)
 * is consulted.  Returns i on success, NULL otherwise.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *direct = fopen(filename, "r");

	if (direct != NULL) {
		set_input(i, direct, filename);
		return i;
	}

	/* absolute names are never looked up in the include path */
	if (filename[0] != '/') {
		ensure_m4path();
		return dopath(i, filename);
	}
	return NULL;
}
147 
148 void
doindir(const char * argv[],int argc)149 doindir(const char *argv[], int argc)
150 {
151 	ndptr n;
152 	struct macro_definition *p;
153 
154 	n = lookup(argv[2]);
155 	if (n == NULL || (p = macro_getdef(n)) == NULL)
156 		m4errx(1, "indir: undefined macro %s.", argv[2]);
157 	argv[1] = p->defn;
158 
159 	eval(argv+1, argc-1, p->type, is_traced(n));
160 }
161 
162 void
dobuiltin(const char * argv[],int argc)163 dobuiltin(const char *argv[], int argc)
164 {
165 	ndptr p;
166 
167 	argv[1] = NULL;
168 	p = macro_getbuiltin(argv[2]);
169 	if (p != NULL)
170 		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
171 	else
172 		m4errx(1, "unknown builtin %s.", argv[2]);
173 }
174 
175 
/*
 * Scratch buffer used to build results front to back: pushback works
 * backwards, whereas substitution produces its output in forward order.
 */
static char *buffer;		/* storage, grown on demand */
static size_t bufsize;		/* bytes currently allocated for buffer */
static size_t current;		/* bytes of buffer filled so far */

static void addchars(const char *, size_t);
static void addchar(int);
static char *twiddle(const char *);
static char *getstring(void);
static void exit_regerror(int, regex_t *, const char *);
static void do_subst(const char *, regex_t *, const char *, const char *,
    regmatch_t *);
static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *);
static void do_regexp(const char *, regex_t *, const char *, const char *,
    regmatch_t *);
static void add_sub(int, const char *, regex_t *, regmatch_t *);
static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
/* append a string literal, leaving out its terminating NUL */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
195 
196 static void
addchars(const char * c,size_t n)197 addchars(const char *c, size_t n)
198 {
199 	if (n == 0)
200 		return;
201 	while (current + n > bufsize) {
202 		if (bufsize == 0)
203 			bufsize = 1024;
204 		else if (bufsize <= SIZE_MAX/2) {
205 			bufsize *= 2;
206 		} else {
207 			errx(1, "size overflow");
208 		}
209 		buffer = xrealloc(buffer, bufsize, NULL);
210 	}
211 	memcpy(buffer+current, c, n);
212 	current += n;
213 }
214 
215 static void
addchar(int c)216 addchar(int c)
217 {
218 	if (current +1 > bufsize) {
219 		if (bufsize == 0)
220 			bufsize = 1024;
221 		else
222 			bufsize *= 2;
223 		buffer = xrealloc(buffer, bufsize, NULL);
224 	}
225 	buffer[current++] = c;
226 }
227 
228 static char *
getstring(void)229 getstring(void)
230 {
231 	addchar('\0');
232 	current = 0;
233 	return buffer;
234 }
235 
236 
/*
 * Turn regex error code er (obtained while working on re) into a message
 * and hand it to m4errx() with status 1.  source is the pattern text
 * quoted in the message.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	char *msg;
	/* a first call with no buffer yields the size the message needs */
	size_t need = regerror(er, re, NULL, 0);

	msg = xalloc(need, "malloc in regerror: %lu", (unsigned long)need);
	regerror(er, re, msg, need);
	m4errx(1, "regular expression error in %s: %s.", source, msg);
}
249 
250 /* warnx() plus check to see if we need to change exit code or exit.
251  * -E flag functionality.
252  */
253 void
m4_warnx(const char * fmt,...)254 m4_warnx(const char *fmt, ...)
255 {
256 	va_list ap;
257 
258 	va_start(ap, fmt);
259 	vwarnx(fmt, ap);
260 	va_end(ap);
261 
262 	if (fatal_warns)
263 		exit(1);
264 	if (error_warns)
265 		exit_code = 1;
266 }
267 
/*
 * Append the text matched by subexpression n (0 is the whole match) of
 * the match recorded in pm against string.  A number above re->re_nsub
 * draws a warning; a subexpression that did not take part in the match
 * is not an error and simply adds nothing.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	const regmatch_t *m;

	if (n > re->re_nsub) {
		m4_warnx("No subexpression %d", n);
		return;
	}
	m = &pm[n];
	if (m->rm_so == -1 || m->rm_eo == -1)
		return;
	addchars(string + m->rm_so, m->rm_eo - m->rm_so);
}
281 
282 /* Add replacement string to the output buffer, recognizing special
283  * constructs and replacing them with substrings of the original string.
284  */
285 static void
add_replace(const char * string,regex_t * re,const char * replace,regmatch_t * pm)286 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
287 {
288 	const char *p;
289 
290 	for (p = replace; *p != '\0'; p++) {
291 		if (*p == '&' && !mimic_gnu) {
292 			add_sub(0, string, re, pm);
293 			continue;
294 		}
295 		if (*p == '\\') {
296 			if (p[1] == '\\') {
297 				addchar(p[1]);
298 				p++;
299 				continue;
300 			}
301 			if (p[1] == '&') {
302 				if (mimic_gnu)
303 					add_sub(0, string, re, pm);
304 				else
305 					addchar(p[1]);
306 				p++;
307 				continue;
308 			}
309 			if (isdigit((unsigned char)p[1])) {
310 				add_sub(*(++p) - '0', string, re, pm);
311 				continue;
312 			}
313 		}
314 		addchar(*p);
315 	}
316 }
317 
/*
 * Replace every match of re in string: unmatched text and expanded
 * replacements are appended to the scratch buffer, and the unmatched
 * tail of string is pushed back with pbstr().  A regexec() failure other
 * than REG_NOMATCH is reported through exit_regerror() using source.
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;

		/* the next search starts at a line start only after '\n' */
		if (pm[0].rm_eo != 0)
			eflags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/*
		 * Empty matches follow the `vi-mode' rule: no empty match
		 * is accepted at the position of the previous match.  Copy
		 * one character and search again from the next one.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == prev_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		prev_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
358 
/*
 * Match re against string once.  On a match, the expanded replacement is
 * pushed back; on REG_NOMATCH nothing happens; any other regexec() result
 * is reported through exit_regerror() using source.
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == REG_NOMATCH)
		return;
	if (rc != 0) {
		exit_regerror(rc, re, source);
		return;
	}
	add_replace(string, re, replace, pm);
	pbstr(getstring());
}
376 
/*
 * Match re against string once and push back the offset where the match
 * starts, or -1 when there is no match.  Any other regexec() result is
 * reported through exit_regerror() using source.
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
394 
/*
 * In Gnu m4 mode, the regexp syntax doesn't follow POSIX 1003.2 extended
 * syntax, so the pattern is rewritten before it is passed to regcomp:
 *  - `\(', `\)' and `\|' lose their backslash, while a bare `(', `)' or
 *    `|' gains one;
 *  - a `+' at the very start (or right after a leading `^') gets a
 *    backslash in front;
 *  - `\w' and `\W' become bracket expressions, `\<' and `\>' become
 *    [[:<:]] and [[:>:]];
 *  - any other backslash pair is copied unchanged.
 * A lone backslash at the very end of the pattern is copied as is and
 * ends the scan, so the terminating NUL is never stepped over.
 *
 * The result is returned in the shared scratch buffer (via getstring()),
 * so it is only valid until something else is added to that buffer.
 */
static char *
twiddle(const char *p)
{
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			if (p[1] == '\0') {
				/* trailing backslash: nothing follows to pair with */
				addchar(*p);
				break;
			}
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
445 
446 /* patsubst(string, regexp, opt replacement) */
447 /* argv[2]: string
448  * argv[3]: regexp
449  * argv[4]: opt rep
450  */
451 void
dopatsubst(const char * argv[],int argc)452 dopatsubst(const char *argv[], int argc)
453 {
454 	if (argc <= 3) {
455 		m4_warnx("Too few arguments to patsubst");
456 		return;
457 	}
458 	/* special case: empty regexp */
459 	if (argv[3][0] == '\0') {
460 		const char *s;
461 		size_t len;
462 		if (argc > 4 && argv[4])
463 			len = strlen(argv[4]);
464 		else
465 			len = 0;
466 		for (s = argv[2]; *s != '\0'; s++) {
467 			addchars(argv[4], len);
468 			addchar(*s);
469 		}
470 	} else {
471 		int error;
472 		regex_t re;
473 		regmatch_t *pmatch;
474 		int mode = REG_EXTENDED;
475 		const char *source;
476 		size_t l = strlen(argv[3]);
477 
478 		if (!mimic_gnu ||
479 		    (argv[3][0] == '^') ||
480 		    (l > 0 && argv[3][l-1] == '$'))
481 			mode |= REG_NEWLINE;
482 
483 		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
484 		error = regcomp(&re, source, mode);
485 		if (error != 0)
486 			exit_regerror(error, &re, source);
487 
488 		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
489 		    NULL);
490 		do_subst(argv[2], &re, source,
491 		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
492 		free(pmatch);
493 		regfree(&re);
494 	}
495 	pbstr(getstring());
496 }
497 
498 void
doregexp(const char * argv[],int argc)499 doregexp(const char *argv[], int argc)
500 {
501 	int error;
502 	regex_t re;
503 	regmatch_t *pmatch;
504 	const char *source;
505 
506 	if (argc <= 3) {
507 		m4_warnx("Too few arguments to regexp");
508 		return;
509 	}
510 	/* special gnu case */
511 	if (argv[3][0] == '\0' && mimic_gnu) {
512 		if (argc == 4 || argv[4] == NULL)
513 			return;
514 		else
515 			pbstr(argv[4]);
516 	}
517 	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
518 	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
519 	if (error != 0)
520 		exit_regerror(error, &re, source);
521 
522 	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
523 	if (argc == 4 || argv[4] == NULL)
524 		do_regexpindex(argv[2], &re, source, pmatch);
525 	else
526 		do_regexp(argv[2], &re, source, argv[4], pmatch);
527 	free(pmatch);
528 	regfree(&re);
529 }
530 
/*
 * format: a small printf look-alike.  argv[2] is the format string and
 * the arguments from argv[3] on are consumed in order.  Understood:
 *   %%          a literal `%'
 *   %s          the next argument as a string
 *   %c          the next argument, read as a decimal number, as one char
 * each with an optional width and `.precision', given as digits or as
 * `*' (taken from the next argument).  A negative width pads on the
 * right instead of the left; the precision caps the number of characters
 * copied.  Running out of arguments or meeting any other conversion is
 * reported through m4errx() with status 1.  The result is pushed back.
 */
void
doformat(const char *argv[], int argc)
{
	const char *fmt = argv[2];
	const char *text;
	char chbuf[2];
	int next = 3;		/* index of the next unused argument */
	int pad_right;
	long field, prec;
	size_t len;

	while (*fmt != 0) {
		/* ordinary characters are copied through */
		if (*fmt != '%') {
			addchar(*fmt++);
			continue;
		}

		fmt++;
		if (*fmt == '%') {
			addchar(*fmt++);
			continue;
		}
		if (*fmt == 0) {
			/* lone `%' at the end of the format */
			addchar('%');
			break;
		}

		/* field width */
		if (*fmt != '*') {
			field = strtol(fmt, (char **)&fmt, 10);
		} else {
			fmt++;
			if (next >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			field = strtol(argv[next++], NULL, 10);
		}
		pad_right = field < 0;
		if (pad_right)
			field = -field;

		/* precision, unlimited unless given */
		prec = LONG_MAX;
		if (*fmt == '.') {
			fmt++;
			if (*fmt != '*') {
				prec = strtol(fmt, (char **)&fmt, 10);
			} else {
				fmt++;
				if (next >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				prec = strtol(argv[next++], NULL, 10);
			}
		}

		if (next >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch(*fmt) {
		case 's':
			text = argv[next++];
			break;
		case 'c':
			chbuf[0] = strtoul(argv[next++], NULL, 10);
			chbuf[1] = 0;
			text = chbuf;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		fmt++;

		len = strlen(text);
		if (len > prec)
			len = prec;
		if (!pad_right) {
			for (; len < field; field--)
				addchar(' ');
		}
		addchars(text, len);
		if (pad_right) {
			for (; len < field; field--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
619 
620 void
doesyscmd(const char * cmd)621 doesyscmd(const char *cmd)
622 {
623 	int p[2];
624 	pid_t cpid;
625 	char *argv[4];
626 	int cc;
627 	int status;
628 
629 	/* Follow gnu m4 documentation: first flush buffers. */
630 	fflush(NULL);
631 
632 	argv[0] = "sh";
633 	argv[1] = "-c";
634 	argv[2] = (char *)cmd;
635 	argv[3] = NULL;
636 
637 	/* Just set up standard output, share stderr and stdin with m4 */
638 	if (pipe(p) == -1)
639 		err(1, "bad pipe");
640 	switch(cpid = fork()) {
641 	case -1:
642 		err(1, "bad fork");
643 		/* NOTREACHED */
644 	case 0:
645 		(void) close(p[0]);
646 		(void) dup2(p[1], 1);
647 		(void) close(p[1]);
648 		execv(_PATH_BSHELL, argv);
649 		exit(1);
650 	default:
651 		/* Read result in two stages, since m4's buffer is
652 		 * pushback-only. */
653 		(void) close(p[1]);
654 		do {
655 			char result[BUFSIZE];
656 			cc = read(p[0], result, sizeof result);
657 			if (cc > 0)
658 				addchars(result, cc);
659 		} while (cc > 0 || (cc == -1 && errno == EINTR));
660 
661 		(void) close(p[0]);
662 		while (waitpid(cpid, &status, 0) == -1) {
663 			if (errno != EINTR)
664 				break;
665 		}
666 		pbstr(getstring());
667 	}
668 }
669 
670 void
getdivfile(const char * name)671 getdivfile(const char *name)
672 {
673 	FILE *f;
674 	int c;
675 
676 	f = fopen(name, "r");
677 	if (!f)
678 		return;
679 
680 	while ((c = getc(f))!= EOF)
681 		putc(c, active);
682 	(void) fclose(f);
683 }
684