xref: /freebsd/usr.bin/m4/gnum4.c (revision b00ab754)
1 /* $OpenBSD: gnum4.c,v 1.50 2015/04/29 00:13:26 millert Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 1999 Marc Espie
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  * functions needed to support gnu-m4 extensions, including a fake freezing
34  */
35 
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <paths.h>
41 #include <regex.h>
42 #include <stddef.h>
43 #include <stdlib.h>
44 #include <stdint.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <errno.h>
48 #include <unistd.h>
49 #include <limits.h>
50 #include "mdef.h"
51 #include "stdd.h"
52 #include "extern.h"
53 
54 
/*
 * Nonzero selects GNU m4 compatible behaviour.  Within this file it is
 * consulted by the regexp helpers (add_replace, dopatsubst, doregexp) to
 * choose between the GNU and the non-GNU treatment of patterns and
 * replacement strings.
 */
int mimic_gnu = 0;
56 
57 /*
58  * Support for include path search
59  * First search in the current directory.
60  * If not found, and the path is not absolute, include path kicks in.
61  * First, -I options, in the order found on the command line.
62  * Then M4PATH env variable
63  */
64 
/* One directory on the include search path; entries form a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, NULL at the tail */
};

/* Head and tail of the search list; both are NULL while the list is empty. */
static struct path_entry *first;
static struct path_entry *last;
69 
70 static struct path_entry *new_path_entry(const char *);
71 static void ensure_m4path(void);
72 static struct input_file *dopath(struct input_file *, const char *);
73 
74 static struct path_entry *
75 new_path_entry(const char *dirname)
76 {
77 	struct path_entry *n;
78 
79 	n = malloc(sizeof(struct path_entry));
80 	if (!n)
81 		errx(1, "out of memory");
82 	n->name = xstrdup(dirname);
83 	n->next = 0;
84 	return n;
85 }
86 
87 void
88 addtoincludepath(const char *dirname)
89 {
90 	struct path_entry *n;
91 
92 	n = new_path_entry(dirname);
93 
94 	if (last) {
95 		last->next = n;
96 		last = n;
97 	}
98 	else
99 		last = first = n;
100 }
101 
102 static void
103 ensure_m4path(void)
104 {
105 	static int envpathdone = 0;
106 	char *envpath;
107 	char *sweep;
108 	char *path;
109 
110 	if (envpathdone)
111 		return;
112 	envpathdone = TRUE;
113 	envpath = getenv("M4PATH");
114 	if (!envpath)
115 		return;
116 	/* for portability: getenv result is read-only */
117 	envpath = xstrdup(envpath);
118 	for (sweep = envpath;
119 	    (path = strsep(&sweep, ":")) != NULL;)
120 	    addtoincludepath(path);
121 	free(envpath);
122 }
123 
124 static
125 struct input_file *
126 dopath(struct input_file *i, const char *filename)
127 {
128 	char path[PATH_MAX];
129 	struct path_entry *pe;
130 	FILE *f;
131 
132 	for (pe = first; pe; pe = pe->next) {
133 		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
134 		if ((f = fopen(path, "r")) != NULL) {
135 			set_input(i, f, path);
136 			return i;
137 		}
138 	}
139 	return NULL;
140 }
141 
/*
 * Open filename for reading and attach it to i.
 *
 * The name is first tried as given.  If that fails and the name is not
 * absolute, the include search path (including M4PATH, loaded on demand)
 * is consulted.  Returns i on success, NULL if the file cannot be opened.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* Absolute names are never searched for along the path. */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, fp, filename);
	return i;
}
159 
160 void
161 doindir(const char *argv[], int argc)
162 {
163 	ndptr n;
164 	struct macro_definition *p = NULL;
165 
166 	n = lookup(argv[2]);
167 	if (n == NULL || (p = macro_getdef(n)) == NULL)
168 		m4errx(1, "indir: undefined macro %s.", argv[2]);
169 	argv[1] = p->defn;
170 
171 	eval(argv+1, argc-1, p->type, is_traced(n));
172 }
173 
174 void
175 dobuiltin(const char *argv[], int argc)
176 {
177 	ndptr p;
178 
179 	argv[1] = NULL;
180 	p = macro_getbuiltin(argv[2]);
181 	if (p != NULL)
182 		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
183 	else
184 		m4errx(1, "unknown builtin %s.", argv[2]);
185 }
186 
187 
/*
 * Scratch space for building results: pb pushes BACK while substitution
 * proceeds forward, so output is accumulated here first.
 */
static char *buffer = NULL;	/* storage, grown on demand */
static size_t bufsize;		/* bytes currently allocated for buffer */
static size_t current;		/* bytes of buffer currently in use */
193 
194 static void addchars(const char *, size_t);
195 static void addchar(int);
196 static char *twiddle(const char *);
197 static char *getstring(void);
198 static void exit_regerror(int, regex_t *, const char *);
199 static void do_subst(const char *, regex_t *, const char *, const char *,
200     regmatch_t *);
201 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *);
202 static void do_regexp(const char *, regex_t *, const char *, const char *,
203     regmatch_t *);
204 static void add_sub(int, const char *, regex_t *, regmatch_t *);
205 static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
206 #define addconstantstring(s) addchars((s), sizeof(s)-1)
207 
208 static void
209 addchars(const char *c, size_t n)
210 {
211 	if (n == 0)
212 		return;
213 	while (current + n > bufsize) {
214 		if (bufsize == 0)
215 			bufsize = 1024;
216 		else if (bufsize <= SIZE_MAX/2) {
217 			bufsize *= 2;
218 		} else {
219 			errx(1, "size overflow");
220 		}
221 		buffer = xrealloc(buffer, bufsize, NULL);
222 	}
223 	memcpy(buffer+current, c, n);
224 	current += n;
225 }
226 
227 static void
228 addchar(int c)
229 {
230 	if (current +1 > bufsize) {
231 		if (bufsize == 0)
232 			bufsize = 1024;
233 		else
234 			bufsize *= 2;
235 		buffer = xrealloc(buffer, bufsize, NULL);
236 	}
237 	buffer[current++] = c;
238 }
239 
240 static char *
241 getstring(void)
242 {
243 	addchar('\0');
244 	current = 0;
245 	return buffer;
246 }
247 
248 
/*
 * Report a regcomp()/regexec() failure through m4errx() with exit code 1.
 *
 * er is the error code, re the regex it relates to and source the pattern
 * text shown in the message.  regerror() is called twice: once to learn
 * the size of the message, once to fetch it.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t	needed = regerror(er, re, NULL, 0);
	char	*msg = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, msg, needed);
	m4errx(1, "regular expression error in %s: %s.", source, msg);
}
261 
/*
 * Append the text matched by subexpression n (0 = whole match) to the
 * scratch buffer.  string is the subject the offsets in pm refer to.
 *
 * A reference beyond the number of subexpressions in re only produces a
 * warning.  A subexpression that did not take part in the match is not an
 * error and adds nothing.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > (int)re->re_nsub) {
		warnx("No subexpression %d", n);
		return;
	}
	if (pm[n].rm_so == -1 || pm[n].rm_eo == -1)
		return;
	addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so);
}
275 
276 /* Add replacement string to the output buffer, recognizing special
277  * constructs and replacing them with substrings of the original string.
278  */
279 static void
280 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
281 {
282 	const char *p;
283 
284 	for (p = replace; *p != '\0'; p++) {
285 		if (*p == '&' && !mimic_gnu) {
286 			add_sub(0, string, re, pm);
287 			continue;
288 		}
289 		if (*p == '\\') {
290 			if (p[1] == '\\') {
291 				addchar(p[1]);
292 				p++;
293 				continue;
294 			}
295 			if (p[1] == '&') {
296 				if (mimic_gnu)
297 					add_sub(0, string, re, pm);
298 				else
299 					addchar(p[1]);
300 				p++;
301 				continue;
302 			}
303 			if (isdigit((unsigned char)p[1])) {
304 				add_sub(*(++p) - '0', string, re, pm);
305 				continue;
306 			}
307 		}
308 		addchar(*p);
309 	}
310 }
311 
/*
 * Replace every match of re in string, accumulating unmatched text and
 * expanded replacements in the scratch buffer.
 *
 * source is only used for error messages; pm must have room for
 * re->re_nsub + 1 entries.  When no further match is found, the unmatched
 * remainder of string is handed to pbstr(); a regexec() failure other than
 * REG_NOMATCH goes to exit_regerror().
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		regoff_t so, eo;

		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;
		so = pm[0].rm_so;
		eo = pm[0].rm_eo;

		/*
		 * The next search starts at a line beginning only if the
		 * match ended with a newline.
		 */
		if (eo != 0)
			eflags = (string[eo - 1] == '\n') ? 0 : REG_NOTBOL;

		/*
		 * Empty matches follow the `vi-mode' rule: an empty match is
		 * not accepted at the position of the previous match.
		 * Instead one character is copied through and the search
		 * resumes behind it.
		 */
		if (so == eo && string + so == prev_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		prev_match = string + so;
		addchars(string, so);		/* text in front of the match */
		add_replace(string, re, replace, pm);
		string += eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
352 
/*
 * Search string for the first match of re.  On a match, expand replace
 * against it and push the result back with pbstr(); without a match
 * nothing is produced.  Any other regexec() result is reported through
 * exit_regerror(), with source as the pattern text for the message.
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == REG_NOMATCH)
		return;
	if (rc != 0) {
		exit_regerror(rc, re, source);
		return;
	}
	add_replace(string, re, replace, pm);
	pbstr(getstring());
}
370 
/*
 * Search string for the first match of re and push back the offset at
 * which the match starts, or -1 when there is none.  Any other regexec()
 * result is reported through exit_regerror(), with source as the pattern
 * text for the message.
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
388 
/*
 * In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * The rewritten pattern is built in the shared scratch buffer and returned
 * through getstring(), so it is overwritten by the next use of that buffer.
 * Rewrites applied:
 *
 *   leading +   (optionally after ^) is escaped as \+
 *   \( \) \|    lose the backslash
 *   ( ) |       gain a backslash
 *   \w, \W      become [_a-zA-Z0-9] and [^_a-zA-Z0-9]
 *   \<, \>      become [[:<:]] and [[:>:]]
 *
 * Every other backslash pair is copied unchanged.  A lone backslash at the
 * very end of the pattern is copied as is and ends the scan; regcomp() is
 * expected to diagnose it.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			if (p[1] == '\0') {
				/*
				 * Trailing backslash: there is no second
				 * character to consume, so do not step past
				 * the terminator.
				 */
				addchar(*p);
				break;
			}
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
439 
440 /* patsubst(string, regexp, opt replacement) */
441 /* argv[2]: string
442  * argv[3]: regexp
443  * argv[4]: opt rep
444  */
445 void
446 dopatsubst(const char *argv[], int argc)
447 {
448 	if (argc <= 3) {
449 		warnx("Too few arguments to patsubst");
450 		return;
451 	}
452 	/* special case: empty regexp */
453 	if (argv[3][0] == '\0') {
454 		const char *s;
455 		size_t len;
456 		if (argc > 4 && argv[4])
457 			len = strlen(argv[4]);
458 		else
459 			len = 0;
460 		for (s = argv[2]; *s != '\0'; s++) {
461 			addchars(argv[4], len);
462 			addchar(*s);
463 		}
464 	} else {
465 		int error;
466 		regex_t re;
467 		regmatch_t *pmatch;
468 		int mode = REG_EXTENDED;
469 		const char *source;
470 		size_t l = strlen(argv[3]);
471 
472 		if (!mimic_gnu ||
473 		    (argv[3][0] == '^') ||
474 		    (l > 0 && argv[3][l-1] == '$'))
475 			mode |= REG_NEWLINE;
476 
477 		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
478 		error = regcomp(&re, source, mode);
479 		if (error != 0)
480 			exit_regerror(error, &re, source);
481 
482 		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
483 		    NULL);
484 		do_subst(argv[2], &re, source,
485 		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
486 		free(pmatch);
487 		regfree(&re);
488 	}
489 	pbstr(getstring());
490 }
491 
492 void
493 doregexp(const char *argv[], int argc)
494 {
495 	int error;
496 	regex_t re;
497 	regmatch_t *pmatch;
498 	const char *source;
499 
500 	if (argc <= 3) {
501 		warnx("Too few arguments to regexp");
502 		return;
503 	}
504 	/* special gnu case */
505 	if (argv[3][0] == '\0' && mimic_gnu) {
506 		if (argc == 4 || argv[4] == NULL)
507 			return;
508 		else
509 			pbstr(argv[4]);
510 	}
511 	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
512 	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
513 	if (error != 0)
514 		exit_regerror(error, &re, source);
515 
516 	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
517 	if (argc == 4 || argv[4] == NULL)
518 		do_regexpindex(argv[2], &re, source, pmatch);
519 	else
520 		do_regexp(argv[2], &re, source, argv[4], pmatch);
521 	free(pmatch);
522 	regfree(&re);
523 }
524 
525 void
526 doformat(const char *argv[], int argc)
527 {
528 	const char *format = argv[2];
529 	int pos = 3;
530 	int left_padded;
531 	long width;
532 	size_t l;
533 	const char *thisarg = NULL;
534 	char temp[2];
535 	long extra;
536 
537 	while (*format != 0) {
538 		if (*format != '%') {
539 			addchar(*format++);
540 			continue;
541 		}
542 
543 		format++;
544 		if (*format == '%') {
545 			addchar(*format++);
546 			continue;
547 		}
548 		if (*format == 0) {
549 			addchar('%');
550 			break;
551 		}
552 
553 		if (*format == '*') {
554 			format++;
555 			if (pos >= argc)
556 				m4errx(1,
557 				    "Format with too many format specifiers.");
558 			width = strtol(argv[pos++], NULL, 10);
559 		} else {
560 			width = strtol(format, __DECONST(char **,&format), 10);
561 		}
562 		if (width < 0) {
563 			left_padded = 1;
564 			width = -width;
565 		} else {
566 			left_padded = 0;
567 		}
568 		if (*format == '.') {
569 			format++;
570 			if (*format == '*') {
571 				format++;
572 				if (pos >= argc)
573 					m4errx(1,
574 					    "Format with too many format specifiers.");
575 				extra = strtol(argv[pos++], NULL, 10);
576 			} else {
577 				extra = strtol(format, __DECONST(char **, &format), 10);
578 			}
579 		} else {
580 			extra = LONG_MAX;
581 		}
582 		if (pos >= argc)
583 			m4errx(1, "Format with too many format specifiers.");
584 		switch(*format) {
585 		case 's':
586 			thisarg = argv[pos++];
587 			break;
588 		case 'c':
589 			temp[0] = strtoul(argv[pos++], NULL, 10);
590 			temp[1] = 0;
591 			thisarg = temp;
592 			break;
593 		default:
594 			m4errx(1, "Unsupported format specification: %s.",
595 			    argv[2]);
596 		}
597 		format++;
598 		l = strlen(thisarg);
599 		if ((long)l > extra)
600 			l = extra;
601 		if (!left_padded) {
602 			while ((long)l < width--)
603 				addchar(' ');
604 		}
605 		addchars(thisarg, l);
606 		if (left_padded) {
607 			while ((long)l < width--)
608 				addchar(' ');
609 		}
610 	}
611 	pbstr(getstring());
612 }
613 
614 void
615 doesyscmd(const char *cmd)
616 {
617 	int p[2];
618 	pid_t pid, cpid;
619 	char *argv[4];
620 	int cc;
621 	int status;
622 
623 	/* Follow gnu m4 documentation: first flush buffers. */
624 	fflush(NULL);
625 
626 	argv[0] = __DECONST(char *, "sh");
627 	argv[1] = __DECONST(char *, "-c");
628 	argv[2] = __DECONST(char *, cmd);
629 	argv[3] = NULL;
630 
631 	/* Just set up standard output, share stderr and stdin with m4 */
632 	if (pipe(p) == -1)
633 		err(1, "bad pipe");
634 	switch(cpid = fork()) {
635 	case -1:
636 		err(1, "bad fork");
637 		/* NOTREACHED */
638 	case 0:
639 		(void) close(p[0]);
640 		(void) dup2(p[1], 1);
641 		(void) close(p[1]);
642 		execv(_PATH_BSHELL, argv);
643 		exit(1);
644 	default:
645 		/* Read result in two stages, since m4's buffer is
646 		 * pushback-only. */
647 		(void) close(p[1]);
648 		do {
649 			char result[BUFSIZE];
650 			cc = read(p[0], result, sizeof result);
651 			if (cc > 0)
652 				addchars(result, cc);
653 		} while (cc > 0 || (cc == -1 && errno == EINTR));
654 
655 		(void) close(p[0]);
656 		while ((pid = wait(&status)) != cpid && pid >= 0)
657 			continue;
658 		pbstr(getstring());
659 	}
660 }
661 
662 void
663 getdivfile(const char *name)
664 {
665 	FILE *f;
666 	int c;
667 
668 	f = fopen(name, "r");
669 	if (!f)
670 		return;
671 
672 	while ((c = getc(f))!= EOF)
673 		putc(c, active);
674 	(void) fclose(f);
675 }
676