1 /* $OpenBSD: gnum4.c,v 1.54 2022/07/04 10:41:21 espie Exp $ */
2
3 /*
4 * Copyright (c) 1999-2022 Marc Espie <espie@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * functions needed to support gnu-m4 extensions, including a fake freezing
21 */
22
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <paths.h>
28 #include <regex.h>
29 #include <stdarg.h>
30 #include <stddef.h>
31 #include <stdlib.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include "mdef.h"
39 #include "stdd.h"
40 #include "extern.h"
41
42
/*
 * Non-zero selects GNU m4 compatible behavior in the regexp builtins below
 * (regexp rewriting through twiddle(), `\&' handling in replacements).
 */
int mimic_gnu = 0;
44
/*
 * Include path search.
 *
 * A file is first tried under the name given.  If that fails and the name
 * is not absolute, the include path is searched: first the directories
 * from -I options, in command-line order, then those from the M4PATH
 * environment variable.
 */

/* One directory of the include path; entries form a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, NULL at the tail */
};

/* Head and tail of the include path list; both NULL while it is empty. */
struct path_entry *first, *last;

static struct path_entry *new_path_entry(const char *dirname);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *i, const char *filename);
61
62 static struct path_entry *
new_path_entry(const char * dirname)63 new_path_entry(const char *dirname)
64 {
65 struct path_entry *n;
66
67 n = malloc(sizeof(struct path_entry));
68 if (!n)
69 errx(1, "out of memory");
70 n->name = xstrdup(dirname);
71 n->next = 0;
72 return n;
73 }
74
75 void
addtoincludepath(const char * dirname)76 addtoincludepath(const char *dirname)
77 {
78 struct path_entry *n;
79
80 n = new_path_entry(dirname);
81
82 if (last) {
83 last->next = n;
84 last = n;
85 }
86 else
87 last = first = n;
88 }
89
90 static void
ensure_m4path()91 ensure_m4path()
92 {
93 static int envpathdone = 0;
94 char *envpath;
95 char *sweep;
96 char *path;
97
98 if (envpathdone)
99 return;
100 envpathdone = TRUE;
101 envpath = getenv("M4PATH");
102 if (!envpath)
103 return;
104 /* for portability: getenv result is read-only */
105 envpath = xstrdup(envpath);
106 for (sweep = envpath;
107 (path = strsep(&sweep, ":")) != NULL;)
108 addtoincludepath(path);
109 free(envpath);
110 }
111
112 static
113 struct input_file *
dopath(struct input_file * i,const char * filename)114 dopath(struct input_file *i, const char *filename)
115 {
116 char path[PATH_MAX];
117 struct path_entry *pe;
118 FILE *f;
119
120 for (pe = first; pe; pe = pe->next) {
121 snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
122 if ((f = fopen(path, "r")) != 0) {
123 set_input(i, f, path);
124 return i;
125 }
126 }
127 return NULL;
128 }
129
/*
 * Open filename for reading and attach it to i through set_input().
 * The name is tried as given first.  If that fails and the name does not
 * start with '/', the include path (completed from M4PATH on demand) is
 * searched.  Returns i on success, NULL otherwise.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *stream = fopen(filename, "r");

	if (stream == NULL) {
		/* absolute names are never looked up in the include path */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, stream, filename);
	return i;
}
147
148 void
doindir(const char * argv[],int argc)149 doindir(const char *argv[], int argc)
150 {
151 ndptr n;
152 struct macro_definition *p;
153
154 n = lookup(argv[2]);
155 if (n == NULL || (p = macro_getdef(n)) == NULL)
156 m4errx(1, "indir: undefined macro %s.", argv[2]);
157 argv[1] = p->defn;
158
159 eval(argv+1, argc-1, p->type, is_traced(n));
160 }
161
162 void
dobuiltin(const char * argv[],int argc)163 dobuiltin(const char *argv[], int argc)
164 {
165 ndptr p;
166
167 argv[1] = NULL;
168 p = macro_getbuiltin(argv[2]);
169 if (p != NULL)
170 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
171 else
172 m4errx(1, "unknown builtin %s.", argv[2]);
173 }
174
175
/*
 * Scratch buffer for building results: pushback (pbstr) pushes text BACK
 * while substitution produces it going forward, so output is assembled
 * here first and pushed back in one piece.
 */
static char *buffer = NULL;	/* storage, grown on demand */
static size_t bufsize = 0;	/* bytes allocated for buffer */
static size_t current = 0;	/* bytes of buffer currently in use */

static void addchars(const char *c, size_t n);
static void addchar(int c);
static char *twiddle(const char *p);
static char *getstring(void);
static void exit_regerror(int er, regex_t *re, const char *source);
static void do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void do_regexpindex(const char *string, regex_t *re,
    const char *source, regmatch_t *pm);
static void do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void add_sub(int n, const char *string, regex_t *re, regmatch_t *pm);
static void add_replace(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);

/* Append a string literal, without its terminating NUL, to the buffer. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
195
196 static void
addchars(const char * c,size_t n)197 addchars(const char *c, size_t n)
198 {
199 if (n == 0)
200 return;
201 while (current + n > bufsize) {
202 if (bufsize == 0)
203 bufsize = 1024;
204 else if (bufsize <= SIZE_MAX/2) {
205 bufsize *= 2;
206 } else {
207 errx(1, "size overflow");
208 }
209 buffer = xrealloc(buffer, bufsize, NULL);
210 }
211 memcpy(buffer+current, c, n);
212 current += n;
213 }
214
215 static void
addchar(int c)216 addchar(int c)
217 {
218 if (current +1 > bufsize) {
219 if (bufsize == 0)
220 bufsize = 1024;
221 else
222 bufsize *= 2;
223 buffer = xrealloc(buffer, bufsize, NULL);
224 }
225 buffer[current++] = c;
226 }
227
228 static char *
getstring(void)229 getstring(void)
230 {
231 addchar('\0');
232 current = 0;
233 return buffer;
234 }
235
236
/*
 * Report the regex error code er (from compiling or executing re) through
 * m4errx(), naming source as the offending expression.  The message text
 * is obtained from regerror(): a first call with a zero-sized buffer
 * yields the required length, a second call fills the allocated buffer.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t needed = regerror(er, re, NULL, 0);
	char *message = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, message, needed);
	m4errx(1, "regular expression error in %s: %s.", source, message);
}
249
250 /* warnx() plus check to see if we need to change exit code or exit.
251 * -E flag functionality.
252 */
253 void
m4_warnx(const char * fmt,...)254 m4_warnx(const char *fmt, ...)
255 {
256 va_list ap;
257
258 va_start(ap, fmt);
259 vwarnx(fmt, ap);
260 va_end(ap);
261
262 if (fatal_warns)
263 exit(1);
264 if (error_warns)
265 exit_code = 1;
266 }
267
/*
 * Append the text matched by subexpression n (0 is the whole match) to
 * the scratch buffer.  string is the subject that was matched and pm the
 * match offsets filled in by regexec().  A number larger than the count
 * of subexpressions in re produces a warning; a subexpression that did
 * not take part in the match is not an error and adds nothing.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	const regmatch_t *m;

	if (n > re->re_nsub) {
		m4_warnx("No subexpression %d", n);
		return;
	}

	m = &pm[n];
	if (m->rm_so == -1 || m->rm_eo == -1)
		return;
	addchars(string + m->rm_so, m->rm_eo - m->rm_so);
}
281
282 /* Add replacement string to the output buffer, recognizing special
283 * constructs and replacing them with substrings of the original string.
284 */
285 static void
add_replace(const char * string,regex_t * re,const char * replace,regmatch_t * pm)286 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
287 {
288 const char *p;
289
290 for (p = replace; *p != '\0'; p++) {
291 if (*p == '&' && !mimic_gnu) {
292 add_sub(0, string, re, pm);
293 continue;
294 }
295 if (*p == '\\') {
296 if (p[1] == '\\') {
297 addchar(p[1]);
298 p++;
299 continue;
300 }
301 if (p[1] == '&') {
302 if (mimic_gnu)
303 add_sub(0, string, re, pm);
304 else
305 addchar(p[1]);
306 p++;
307 continue;
308 }
309 if (isdigit((unsigned char)p[1])) {
310 add_sub(*(++p) - '0', string, re, pm);
311 continue;
312 }
313 }
314 addchar(*p);
315 }
316 }
317
/*
 * Replace every match of re in string: unmatched text and the expanded
 * replacement are collected in the scratch buffer, and whatever follows
 * the last match is pushed back directly with pbstr().  source is the
 * regexp text, used only for error reporting.
 *
 * REG_NOTBOL is passed to regexec() unless the text consumed so far ended
 * with a newline, so `^' only matches at real line starts.
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;

		if (pm[0].rm_eo != 0)
			eflags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/*
		 * Empty matches follow the `vi-mode' rule: an empty match
		 * is not accepted at the position of the previous match.
		 * Copy one character and retry from the next one instead.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == prev_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		prev_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
358
/*
 * Match re against string once.  On a match, the expanded replacement is
 * pushed back with pbstr(); when nothing matches, nothing is pushed back.
 * Any other regexec() result is reported through exit_regerror(), with
 * source naming the regexp.
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0) {
		add_replace(string, re, replace, pm);
		pbstr(getstring());
	} else if (rc != REG_NOMATCH) {
		exit_regerror(rc, re, source);
	}
}
376
/*
 * Match re against string once and push back the result as a number: the
 * start offset of the match (via pbunsigned()), or -1 (via pbnum()) when
 * nothing matches.  Any other regexec() result is reported through
 * exit_regerror(), with source naming the regexp.
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
394
/*
 * In GNU m4 mode, parentheses and alternation do not follow POSIX 1003.2
 * extended syntax, so the regexp is rewritten before it goes to regcomp():
 *
 *   \( \) \|   become the operators ( ) |
 *   ( ) |      become the literals \( \) \|
 *   \w \W      become [_a-zA-Z0-9] and [^_a-zA-Z0-9]
 *   \< \>      become [[:<:]] and [[:>:]]
 *   +          at the very start (or right after a leading ^) is literal
 *
 * Any other backslash pair is copied unchanged.  The result lives in the
 * shared scratch buffer (see getstring()).
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			switch (p[1]) {
			case '\0':
				/*
				 * Lone backslash at the end: copy it and stop
				 * on the terminator.  Advancing by two here
				 * would step past the end of the string.
				 */
				addchar(*p);
				p++;
				continue;
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p += 2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
445
446 /* patsubst(string, regexp, opt replacement) */
447 /* argv[2]: string
448 * argv[3]: regexp
449 * argv[4]: opt rep
450 */
451 void
dopatsubst(const char * argv[],int argc)452 dopatsubst(const char *argv[], int argc)
453 {
454 if (argc <= 3) {
455 m4_warnx("Too few arguments to patsubst");
456 return;
457 }
458 /* special case: empty regexp */
459 if (argv[3][0] == '\0') {
460 const char *s;
461 size_t len;
462 if (argc > 4 && argv[4])
463 len = strlen(argv[4]);
464 else
465 len = 0;
466 for (s = argv[2]; *s != '\0'; s++) {
467 addchars(argv[4], len);
468 addchar(*s);
469 }
470 } else {
471 int error;
472 regex_t re;
473 regmatch_t *pmatch;
474 int mode = REG_EXTENDED;
475 const char *source;
476 size_t l = strlen(argv[3]);
477
478 if (!mimic_gnu ||
479 (argv[3][0] == '^') ||
480 (l > 0 && argv[3][l-1] == '$'))
481 mode |= REG_NEWLINE;
482
483 source = mimic_gnu ? twiddle(argv[3]) : argv[3];
484 error = regcomp(&re, source, mode);
485 if (error != 0)
486 exit_regerror(error, &re, source);
487
488 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
489 NULL);
490 do_subst(argv[2], &re, source,
491 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
492 free(pmatch);
493 regfree(&re);
494 }
495 pbstr(getstring());
496 }
497
498 void
doregexp(const char * argv[],int argc)499 doregexp(const char *argv[], int argc)
500 {
501 int error;
502 regex_t re;
503 regmatch_t *pmatch;
504 const char *source;
505
506 if (argc <= 3) {
507 m4_warnx("Too few arguments to regexp");
508 return;
509 }
510 /* special gnu case */
511 if (argv[3][0] == '\0' && mimic_gnu) {
512 if (argc == 4 || argv[4] == NULL)
513 return;
514 else
515 pbstr(argv[4]);
516 }
517 source = mimic_gnu ? twiddle(argv[3]) : argv[3];
518 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
519 if (error != 0)
520 exit_regerror(error, &re, source);
521
522 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
523 if (argc == 4 || argv[4] == NULL)
524 do_regexpindex(argv[2], &re, source, pmatch);
525 else
526 do_regexp(argv[2], &re, source, argv[4], pmatch);
527 free(pmatch);
528 regfree(&re);
529 }
530
/*
 * format(fmt, args...): a small printf-like formatter.  argv[2] is the
 * format, the arguments to consume start at argv[3].
 *
 * Supported: `%%', and `%s' / `%c' with an optional width and an optional
 * `.precision'; either may be `*' to take the value from the next
 * argument.  A negative width pads on the right instead of the left.
 * `%c' converts its argument to a number and emits the character with
 * that code.  A `%' at the very end of the format is copied literally.
 * Running out of arguments or meeting any other conversion is reported
 * through m4errx().  The result is pushed back with pbstr().
 */
void
doformat(const char *argv[], int argc)
{
	const char *fmt = argv[2];
	int next = 3;			/* index of the next unused argument */
	const char *text;
	char onechar[2];
	size_t len;
	long width, precision;
	int pad_right;

	while (*fmt != 0) {
		/* ordinary characters and `%%' are copied through */
		if (*fmt != '%') {
			addchar(*fmt++);
			continue;
		}
		fmt++;
		if (*fmt == '%') {
			addchar(*fmt++);
			continue;
		}
		if (*fmt == 0) {
			addchar('%');
			break;
		}

		/* field width */
		if (*fmt != '*') {
			width = strtol(fmt, (char **)&fmt, 10);
		} else {
			fmt++;
			if (next >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[next++], NULL, 10);
		}
		pad_right = 0;
		if (width < 0) {
			pad_right = 1;
			width = -width;
		}

		/* precision: upper bound on the characters taken */
		precision = LONG_MAX;
		if (*fmt == '.') {
			fmt++;
			if (*fmt != '*') {
				precision = strtol(fmt, (char **)&fmt, 10);
			} else {
				fmt++;
				if (next >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				precision = strtol(argv[next++], NULL, 10);
			}
		}

		/* conversion */
		if (next >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch (*fmt) {
		case 's':
			text = argv[next++];
			break;
		case 'c':
			onechar[0] = strtoul(argv[next++], NULL, 10);
			onechar[1] = 0;
			text = onechar;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		fmt++;

		len = strlen(text);
		if (len > precision)
			len = precision;

		if (!pad_right) {
			while (len < width--)
				addchar(' ');
		}
		addchars(text, len);
		if (pad_right) {
			while (len < width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
619
620 void
doesyscmd(const char * cmd)621 doesyscmd(const char *cmd)
622 {
623 int p[2];
624 pid_t cpid;
625 char *argv[4];
626 int cc;
627 int status;
628
629 /* Follow gnu m4 documentation: first flush buffers. */
630 fflush(NULL);
631
632 argv[0] = "sh";
633 argv[1] = "-c";
634 argv[2] = (char *)cmd;
635 argv[3] = NULL;
636
637 /* Just set up standard output, share stderr and stdin with m4 */
638 if (pipe(p) == -1)
639 err(1, "bad pipe");
640 switch(cpid = fork()) {
641 case -1:
642 err(1, "bad fork");
643 /* NOTREACHED */
644 case 0:
645 (void) close(p[0]);
646 (void) dup2(p[1], 1);
647 (void) close(p[1]);
648 execv(_PATH_BSHELL, argv);
649 exit(1);
650 default:
651 /* Read result in two stages, since m4's buffer is
652 * pushback-only. */
653 (void) close(p[1]);
654 do {
655 char result[BUFSIZE];
656 cc = read(p[0], result, sizeof result);
657 if (cc > 0)
658 addchars(result, cc);
659 } while (cc > 0 || (cc == -1 && errno == EINTR));
660
661 (void) close(p[0]);
662 while (waitpid(cpid, &status, 0) == -1) {
663 if (errno != EINTR)
664 break;
665 }
666 pbstr(getstring());
667 }
668 }
669
670 void
getdivfile(const char * name)671 getdivfile(const char *name)
672 {
673 FILE *f;
674 int c;
675
676 f = fopen(name, "r");
677 if (!f)
678 return;
679
680 while ((c = getc(f))!= EOF)
681 putc(c, active);
682 (void) fclose(f);
683 }
684