1 /* $OpenBSD: gnum4.c,v 1.54 2022/07/04 10:41:21 espie Exp $ */ 2 3 /* 4 * Copyright (c) 1999-2022 Marc Espie <espie@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * functions needed to support gnu-m4 extensions, including a fake freezing 21 */ 22 23 #include <sys/types.h> 24 #include <sys/wait.h> 25 #include <ctype.h> 26 #include <err.h> 27 #include <paths.h> 28 #include <regex.h> 29 #include <stdarg.h> 30 #include <stddef.h> 31 #include <stdlib.h> 32 #include <stdint.h> 33 #include <stdio.h> 34 #include <string.h> 35 #include <errno.h> 36 #include <unistd.h> 37 #include <limits.h> 38 #include "mdef.h" 39 #include "stdd.h" 40 #include "extern.h" 41 42 43 int mimic_gnu = 0; 44 45 /* 46 * Support for include path search 47 * First search in the current directory. 48 * If not found, and the path is not absolute, include path kicks in. 49 * First, -I options, in the order found on the command line. 50 * Then M4PATH env variable 51 */ 52 53 struct path_entry { 54 char *name; 55 struct path_entry *next; 56 } *first, *last; 57 58 static struct path_entry *new_path_entry(const char *); 59 static void ensure_m4path(void); 60 static struct input_file *dopath(struct input_file *, const char *); 61 62 static struct path_entry * 63 new_path_entry(const char *dirname) 64 { 65 struct path_entry *n; 66 67 n = malloc(sizeof(struct path_entry)); 68 if (!n) 69 errx(1, "out of memory"); 70 n->name = xstrdup(dirname); 71 n->next = 0; 72 return n; 73 } 74 75 void 76 addtoincludepath(const char *dirname) 77 { 78 struct path_entry *n; 79 80 n = new_path_entry(dirname); 81 82 if (last) { 83 last->next = n; 84 last = n; 85 } 86 else 87 last = first = n; 88 } 89 90 static void 91 ensure_m4path() 92 { 93 static int envpathdone = 0; 94 char *envpath; 95 char *sweep; 96 char *path; 97 98 if (envpathdone) 99 return; 100 envpathdone = TRUE; 101 envpath = getenv("M4PATH"); 102 if (!envpath) 103 return; 104 /* for portability: getenv result is read-only */ 105 envpath = xstrdup(envpath); 106 for (sweep = envpath; 107 (path = strsep(&sweep, ":")) != NULL;) 108 addtoincludepath(path); 109 free(envpath); 110 } 111 112 static 113 struct input_file * 114 dopath(struct input_file *i, const char *filename) 115 { 116 char path[PATH_MAX]; 117 struct path_entry *pe; 118 FILE *f; 119 120 for (pe = first; pe; pe = pe->next) { 121 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 122 if ((f = fopen(path, "r")) != 0) { 123 set_input(i, f, path); 124 return i; 125 } 126 } 127 return NULL; 128 } 129 130 struct input_file * 131 fopen_trypath(struct input_file *i, const char *filename) 132 { 133 FILE *f; 134 135 f = fopen(filename, "r"); 136 if (f != NULL) { 137 set_input(i, f, filename); 138 return i; 139 } 140 if (filename[0] == '/') 141 return NULL; 142 143 ensure_m4path(); 144 145 return dopath(i, filename); 146 } 147 148 void 149 doindir(const char *argv[], int argc) 150 { 151 ndptr n; 152 struct macro_definition *p; 153 154 n = lookup(argv[2]); 155 if (n == NULL || (p = macro_getdef(n)) == NULL) 156 m4errx(1, "indir: undefined macro %s.", argv[2]); 157 argv[1] = p->defn; 158 159 eval(argv+1, argc-1, p->type, is_traced(n)); 160 } 161 162 void 163 dobuiltin(const char *argv[], int argc) 164 { 165 ndptr p; 166 167 argv[1] = NULL; 168 p = macro_getbuiltin(argv[2]); 169 if (p != NULL) 170 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 171 else 172 m4errx(1, "unknown builtin %s.", argv[2]); 173 } 174 175 176 /* We need some temporary buffer space, as pb pushes BACK and substitution 177 * proceeds forward... */ 178 static char *buffer; 179 static size_t bufsize = 0; 180 static size_t current = 0; 181 182 static void addchars(const char *, size_t); 183 static void addchar(int); 184 static char *twiddle(const char *); 185 static char *getstring(void); 186 static void exit_regerror(int, regex_t *, const char *); 187 static void do_subst(const char *, regex_t *, const char *, const char *, 188 regmatch_t *); 189 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 190 static void do_regexp(const char *, regex_t *, const char *, const char *, 191 regmatch_t *); 192 static void add_sub(int, const char *, regex_t *, regmatch_t *); 193 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 194 #define addconstantstring(s) addchars((s), sizeof(s)-1) 195 196 static void 197 addchars(const char *c, size_t n) 198 { 199 if (n == 0) 200 return; 201 while (current + n > bufsize) { 202 if (bufsize == 0) 203 bufsize = 1024; 204 else if (bufsize <= SIZE_MAX/2) { 205 bufsize *= 2; 206 } else { 207 errx(1, "size overflow"); 208 } 209 buffer = xrealloc(buffer, bufsize, NULL); 210 } 211 memcpy(buffer+current, c, n); 212 current += n; 213 } 214 215 static void 216 addchar(int c) 217 { 218 if (current +1 > bufsize) { 219 if (bufsize == 0) 220 bufsize = 1024; 221 else 222 bufsize *= 2; 223 buffer = xrealloc(buffer, bufsize, NULL); 224 } 225 buffer[current++] = c; 226 } 227 228 static char * 229 getstring(void) 230 { 231 addchar('\0'); 232 current = 0; 233 return buffer; 234 } 235 236 237 static void 238 exit_regerror(int er, regex_t *re, const char *source) 239 { 240 size_t errlen; 241 char *errbuf; 242 243 errlen = regerror(er, re, NULL, 0); 244 errbuf = xalloc(errlen, 245 "malloc in regerror: %lu", (unsigned long)errlen); 246 regerror(er, re, errbuf, errlen); 247 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 248 } 249 250 /* warnx() plus check to see if we need to change exit code or exit. 251 * -E flag functionality. 252 */ 253 void 254 m4_warnx(const char *fmt, ...) 255 { 256 va_list ap; 257 258 va_start(ap, fmt); 259 vwarnx(fmt, ap); 260 va_end(ap); 261 262 if (fatal_warns) 263 exit(1); 264 if (error_warns) 265 exit_code = 1; 266 } 267 268 static void 269 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 270 { 271 if (n > re->re_nsub) 272 m4_warnx("No subexpression %d", n); 273 /* Subexpressions that did not match are 274 * not an error. */ 275 else if (pm[n].rm_so != -1 && 276 pm[n].rm_eo != -1) { 277 addchars(string + pm[n].rm_so, 278 pm[n].rm_eo - pm[n].rm_so); 279 } 280 } 281 282 /* Add replacement string to the output buffer, recognizing special 283 * constructs and replacing them with substrings of the original string. 284 */ 285 static void 286 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 287 { 288 const char *p; 289 290 for (p = replace; *p != '\0'; p++) { 291 if (*p == '&' && !mimic_gnu) { 292 add_sub(0, string, re, pm); 293 continue; 294 } 295 if (*p == '\\') { 296 if (p[1] == '\\') { 297 addchar(p[1]); 298 p++; 299 continue; 300 } 301 if (p[1] == '&') { 302 if (mimic_gnu) 303 add_sub(0, string, re, pm); 304 else 305 addchar(p[1]); 306 p++; 307 continue; 308 } 309 if (isdigit((unsigned char)p[1])) { 310 add_sub(*(++p) - '0', string, re, pm); 311 continue; 312 } 313 } 314 addchar(*p); 315 } 316 } 317 318 static void 319 do_subst(const char *string, regex_t *re, const char *source, 320 const char *replace, regmatch_t *pm) 321 { 322 int error; 323 int flags = 0; 324 const char *last_match = NULL; 325 326 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 327 if (pm[0].rm_eo != 0) { 328 if (string[pm[0].rm_eo-1] == '\n') 329 flags = 0; 330 else 331 flags = REG_NOTBOL; 332 } 333 334 /* NULL length matches are special... We use the `vi-mode' 335 * rule: don't allow a NULL-match at the last match 336 * position. 337 */ 338 if (pm[0].rm_so == pm[0].rm_eo && 339 string + pm[0].rm_so == last_match) { 340 if (*string == '\0') 341 return; 342 addchar(*string); 343 if (*string++ == '\n') 344 flags = 0; 345 else 346 flags = REG_NOTBOL; 347 continue; 348 } 349 last_match = string + pm[0].rm_so; 350 addchars(string, pm[0].rm_so); 351 add_replace(string, re, replace, pm); 352 string += pm[0].rm_eo; 353 } 354 if (error != REG_NOMATCH) 355 exit_regerror(error, re, source); 356 pbstr(string); 357 } 358 359 static void 360 do_regexp(const char *string, regex_t *re, const char *source, 361 const char *replace, regmatch_t *pm) 362 { 363 int error; 364 365 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 366 case 0: 367 add_replace(string, re, replace, pm); 368 pbstr(getstring()); 369 break; 370 case REG_NOMATCH: 371 break; 372 default: 373 exit_regerror(error, re, source); 374 } 375 } 376 377 static void 378 do_regexpindex(const char *string, regex_t *re, const char *source, 379 regmatch_t *pm) 380 { 381 int error; 382 383 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 384 case 0: 385 pbunsigned(pm[0].rm_so); 386 break; 387 case REG_NOMATCH: 388 pbnum(-1); 389 break; 390 default: 391 exit_regerror(error, re, source); 392 } 393 } 394 395 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 396 * says. So we twiddle with the regexp before passing it to regcomp. 397 */ 398 static char * 399 twiddle(const char *p) 400 { 401 /* + at start of regexp is a normal character for Gnu m4 */ 402 if (*p == '^') { 403 addchar(*p); 404 p++; 405 } 406 if (*p == '+') { 407 addchar('\\'); 408 } 409 /* This could use strcspn for speed... */ 410 while (*p != '\0') { 411 if (*p == '\\') { 412 switch(p[1]) { 413 case '(': 414 case ')': 415 case '|': 416 addchar(p[1]); 417 break; 418 case 'w': 419 addconstantstring("[_a-zA-Z0-9]"); 420 break; 421 case 'W': 422 addconstantstring("[^_a-zA-Z0-9]"); 423 break; 424 case '<': 425 addconstantstring("[[:<:]]"); 426 break; 427 case '>': 428 addconstantstring("[[:>:]]"); 429 break; 430 default: 431 addchars(p, 2); 432 break; 433 } 434 p+=2; 435 continue; 436 } 437 if (*p == '(' || *p == ')' || *p == '|') 438 addchar('\\'); 439 440 addchar(*p); 441 p++; 442 } 443 return getstring(); 444 } 445 446 /* patsubst(string, regexp, opt replacement) */ 447 /* argv[2]: string 448 * argv[3]: regexp 449 * argv[4]: opt rep 450 */ 451 void 452 dopatsubst(const char *argv[], int argc) 453 { 454 if (argc <= 3) { 455 m4_warnx("Too few arguments to patsubst"); 456 return; 457 } 458 /* special case: empty regexp */ 459 if (argv[3][0] == '\0') { 460 const char *s; 461 size_t len; 462 if (argc > 4 && argv[4]) 463 len = strlen(argv[4]); 464 else 465 len = 0; 466 for (s = argv[2]; *s != '\0'; s++) { 467 addchars(argv[4], len); 468 addchar(*s); 469 } 470 } else { 471 int error; 472 regex_t re; 473 regmatch_t *pmatch; 474 int mode = REG_EXTENDED; 475 const char *source; 476 size_t l = strlen(argv[3]); 477 478 if (!mimic_gnu || 479 (argv[3][0] == '^') || 480 (l > 0 && argv[3][l-1] == '$')) 481 mode |= REG_NEWLINE; 482 483 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 484 error = regcomp(&re, source, mode); 485 if (error != 0) 486 exit_regerror(error, &re, source); 487 488 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 489 NULL); 490 do_subst(argv[2], &re, source, 491 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 492 free(pmatch); 493 regfree(&re); 494 } 495 pbstr(getstring()); 496 } 497 498 void 499 doregexp(const char *argv[], int argc) 500 { 501 int error; 502 regex_t re; 503 regmatch_t *pmatch; 504 const char *source; 505 506 if (argc <= 3) { 507 m4_warnx("Too few arguments to regexp"); 508 return; 509 } 510 /* special gnu case */ 511 if (argv[3][0] == '\0' && mimic_gnu) { 512 if (argc == 4 || argv[4] == NULL) 513 return; 514 else 515 pbstr(argv[4]); 516 } 517 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 518 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 519 if (error != 0) 520 exit_regerror(error, &re, source); 521 522 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 523 if (argc == 4 || argv[4] == NULL) 524 do_regexpindex(argv[2], &re, source, pmatch); 525 else 526 do_regexp(argv[2], &re, source, argv[4], pmatch); 527 free(pmatch); 528 regfree(&re); 529 } 530 531 void 532 doformat(const char *argv[], int argc) 533 { 534 const char *format = argv[2]; 535 int pos = 3; 536 int left_padded; 537 long width; 538 size_t l; 539 const char *thisarg; 540 char temp[2]; 541 long extra; 542 543 while (*format != 0) { 544 if (*format != '%') { 545 addchar(*format++); 546 continue; 547 } 548 549 format++; 550 if (*format == '%') { 551 addchar(*format++); 552 continue; 553 } 554 if (*format == 0) { 555 addchar('%'); 556 break; 557 } 558 559 if (*format == '*') { 560 format++; 561 if (pos >= argc) 562 m4errx(1, 563 "Format with too many format specifiers."); 564 width = strtol(argv[pos++], NULL, 10); 565 } else { 566 width = strtol(format, (char **)&format, 10); 567 } 568 if (width < 0) { 569 left_padded = 1; 570 width = -width; 571 } else { 572 left_padded = 0; 573 } 574 if (*format == '.') { 575 format++; 576 if (*format == '*') { 577 format++; 578 if (pos >= argc) 579 m4errx(1, 580 "Format with too many format specifiers."); 581 extra = strtol(argv[pos++], NULL, 10); 582 } else { 583 extra = strtol(format, (char **)&format, 10); 584 } 585 } else { 586 extra = LONG_MAX; 587 } 588 if (pos >= argc) 589 m4errx(1, "Format with too many format specifiers."); 590 switch(*format) { 591 case 's': 592 thisarg = argv[pos++]; 593 break; 594 case 'c': 595 temp[0] = strtoul(argv[pos++], NULL, 10); 596 temp[1] = 0; 597 thisarg = temp; 598 break; 599 default: 600 m4errx(1, "Unsupported format specification: %s.", 601 argv[2]); 602 } 603 format++; 604 l = strlen(thisarg); 605 if (l > extra) 606 l = extra; 607 if (!left_padded) { 608 while (l < width--) 609 addchar(' '); 610 } 611 addchars(thisarg, l); 612 if (left_padded) { 613 while (l < width--) 614 addchar(' '); 615 } 616 } 617 pbstr(getstring()); 618 } 619 620 void 621 doesyscmd(const char *cmd) 622 { 623 int p[2]; 624 pid_t cpid; 625 char *argv[4]; 626 int cc; 627 int status; 628 629 /* Follow gnu m4 documentation: first flush buffers. */ 630 fflush(NULL); 631 632 argv[0] = "sh"; 633 argv[1] = "-c"; 634 argv[2] = (char *)cmd; 635 argv[3] = NULL; 636 637 /* Just set up standard output, share stderr and stdin with m4 */ 638 if (pipe(p) == -1) 639 err(1, "bad pipe"); 640 switch(cpid = fork()) { 641 case -1: 642 err(1, "bad fork"); 643 /* NOTREACHED */ 644 case 0: 645 (void) close(p[0]); 646 (void) dup2(p[1], 1); 647 (void) close(p[1]); 648 execv(_PATH_BSHELL, argv); 649 exit(1); 650 default: 651 /* Read result in two stages, since m4's buffer is 652 * pushback-only. */ 653 (void) close(p[1]); 654 do { 655 char result[BUFSIZE]; 656 cc = read(p[0], result, sizeof result); 657 if (cc > 0) 658 addchars(result, cc); 659 } while (cc > 0 || (cc == -1 && errno == EINTR)); 660 661 (void) close(p[0]); 662 while (waitpid(cpid, &status, 0) == -1) { 663 if (errno != EINTR) 664 break; 665 } 666 pbstr(getstring()); 667 } 668 } 669 670 void 671 getdivfile(const char *name) 672 { 673 FILE *f; 674 int c; 675 676 f = fopen(name, "r"); 677 if (!f) 678 return; 679 680 while ((c = getc(f))!= EOF) 681 putc(c, active); 682 (void) fclose(f); 683 } 684