1 /* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * functions needed to support gnu-m4 extensions, including a fake freezing 30 */ 31 32 #include <sys/types.h> 33 #include <sys/wait.h> 34 #include <ctype.h> 35 #include <err.h> 36 #include <paths.h> 37 #include <regex.h> 38 #include <stdarg.h> 39 #include <stddef.h> 40 #include <stdlib.h> 41 #include <stdint.h> 42 #include <stdio.h> 43 #include <string.h> 44 #include <errno.h> 45 #include <unistd.h> 46 #include <limits.h> 47 #include "mdef.h" 48 #include "stdd.h" 49 #include "extern.h" 50 51 52 int mimic_gnu = 0; 53 54 /* 55 * Support for include path search 56 * First search in the current directory. 57 * If not found, and the path is not absolute, include path kicks in. 58 * First, -I options, in the order found on the command line. 59 * Then M4PATH env variable 60 */ 61 62 struct path_entry { 63 char *name; 64 struct path_entry *next; 65 } *first, *last; 66 67 static struct path_entry *new_path_entry(const char *); 68 static void ensure_m4path(void); 69 static struct input_file *dopath(struct input_file *, const char *); 70 71 static struct path_entry * 72 new_path_entry(const char *dirname) 73 { 74 struct path_entry *n; 75 76 n = malloc(sizeof(struct path_entry)); 77 if (!n) 78 errx(1, "out of memory"); 79 n->name = xstrdup(dirname); 80 n->next = 0; 81 return n; 82 } 83 84 void 85 addtoincludepath(const char *dirname) 86 { 87 struct path_entry *n; 88 89 n = new_path_entry(dirname); 90 91 if (last) { 92 last->next = n; 93 last = n; 94 } 95 else 96 last = first = n; 97 } 98 99 static void 100 ensure_m4path() 101 { 102 static int envpathdone = 0; 103 char *envpath; 104 char *sweep; 105 char *path; 106 107 if (envpathdone) 108 return; 109 envpathdone = TRUE; 110 envpath = getenv("M4PATH"); 111 if (!envpath) 112 return; 113 /* for portability: getenv result is read-only */ 114 envpath = xstrdup(envpath); 115 for (sweep = envpath; 116 (path = strsep(&sweep, ":")) != NULL;) 117 addtoincludepath(path); 118 free(envpath); 119 } 120 121 static 122 struct input_file * 123 dopath(struct input_file *i, const char *filename) 124 { 125 char path[PATH_MAX]; 126 struct path_entry *pe; 127 FILE *f; 128 129 for (pe = first; pe; pe = pe->next) { 130 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 131 if ((f = fopen(path, "r")) != 0) { 132 set_input(i, f, path); 133 return i; 134 } 135 } 136 return NULL; 137 } 138 139 struct input_file * 140 fopen_trypath(struct input_file *i, const char *filename) 141 { 142 FILE *f; 143 144 f = fopen(filename, "r"); 145 if (f != NULL) { 146 set_input(i, f, filename); 147 return i; 148 } 149 if (filename[0] == '/') 150 return NULL; 151 152 ensure_m4path(); 153 154 return dopath(i, filename); 155 } 156 157 void 158 doindir(const char *argv[], int argc) 159 { 160 ndptr n; 161 struct macro_definition *p; 162 163 n = lookup(argv[2]); 164 if (n == NULL || (p = macro_getdef(n)) == NULL) 165 m4errx(1, "indir: undefined macro %s.", argv[2]); 166 argv[1] = p->defn; 167 168 eval(argv+1, argc-1, p->type, is_traced(n)); 169 } 170 171 void 172 dobuiltin(const char *argv[], int argc) 173 { 174 ndptr p; 175 176 argv[1] = NULL; 177 p = macro_getbuiltin(argv[2]); 178 if (p != NULL) 179 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 180 else 181 m4errx(1, "unknown builtin %s.", argv[2]); 182 } 183 184 185 /* We need some temporary buffer space, as pb pushes BACK and substitution 186 * proceeds forward... */ 187 static char *buffer; 188 static size_t bufsize = 0; 189 static size_t current = 0; 190 191 static void addchars(const char *, size_t); 192 static void addchar(int); 193 static char *twiddle(const char *); 194 static char *getstring(void); 195 static void exit_regerror(int, regex_t *, const char *); 196 static void do_subst(const char *, regex_t *, const char *, const char *, 197 regmatch_t *); 198 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 199 static void do_regexp(const char *, regex_t *, const char *, const char *, 200 regmatch_t *); 201 static void add_sub(int, const char *, regex_t *, regmatch_t *); 202 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 203 #define addconstantstring(s) addchars((s), sizeof(s)-1) 204 205 static void 206 addchars(const char *c, size_t n) 207 { 208 if (n == 0) 209 return; 210 while (current + n > bufsize) { 211 if (bufsize == 0) 212 bufsize = 1024; 213 else if (bufsize <= SIZE_MAX/2) { 214 bufsize *= 2; 215 } else { 216 errx(1, "size overflow"); 217 } 218 buffer = xrealloc(buffer, bufsize, NULL); 219 } 220 memcpy(buffer+current, c, n); 221 current += n; 222 } 223 224 static void 225 addchar(int c) 226 { 227 if (current +1 > bufsize) { 228 if (bufsize == 0) 229 bufsize = 1024; 230 else 231 bufsize *= 2; 232 buffer = xrealloc(buffer, bufsize, NULL); 233 } 234 buffer[current++] = c; 235 } 236 237 static char * 238 getstring(void) 239 { 240 addchar('\0'); 241 current = 0; 242 return buffer; 243 } 244 245 246 static void 247 exit_regerror(int er, regex_t *re, const char *source) 248 { 249 size_t errlen; 250 char *errbuf; 251 252 errlen = regerror(er, re, NULL, 0); 253 errbuf = xalloc(errlen, 254 "malloc in regerror: %lu", (unsigned long)errlen); 255 regerror(er, re, errbuf, errlen); 256 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 257 } 258 259 /* warnx() plus check to see if we need to change exit code or exit. 260 * -E flag functionality. 261 */ 262 void 263 m4_warnx(const char *fmt, ...) 264 { 265 va_list ap; 266 267 va_start(ap, fmt); 268 vwarnx(fmt, ap); 269 va_end(ap); 270 271 if (fatal_warns) 272 exit(1); 273 if (error_warns) 274 exit_code = 1; 275 } 276 277 static void 278 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 279 { 280 if (n > re->re_nsub) 281 m4_warnx("No subexpression %d", n); 282 /* Subexpressions that did not match are 283 * not an error. */ 284 else if (pm[n].rm_so != -1 && 285 pm[n].rm_eo != -1) { 286 addchars(string + pm[n].rm_so, 287 pm[n].rm_eo - pm[n].rm_so); 288 } 289 } 290 291 /* Add replacement string to the output buffer, recognizing special 292 * constructs and replacing them with substrings of the original string. 293 */ 294 static void 295 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 296 { 297 const char *p; 298 299 for (p = replace; *p != '\0'; p++) { 300 if (*p == '&' && !mimic_gnu) { 301 add_sub(0, string, re, pm); 302 continue; 303 } 304 if (*p == '\\') { 305 if (p[1] == '\\') { 306 addchar(p[1]); 307 p++; 308 continue; 309 } 310 if (p[1] == '&') { 311 if (mimic_gnu) 312 add_sub(0, string, re, pm); 313 else 314 addchar(p[1]); 315 p++; 316 continue; 317 } 318 if (isdigit((unsigned char)p[1])) { 319 add_sub(*(++p) - '0', string, re, pm); 320 continue; 321 } 322 } 323 addchar(*p); 324 } 325 } 326 327 static void 328 do_subst(const char *string, regex_t *re, const char *source, 329 const char *replace, regmatch_t *pm) 330 { 331 int error; 332 int flags = 0; 333 const char *last_match = NULL; 334 335 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 336 if (pm[0].rm_eo != 0) { 337 if (string[pm[0].rm_eo-1] == '\n') 338 flags = 0; 339 else 340 flags = REG_NOTBOL; 341 } 342 343 /* NULL length matches are special... We use the `vi-mode' 344 * rule: don't allow a NULL-match at the last match 345 * position. 346 */ 347 if (pm[0].rm_so == pm[0].rm_eo && 348 string + pm[0].rm_so == last_match) { 349 if (*string == '\0') 350 return; 351 addchar(*string); 352 if (*string++ == '\n') 353 flags = 0; 354 else 355 flags = REG_NOTBOL; 356 continue; 357 } 358 last_match = string + pm[0].rm_so; 359 addchars(string, pm[0].rm_so); 360 add_replace(string, re, replace, pm); 361 string += pm[0].rm_eo; 362 } 363 if (error != REG_NOMATCH) 364 exit_regerror(error, re, source); 365 pbstr(string); 366 } 367 368 static void 369 do_regexp(const char *string, regex_t *re, const char *source, 370 const char *replace, regmatch_t *pm) 371 { 372 int error; 373 374 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 375 case 0: 376 add_replace(string, re, replace, pm); 377 pbstr(getstring()); 378 break; 379 case REG_NOMATCH: 380 break; 381 default: 382 exit_regerror(error, re, source); 383 } 384 } 385 386 static void 387 do_regexpindex(const char *string, regex_t *re, const char *source, 388 regmatch_t *pm) 389 { 390 int error; 391 392 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 393 case 0: 394 pbunsigned(pm[0].rm_so); 395 break; 396 case REG_NOMATCH: 397 pbnum(-1); 398 break; 399 default: 400 exit_regerror(error, re, source); 401 } 402 } 403 404 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 405 * says. So we twiddle with the regexp before passing it to regcomp. 406 */ 407 static char * 408 twiddle(const char *p) 409 { 410 /* + at start of regexp is a normal character for Gnu m4 */ 411 if (*p == '^') { 412 addchar(*p); 413 p++; 414 } 415 if (*p == '+') { 416 addchar('\\'); 417 } 418 /* This could use strcspn for speed... */ 419 while (*p != '\0') { 420 if (*p == '\\') { 421 switch(p[1]) { 422 case '(': 423 case ')': 424 case '|': 425 addchar(p[1]); 426 break; 427 case 'w': 428 addconstantstring("[_a-zA-Z0-9]"); 429 break; 430 case 'W': 431 addconstantstring("[^_a-zA-Z0-9]"); 432 break; 433 case '<': 434 addconstantstring("[[:<:]]"); 435 break; 436 case '>': 437 addconstantstring("[[:>:]]"); 438 break; 439 default: 440 addchars(p, 2); 441 break; 442 } 443 p+=2; 444 continue; 445 } 446 if (*p == '(' || *p == ')' || *p == '|') 447 addchar('\\'); 448 449 addchar(*p); 450 p++; 451 } 452 return getstring(); 453 } 454 455 /* patsubst(string, regexp, opt replacement) */ 456 /* argv[2]: string 457 * argv[3]: regexp 458 * argv[4]: opt rep 459 */ 460 void 461 dopatsubst(const char *argv[], int argc) 462 { 463 if (argc <= 3) { 464 m4_warnx("Too few arguments to patsubst"); 465 return; 466 } 467 /* special case: empty regexp */ 468 if (argv[3][0] == '\0') { 469 const char *s; 470 size_t len; 471 if (argc > 4 && argv[4]) 472 len = strlen(argv[4]); 473 else 474 len = 0; 475 for (s = argv[2]; *s != '\0'; s++) { 476 addchars(argv[4], len); 477 addchar(*s); 478 } 479 } else { 480 int error; 481 regex_t re; 482 regmatch_t *pmatch; 483 int mode = REG_EXTENDED; 484 const char *source; 485 size_t l = strlen(argv[3]); 486 487 if (!mimic_gnu || 488 (argv[3][0] == '^') || 489 (l > 0 && argv[3][l-1] == '$')) 490 mode |= REG_NEWLINE; 491 492 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 493 error = regcomp(&re, source, mode); 494 if (error != 0) 495 exit_regerror(error, &re, source); 496 497 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 498 NULL); 499 do_subst(argv[2], &re, source, 500 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 501 free(pmatch); 502 regfree(&re); 503 } 504 pbstr(getstring()); 505 } 506 507 void 508 doregexp(const char *argv[], int argc) 509 { 510 int error; 511 regex_t re; 512 regmatch_t *pmatch; 513 const char *source; 514 515 if (argc <= 3) { 516 m4_warnx("Too few arguments to regexp"); 517 return; 518 } 519 /* special gnu case */ 520 if (argv[3][0] == '\0' && mimic_gnu) { 521 if (argc == 4 || argv[4] == NULL) 522 return; 523 else 524 pbstr(argv[4]); 525 } 526 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 527 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 528 if (error != 0) 529 exit_regerror(error, &re, source); 530 531 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 532 if (argc == 4 || argv[4] == NULL) 533 do_regexpindex(argv[2], &re, source, pmatch); 534 else 535 do_regexp(argv[2], &re, source, argv[4], pmatch); 536 free(pmatch); 537 regfree(&re); 538 } 539 540 void 541 doformat(const char *argv[], int argc) 542 { 543 const char *format = argv[2]; 544 int pos = 3; 545 int left_padded; 546 long width; 547 size_t l; 548 const char *thisarg; 549 char temp[2]; 550 long extra; 551 552 while (*format != 0) { 553 if (*format != '%') { 554 addchar(*format++); 555 continue; 556 } 557 558 format++; 559 if (*format == '%') { 560 addchar(*format++); 561 continue; 562 } 563 if (*format == 0) { 564 addchar('%'); 565 break; 566 } 567 568 if (*format == '*') { 569 format++; 570 if (pos >= argc) 571 m4errx(1, 572 "Format with too many format specifiers."); 573 width = strtol(argv[pos++], NULL, 10); 574 } else { 575 width = strtol(format, (char **)&format, 10); 576 } 577 if (width < 0) { 578 left_padded = 1; 579 width = -width; 580 } else { 581 left_padded = 0; 582 } 583 if (*format == '.') { 584 format++; 585 if (*format == '*') { 586 format++; 587 if (pos >= argc) 588 m4errx(1, 589 "Format with too many format specifiers."); 590 extra = strtol(argv[pos++], NULL, 10); 591 } else { 592 extra = strtol(format, (char **)&format, 10); 593 } 594 } else { 595 extra = LONG_MAX; 596 } 597 if (pos >= argc) 598 m4errx(1, "Format with too many format specifiers."); 599 switch(*format) { 600 case 's': 601 thisarg = argv[pos++]; 602 break; 603 case 'c': 604 temp[0] = strtoul(argv[pos++], NULL, 10); 605 temp[1] = 0; 606 thisarg = temp; 607 break; 608 default: 609 m4errx(1, "Unsupported format specification: %s.", 610 argv[2]); 611 } 612 format++; 613 l = strlen(thisarg); 614 if (l > extra) 615 l = extra; 616 if (!left_padded) { 617 while (l < width--) 618 addchar(' '); 619 } 620 addchars(thisarg, l); 621 if (left_padded) { 622 while (l < width--) 623 addchar(' '); 624 } 625 } 626 pbstr(getstring()); 627 } 628 629 void 630 doesyscmd(const char *cmd) 631 { 632 int p[2]; 633 pid_t cpid; 634 char *argv[4]; 635 int cc; 636 int status; 637 638 /* Follow gnu m4 documentation: first flush buffers. */ 639 fflush(NULL); 640 641 argv[0] = "sh"; 642 argv[1] = "-c"; 643 argv[2] = (char *)cmd; 644 argv[3] = NULL; 645 646 /* Just set up standard output, share stderr and stdin with m4 */ 647 if (pipe(p) == -1) 648 err(1, "bad pipe"); 649 switch(cpid = fork()) { 650 case -1: 651 err(1, "bad fork"); 652 /* NOTREACHED */ 653 case 0: 654 (void) close(p[0]); 655 (void) dup2(p[1], 1); 656 (void) close(p[1]); 657 execv(_PATH_BSHELL, argv); 658 exit(1); 659 default: 660 /* Read result in two stages, since m4's buffer is 661 * pushback-only. */ 662 (void) close(p[1]); 663 do { 664 char result[BUFSIZE]; 665 cc = read(p[0], result, sizeof result); 666 if (cc > 0) 667 addchars(result, cc); 668 } while (cc > 0 || (cc == -1 && errno == EINTR)); 669 670 (void) close(p[0]); 671 while (waitpid(cpid, &status, 0) == -1) { 672 if (errno != EINTR) 673 break; 674 } 675 pbstr(getstring()); 676 } 677 } 678 679 void 680 getdivfile(const char *name) 681 { 682 FILE *f; 683 int c; 684 685 f = fopen(name, "r"); 686 if (!f) 687 return; 688 689 while ((c = getc(f))!= EOF) 690 putc(c, active); 691 (void) fclose(f); 692 } 693