1 /* $OpenBSD: gnum4.c,v 1.42 2011/11/06 12:25:43 espie Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/usr.bin/m4/gnum4.c,v 1.15 2012/11/17 01:54:24 svnexp Exp $ 28 */ 29 30 /* 31 * functions needed to support gnu-m4 extensions, including a fake freezing 32 */ 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/wait.h> 37 #include <ctype.h> 38 #include <err.h> 39 #include <paths.h> 40 #include <regex.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <errno.h> 46 #include <unistd.h> 47 #include "mdef.h" 48 #include "stdd.h" 49 #include "extern.h" 50 51 52 int mimic_gnu = 0; 53 54 /* 55 * Support for include path search 56 * First search in the current directory. 57 * If not found, and the path is not absolute, include path kicks in. 58 * First, -I options, in the order found on the command line. 59 * Then M4PATH env variable 60 */ 61 62 static struct path_entry { 63 char *name; 64 struct path_entry *next; 65 } *first, *last; 66 67 static struct path_entry *new_path_entry(const char *); 68 static void ensure_m4path(void); 69 static struct input_file *dopath(struct input_file *, const char *); 70 71 static struct path_entry * 72 new_path_entry(const char *dirname) 73 { 74 struct path_entry *n; 75 76 n = malloc(sizeof(struct path_entry)); 77 if (n == NULL) 78 errx(1, "out of memory"); 79 n->name = strdup(dirname); 80 if (n->name == NULL) 81 errx(1, "out of memory"); 82 n->next = NULL; 83 return n; 84 } 85 86 void 87 addtoincludepath(const char *dirname) 88 { 89 struct path_entry *n; 90 91 n = new_path_entry(dirname); 92 93 if (last) { 94 last->next = n; 95 last = n; 96 } else { 97 last = first = n; 98 } 99 } 100 101 static void 102 ensure_m4path(void) 103 { 104 static int envpathdone = 0; 105 char *envpath; 106 char *sweep; 107 char *path; 108 109 if (envpathdone) 110 return; 111 envpathdone = TRUE; 112 envpath = getenv("M4PATH"); 113 if (!envpath) 114 return; 115 /* for portability: getenv result is read-only */ 116 envpath = strdup(envpath); 117 if (envpath == NULL) 118 errx(1, "out of memory"); 119 for (sweep = envpath; 120 (path = strsep(&sweep, ":")) != NULL;) 121 addtoincludepath(path); 122 free(envpath); 123 } 124 125 static struct input_file * 126 dopath(struct input_file *i, const char *filename) 127 { 128 char path[MAXPATHLEN]; 129 struct path_entry *pe; 130 FILE *f; 131 132 for (pe = first; pe; pe = pe->next) { 133 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 134 if ((f = fopen(path, "r")) != NULL) { 135 set_input(i, f, path); 136 return i; 137 } 138 } 139 return NULL; 140 } 141 142 struct input_file * 143 fopen_trypath(struct input_file *i, const char *filename) 144 { 145 FILE *f; 146 147 f = fopen(filename, "r"); 148 if (f != NULL) { 149 set_input(i, f, filename); 150 return i; 151 } 152 if (filename[0] == '/') 153 return NULL; 154 155 ensure_m4path(); 156 157 return dopath(i, filename); 158 } 159 160 void 161 doindir(const char *argv[], int argc) 162 { 163 ndptr n; 164 struct macro_definition *p = NULL; 165 166 n = lookup(argv[2]); 167 if (n == NULL || (p = macro_getdef(n)) == NULL) 168 m4errx(1, "indir: undefined macro %s.", argv[2]); 169 argv[1] = p->defn; 170 171 eval(argv + 1, argc - 1, p->type, is_traced(n)); 172 } 173 174 void 175 dobuiltin(const char *argv[], int argc) 176 { 177 ndptr p; 178 179 argv[1] = NULL; 180 p = macro_getbuiltin(argv[2]); 181 if (p != NULL) 182 eval(argv + 1, argc - 1, macro_builtin_type(p), is_traced(p)); 183 else 184 m4errx(1, "unknown builtin %s.", argv[2]); 185 } 186 187 188 /* We need some temporary buffer space, as pb pushes BACK and substitution 189 * proceeds forward... */ 190 static char *buffer; 191 static size_t bufsize = 0; 192 static size_t current = 0; 193 194 static void addchars(const char *, size_t); 195 static void addchar(int); 196 static char *twiddle(const char *); 197 static char *getstring(void); 198 static void exit_regerror(int, regex_t *); 199 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 200 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 201 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 202 static void add_sub(int, const char *, regex_t *, regmatch_t *); 203 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 204 #define addconstantstring(s) addchars((s), sizeof(s)-1) 205 206 static void 207 addchars(const char *c, size_t n) 208 { 209 if (n == 0) 210 return; 211 while (current + n > bufsize) { 212 if (bufsize == 0) 213 bufsize = 1024; 214 else 215 bufsize *= 2; 216 buffer = xrealloc(buffer, bufsize, NULL); 217 } 218 memcpy(buffer + current, c, n); 219 current += n; 220 } 221 222 static void 223 addchar(int c) 224 { 225 if (current + 1 > bufsize) { 226 if (bufsize == 0) 227 bufsize = 1024; 228 else 229 bufsize *= 2; 230 buffer = xrealloc(buffer, bufsize, NULL); 231 } 232 buffer[current++] = c; 233 } 234 235 static char * 236 getstring(void) 237 { 238 addchar('\0'); 239 current = 0; 240 return buffer; 241 } 242 243 244 static void 245 exit_regerror(int er, regex_t *re) 246 { 247 size_t errlen; 248 char *errbuf; 249 250 errlen = regerror(er, re, NULL, 0); 251 errbuf = xalloc(errlen, 252 "malloc in regerror: %lu", (unsigned long)errlen); 253 regerror(er, re, errbuf, errlen); 254 m4errx(1, "regular expression error: %s.", errbuf); 255 } 256 257 static void 258 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 259 { 260 if (n > (int)re->re_nsub) 261 warnx("No subexpression %d", n); 262 /* Subexpressions that did not match are 263 * not an error. */ 264 else if (pm[n].rm_so != -1 && pm[n].rm_eo != -1) 265 addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so); 266 } 267 268 /* Add replacement string to the output buffer, recognizing special 269 * constructs and replacing them with substrings of the original string. 270 */ 271 static void 272 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 273 { 274 const char *p; 275 276 for (p = replace; *p != '\0'; p++) { 277 if (*p == '&' && !mimic_gnu) { 278 add_sub(0, string, re, pm); 279 continue; 280 } 281 if (*p == '\\') { 282 if (p[1] == '\\') { 283 addchar(p[1]); 284 p++; 285 continue; 286 } 287 if (p[1] == '&') { 288 if (mimic_gnu) 289 add_sub(0, string, re, pm); 290 else 291 addchar(p[1]); 292 p++; 293 continue; 294 } 295 if (isdigit(p[1])) { 296 add_sub(*(++p) - '0', string, re, pm); 297 continue; 298 } 299 } 300 addchar(*p); 301 } 302 } 303 304 static void 305 do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 306 { 307 int error; 308 int flags = 0; 309 const char *last_match = NULL; 310 311 while ((error = regexec(re, string, re->re_nsub + 1, pm, flags)) == 0) { 312 if (pm[0].rm_eo != 0) { 313 if (string[pm[0].rm_eo - 1] == '\n') 314 flags = 0; 315 else 316 flags = REG_NOTBOL; 317 } 318 319 /* NULL length matches are special... We use the `vi-mode' 320 * rule: don't allow a NULL-match at the last match 321 * position. 322 */ 323 if (pm[0].rm_so == pm[0].rm_eo && 324 string + pm[0].rm_so == last_match) { 325 if (*string == '\0') 326 return; 327 addchar(*string); 328 if (*string++ == '\n') 329 flags = 0; 330 else 331 flags = REG_NOTBOL; 332 continue; 333 } 334 last_match = string + pm[0].rm_so; 335 addchars(string, pm[0].rm_so); 336 add_replace(string, re, replace, pm); 337 string += pm[0].rm_eo; 338 } 339 if (error != REG_NOMATCH) 340 exit_regerror(error, re); 341 pbstr(string); 342 } 343 344 static void 345 do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 346 { 347 int error; 348 349 switch(error = regexec(re, string, re->re_nsub + 1, pm, 0)) { 350 case 0: 351 add_replace(string, re, replace, pm); 352 pbstr(getstring()); 353 break; 354 case REG_NOMATCH: 355 break; 356 default: 357 exit_regerror(error, re); 358 } 359 } 360 361 static void 362 do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 363 { 364 int error; 365 366 switch(error = regexec(re, string, re->re_nsub + 1, pm, 0)) { 367 case 0: 368 pbunsigned(pm[0].rm_so); 369 break; 370 case REG_NOMATCH: 371 pbnum(-1); 372 break; 373 default: 374 exit_regerror(error, re); 375 } 376 } 377 378 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 379 * says. So we twiddle with the regexp before passing it to regcomp. 380 */ 381 static char * 382 twiddle(const char *p) 383 { 384 /* + at start of regexp is a normal character for Gnu m4 */ 385 if (*p == '^') { 386 addchar(*p); 387 p++; 388 } 389 if (*p == '+') 390 addchar('\\'); 391 /* This could use strcspn for speed... */ 392 while (*p != '\0') { 393 if (*p == '\\') { 394 switch(p[1]) { 395 case '(': 396 case ')': 397 case '|': 398 addchar(p[1]); 399 break; 400 case 'w': 401 addconstantstring("[_a-zA-Z0-9]"); 402 break; 403 case 'W': 404 addconstantstring("[^_a-zA-Z0-9]"); 405 break; 406 case '<': 407 addconstantstring("[[:<:]]"); 408 break; 409 case '>': 410 addconstantstring("[[:>:]]"); 411 break; 412 default: 413 addchars(p, 2); 414 break; 415 } 416 p += 2; 417 continue; 418 } 419 if (*p == '(' || *p == ')' || *p == '|') 420 addchar('\\'); 421 422 addchar(*p); 423 p++; 424 } 425 return getstring(); 426 } 427 428 /* patsubst(string, regexp, opt replacement) */ 429 /* argv[2]: string 430 * argv[3]: regexp 431 * argv[4]: opt rep 432 */ 433 void 434 dopatsubst(const char *argv[], int argc) 435 { 436 if (argc <= 3) { 437 warnx("Too few arguments to patsubst"); 438 return; 439 } 440 /* special case: empty regexp */ 441 if (argv[3][0] == '\0') { 442 const char *s; 443 size_t len; 444 if (argc > 4 && argv[4]) 445 len = strlen(argv[4]); 446 else 447 len = 0; 448 for (s = argv[2]; *s != '\0'; s++) { 449 addchars(argv[4], len); 450 addchar(*s); 451 } 452 } else { 453 int error; 454 regex_t re; 455 regmatch_t *pmatch; 456 int mode = REG_EXTENDED; 457 size_t l = strlen(argv[3]); 458 459 if (!mimic_gnu || 460 (argv[3][0] == '^') || 461 (l > 0 && argv[3][l-1] == '$')) 462 mode |= REG_NEWLINE; 463 464 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 465 mode); 466 if (error != 0) 467 exit_regerror(error, &re); 468 469 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub + 1), NULL); 470 do_subst(argv[2], &re, 471 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 472 free(pmatch); 473 regfree(&re); 474 } 475 pbstr(getstring()); 476 } 477 478 void 479 doregexp(const char *argv[], int argc) 480 { 481 int error; 482 regex_t re; 483 regmatch_t *pmatch; 484 485 if (argc <= 3) { 486 warnx("Too few arguments to regexp"); 487 return; 488 } 489 /* special gnu case */ 490 if (argv[3][0] == '\0' && mimic_gnu) { 491 if (argc == 4 || argv[4] == NULL) 492 return; 493 else 494 pbstr(argv[4]); 495 } 496 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 497 REG_EXTENDED|REG_NEWLINE); 498 if (error != 0) 499 exit_regerror(error, &re); 500 501 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub + 1), NULL); 502 if (argc == 4 || argv[4] == NULL) 503 do_regexpindex(argv[2], &re, pmatch); 504 else 505 do_regexp(argv[2], &re, argv[4], pmatch); 506 free(pmatch); 507 regfree(&re); 508 } 509 510 void 511 doformat(const char *argv[], int argc) 512 { 513 const char *format = argv[2]; 514 int pos = 3; 515 int left_padded; 516 long width; 517 size_t l; 518 const char *thisarg = NULL; 519 char temp[2]; 520 long extra; 521 522 while (*format != 0) { 523 if (*format != '%') { 524 addchar(*format++); 525 continue; 526 } 527 528 format++; 529 if (*format == '%') { 530 addchar(*format++); 531 continue; 532 } 533 if (*format == 0) { 534 addchar('%'); 535 break; 536 } 537 538 if (*format == '*') { 539 format++; 540 if (pos >= argc) 541 m4errx(1, 542 "Format with too many format specifiers."); 543 width = strtol(argv[pos++], NULL, 10); 544 } else { 545 width = strtol(format, __DECONST(char **, &format), 10); 546 } 547 if (width < 0) { 548 left_padded = 1; 549 width = -width; 550 } else { 551 left_padded = 0; 552 } 553 if (*format == '.') { 554 format++; 555 if (*format == '*') { 556 format++; 557 if (pos >= argc) 558 m4errx(1, 559 "Format with too many format specifiers."); 560 extra = strtol(argv[pos++], NULL, 10); 561 } else { 562 extra = strtol(format, __DECONST(char **, &format), 10); 563 } 564 } else { 565 extra = LONG_MAX; 566 } 567 if (pos >= argc) 568 m4errx(1, "Format with too many format specifiers."); 569 switch(*format) { 570 case 's': 571 thisarg = argv[pos++]; 572 break; 573 case 'c': 574 temp[0] = strtoul(argv[pos++], NULL, 10); 575 temp[1] = 0; 576 thisarg = temp; 577 break; 578 default: 579 m4errx(1, "Unsupported format specification: %s.", 580 argv[2]); 581 } 582 format++; 583 l = strlen(thisarg); 584 if ((long)l > extra) 585 l = extra; 586 if (!left_padded) { 587 while ((long)l < width--) 588 addchar(' '); 589 } 590 addchars(thisarg, l); 591 if (left_padded) { 592 while ((long)l < width--) 593 addchar(' '); 594 } 595 } 596 pbstr(getstring()); 597 } 598 599 void 600 doesyscmd(const char *cmd) 601 { 602 int p[2]; 603 pid_t pid, cpid; 604 char *argv[4]; 605 int cc; 606 int status; 607 608 /* Follow gnu m4 documentation: first flush buffers. */ 609 fflush(NULL); 610 611 argv[0] = __DECONST(char *, "sh"); 612 argv[1] = __DECONST(char *, "-c"); 613 argv[2] = __DECONST(char *, cmd); 614 argv[3] = NULL; 615 616 /* Just set up standard output, share stderr and stdin with m4 */ 617 if (pipe(p) == -1) 618 err(1, "bad pipe"); 619 switch(cpid = fork()) { 620 case -1: 621 err(1, "bad fork"); 622 /* NOTREACHED */ 623 case 0: 624 close(p[0]); 625 dup2(p[1], 1); 626 close(p[1]); 627 execv(_PATH_BSHELL, argv); 628 exit(1); 629 default: 630 /* Read result in two stages, since m4's buffer is 631 * pushback-only. */ 632 close(p[1]); 633 do { 634 char result[BUFSIZE]; 635 cc = read(p[0], result, sizeof result); 636 if (cc > 0) 637 addchars(result, cc); 638 } while (cc > 0 || (cc == -1 && errno == EINTR)); 639 640 close(p[0]); 641 while ((pid = wait(&status)) != cpid && pid >= 0) 642 continue; 643 pbstr(getstring()); 644 } 645 } 646 647 void 648 getdivfile(const char *name) 649 { 650 FILE *f; 651 int c; 652 653 f = fopen(name, "r"); 654 if (!f) 655 return; 656 657 while ((c = getc(f)) != EOF) 658 putc(c, active); 659 fclose(f); 660 } 661