1 /* $NetBSD: gnum4.c,v 1.9 2012/03/20 20:34:58 matt Exp $ */ 2 /* $OpenBSD: gnum4.c,v 1.39 2008/08/21 21:01:04 espie Exp $ */ 3 4 /* 5 * Copyright (c) 1999 Marc Espie 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * functions needed to support gnu-m4 extensions, including a fake freezing 31 */ 32 #if HAVE_NBTOOL_CONFIG_H 33 #include "nbtool_config.h" 34 #endif 35 #include <sys/cdefs.h> 36 __RCSID("$NetBSD: gnum4.c,v 1.9 2012/03/20 20:34:58 matt Exp $"); 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/wait.h> 41 #include <ctype.h> 42 #include <err.h> 43 #include <paths.h> 44 #include <regex.h> 45 #include <stddef.h> 46 #include <stdlib.h> 47 #include <stdio.h> 48 #include <string.h> 49 #include <errno.h> 50 #include <unistd.h> 51 #include "mdef.h" 52 #include "stdd.h" 53 #include "extern.h" 54 55 56 int mimic_gnu = 0; 57 #ifndef SIZE_T_MAX 58 #define SIZE_T_MAX (size_t)~0ull 59 #endif 60 61 /* 62 * Support for include path search 63 * First search in the current directory. 64 * If not found, and the path is not absolute, include path kicks in. 65 * First, -I options, in the order found on the command line. 66 * Then M4PATH env variable 67 */ 68 69 struct path_entry { 70 char *name; 71 struct path_entry *next; 72 } *first, *last; 73 74 static struct path_entry *new_path_entry(const char *); 75 static void ensure_m4path(void); 76 static struct input_file *dopath(struct input_file *, const char *); 77 78 static struct path_entry * 79 new_path_entry(const char *dirname) 80 { 81 struct path_entry *n; 82 83 n = malloc(sizeof(struct path_entry)); 84 if (!n) 85 errx(1, "out of memory"); 86 n->name = strdup(dirname); 87 if (!n->name) 88 errx(1, "out of memory"); 89 n->next = 0; 90 return n; 91 } 92 93 void 94 addtoincludepath(const char *dirname) 95 { 96 struct path_entry *n; 97 98 n = new_path_entry(dirname); 99 100 if (last) { 101 last->next = n; 102 last = n; 103 } 104 else 105 last = first = n; 106 } 107 108 static void 109 ensure_m4path(void) 110 { 111 static int envpathdone = 0; 112 char *envpath; 113 char *sweep; 114 char *path; 115 116 if (envpathdone) 117 return; 118 envpathdone = TRUE; 119 envpath = getenv("M4PATH"); 120 if (!envpath) 121 return; 122 /* for portability: getenv result is read-only */ 123 envpath = strdup(envpath); 124 if (!envpath) 125 errx(1, "out of memory"); 126 for (sweep = envpath; 127 (path = strsep(&sweep, ":")) != NULL;) 128 addtoincludepath(path); 129 free(envpath); 130 } 131 132 static 133 struct input_file * 134 dopath(struct input_file *i, const char *filename) 135 { 136 char path[MAXPATHLEN]; 137 struct path_entry *pe; 138 FILE *f; 139 140 for (pe = first; pe; pe = pe->next) { 141 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 142 if ((f = fopen(path, "r")) != 0) { 143 set_input(i, f, path); 144 return i; 145 } 146 } 147 return NULL; 148 } 149 150 struct input_file * 151 fopen_trypath(struct input_file *i, const char *filename) 152 { 153 FILE *f; 154 155 f = fopen(filename, "r"); 156 if (f != NULL) { 157 set_input(i, f, filename); 158 return i; 159 } 160 if (filename[0] == '/') 161 return NULL; 162 163 ensure_m4path(); 164 165 return dopath(i, filename); 166 } 167 168 void 169 doindir(const char *argv[], int argc) 170 { 171 ndptr n; 172 struct macro_definition *p; 173 174 n = lookup(argv[2]); 175 if (n == NULL || (p = macro_getdef(n)) == NULL) 176 m4errx(1, "indir: undefined macro %s.", argv[2]); 177 argv[1] = p->defn; 178 179 eval(argv+1, argc-1, p->type, is_traced(n)); 180 } 181 182 void 183 dobuiltin(const char *argv[], int argc) 184 { 185 ndptr p; 186 187 argv[1] = NULL; 188 p = macro_getbuiltin(argv[2]); 189 if (p != NULL) 190 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 191 else 192 m4errx(1, "unknown builtin %s.", argv[2]); 193 } 194 195 196 /* We need some temporary buffer space, as pb pushes BACK and substitution 197 * proceeds forward... */ 198 static char *buffer; 199 static size_t bufsize = 0; 200 static size_t current = 0; 201 202 static void addchars(const char *, size_t); 203 static void addchar(int); 204 static char *twiddle(const char *); 205 static char *getstring(void); 206 static void exit_regerror(int, regex_t *) __dead; 207 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 208 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 209 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 210 static void add_sub(size_t, const char *, regex_t *, regmatch_t *); 211 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 212 #define addconstantstring(s) addchars((s), sizeof(s)-1) 213 214 static void 215 addchars(const char *c, size_t n) 216 { 217 if (n == 0) 218 return; 219 while (current + n > bufsize) { 220 if (bufsize == 0) 221 bufsize = 1024; 222 else 223 bufsize *= 2; 224 buffer = xrealloc(buffer, bufsize, NULL); 225 } 226 memcpy(buffer+current, c, n); 227 current += n; 228 } 229 230 static void 231 addchar(int c) 232 { 233 if (current +1 > bufsize) { 234 if (bufsize == 0) 235 bufsize = 1024; 236 else 237 bufsize *= 2; 238 buffer = xrealloc(buffer, bufsize, NULL); 239 } 240 buffer[current++] = c; 241 } 242 243 static char * 244 getstring(void) 245 { 246 addchar('\0'); 247 current = 0; 248 return buffer; 249 } 250 251 252 static void 253 exit_regerror(int er, regex_t *re) 254 { 255 size_t errlen; 256 char *errbuf; 257 258 errlen = regerror(er, re, NULL, 0); 259 errbuf = xalloc(errlen, 260 "malloc in regerror: %lu", (unsigned long)errlen); 261 regerror(er, re, errbuf, errlen); 262 m4errx(1, "regular expression error: %s.", errbuf); 263 } 264 265 static void 266 add_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm) 267 { 268 if (n > re->re_nsub) 269 warnx("No subexpression %zu", n); 270 /* Subexpressions that did not match are 271 * not an error. */ 272 else if (pm[n].rm_so != -1 && 273 pm[n].rm_eo != -1) { 274 addchars(string + pm[n].rm_so, 275 pm[n].rm_eo - pm[n].rm_so); 276 } 277 } 278 279 /* Add replacement string to the output buffer, recognizing special 280 * constructs and replacing them with substrings of the original string. 281 */ 282 static void 283 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 284 { 285 const char *p; 286 287 for (p = replace; *p != '\0'; p++) { 288 if (*p == '&' && !mimic_gnu) { 289 add_sub(0, string, re, pm); 290 continue; 291 } 292 if (*p == '\\') { 293 if (p[1] == '\\') { 294 addchar(p[1]); 295 p++; 296 continue; 297 } 298 if (p[1] == '&') { 299 if (mimic_gnu) 300 add_sub(0, string, re, pm); 301 else 302 addchar(p[1]); 303 p++; 304 continue; 305 } 306 if (isdigit((unsigned char)p[1])) { 307 add_sub(*(++p) - '0', string, re, pm); 308 continue; 309 } 310 } 311 addchar(*p); 312 } 313 } 314 315 static void 316 do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 317 { 318 int error; 319 int flags = 0; 320 const char *last_match = NULL; 321 322 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 323 if (pm[0].rm_eo != 0) { 324 if (string[pm[0].rm_eo-1] == '\n') 325 flags = 0; 326 else 327 flags = REG_NOTBOL; 328 } 329 330 /* NULL length matches are special... We use the `vi-mode' 331 * rule: don't allow a NULL-match at the last match 332 * position. 333 */ 334 if (pm[0].rm_so == pm[0].rm_eo && 335 string + pm[0].rm_so == last_match) { 336 if (*string == '\0') 337 return; 338 addchar(*string); 339 if (*string++ == '\n') 340 flags = 0; 341 else 342 flags = REG_NOTBOL; 343 continue; 344 } 345 last_match = string + pm[0].rm_so; 346 addchars(string, pm[0].rm_so); 347 add_replace(string, re, replace, pm); 348 string += pm[0].rm_eo; 349 } 350 if (error != REG_NOMATCH) 351 exit_regerror(error, re); 352 pbstr(string); 353 } 354 355 static void 356 do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 357 { 358 int error; 359 360 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 361 case 0: 362 add_replace(string, re, replace, pm); 363 pbstr(getstring()); 364 break; 365 case REG_NOMATCH: 366 break; 367 default: 368 exit_regerror(error, re); 369 } 370 } 371 372 static void 373 do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 374 { 375 int error; 376 377 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 378 case 0: 379 pbunsigned(pm[0].rm_so); 380 break; 381 case REG_NOMATCH: 382 pbnum(-1); 383 break; 384 default: 385 exit_regerror(error, re); 386 } 387 } 388 389 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 390 * says. So we twiddle with the regexp before passing it to regcomp. 391 */ 392 static char * 393 twiddle(const char *p) 394 { 395 /* + at start of regexp is a normal character for Gnu m4 */ 396 if (*p == '^') { 397 addchar(*p); 398 p++; 399 } 400 if (*p == '+') { 401 addchar('\\'); 402 } 403 /* This could use strcspn for speed... */ 404 while (*p != '\0') { 405 if (*p == '\\') { 406 switch(p[1]) { 407 case '(': 408 case ')': 409 case '|': 410 addchar(p[1]); 411 break; 412 case 'w': 413 addconstantstring("[_a-zA-Z0-9]"); 414 break; 415 case 'W': 416 addconstantstring("[^_a-zA-Z0-9]"); 417 break; 418 case '<': 419 addconstantstring("[[:<:]]"); 420 break; 421 case '>': 422 addconstantstring("[[:>:]]"); 423 break; 424 default: 425 addchars(p, 2); 426 break; 427 } 428 p+=2; 429 continue; 430 } 431 if (*p == '(' || *p == ')' || *p == '|') 432 addchar('\\'); 433 434 addchar(*p); 435 p++; 436 } 437 return getstring(); 438 } 439 440 /* patsubst(string, regexp, opt replacement) */ 441 /* argv[2]: string 442 * argv[3]: regexp 443 * argv[4]: opt rep 444 */ 445 void 446 dopatsubst(const char *argv[], int argc) 447 { 448 if (argc <= 3) { 449 warnx("Too few arguments to patsubst"); 450 return; 451 } 452 /* special case: empty regexp */ 453 if (argv[3][0] == '\0') { 454 const char *s; 455 size_t len; 456 if (argv[4] && argc > 4) 457 len = strlen(argv[4]); 458 else 459 len = 0; 460 for (s = argv[2]; *s != '\0'; s++) { 461 addchars(argv[4], len); 462 addchar(*s); 463 } 464 } else { 465 int error; 466 regex_t re; 467 regmatch_t *pmatch; 468 int mode = REG_EXTENDED; 469 size_t l = strlen(argv[3]); 470 471 if (!mimic_gnu || 472 (argv[3][0] == '^') || 473 (l > 0 && argv[3][l-1] == '$')) 474 mode |= REG_NEWLINE; 475 476 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 477 mode); 478 if (error != 0) 479 exit_regerror(error, &re); 480 481 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 482 do_subst(argv[2], &re, 483 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 484 free(pmatch); 485 regfree(&re); 486 } 487 pbstr(getstring()); 488 } 489 490 void 491 doregexp(const char *argv[], int argc) 492 { 493 int error; 494 regex_t re; 495 regmatch_t *pmatch; 496 497 if (argc <= 3) { 498 warnx("Too few arguments to regexp"); 499 return; 500 } 501 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 502 REG_EXTENDED); 503 if (error != 0) 504 exit_regerror(error, &re); 505 506 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 507 if (argv[4] == NULL || argc == 4) 508 do_regexpindex(argv[2], &re, pmatch); 509 else 510 do_regexp(argv[2], &re, argv[4], pmatch); 511 free(pmatch); 512 regfree(&re); 513 } 514 515 void 516 doformat(const char *argv[], int argc) 517 { 518 const char *format = argv[2]; 519 int pos = 3; 520 int left_padded; 521 long width; 522 size_t l; 523 const char *thisarg; 524 char temp[2]; 525 size_t extra; 526 527 while (*format != 0) { 528 if (*format != '%') { 529 addchar(*format++); 530 continue; 531 } 532 533 format++; 534 if (*format == '%') { 535 addchar(*format++); 536 continue; 537 } 538 if (*format == 0) { 539 addchar('%'); 540 break; 541 } 542 543 if (*format == '*') { 544 format++; 545 if (pos >= argc) 546 m4errx(1, 547 "Format with too many format specifiers."); 548 width = strtol(argv[pos++], NULL, 10); 549 } else { 550 char *eformat; 551 width = strtol(format, &eformat, 10); 552 format = eformat; 553 } 554 if (width < 0) { 555 left_padded = 1; 556 width = -width; 557 } else { 558 left_padded = 0; 559 } 560 if (*format == '.') { 561 format++; 562 if (*format == '*') { 563 format++; 564 if (pos >= argc) 565 m4errx(1, 566 "Format with too many format specifiers."); 567 extra = strtol(argv[pos++], NULL, 10); 568 } else { 569 char *eformat; 570 extra = strtol(format, &eformat, 10); 571 format = eformat; 572 } 573 } else { 574 extra = SIZE_T_MAX; 575 } 576 if (pos >= argc) 577 m4errx(1, "Format with too many format specifiers."); 578 switch(*format) { 579 case 's': 580 thisarg = argv[pos++]; 581 break; 582 case 'c': 583 temp[0] = strtoul(argv[pos++], NULL, 10); 584 temp[1] = 0; 585 thisarg = temp; 586 break; 587 default: 588 m4errx(1, "Unsupported format specification: %s.", 589 argv[2]); 590 } 591 format++; 592 l = strlen(thisarg); 593 if (l > extra) 594 l = extra; 595 if (!left_padded) { 596 while (l < (size_t)width--) 597 addchar(' '); 598 } 599 addchars(thisarg, l); 600 if (left_padded) { 601 while (l < (size_t)width--) 602 addchar(' '); 603 } 604 } 605 pbstr(getstring()); 606 } 607 608 void 609 doesyscmd(const char *cmd) 610 { 611 int p[2]; 612 pid_t pid, cpid; 613 const char *argv[4]; 614 int cc; 615 int status; 616 617 /* Follow gnu m4 documentation: first flush buffers. */ 618 fflush(NULL); 619 620 argv[0] = "sh"; 621 argv[1] = "-c"; 622 argv[2] = cmd; 623 argv[3] = NULL; 624 625 /* Just set up standard output, share stderr and stdin with m4 */ 626 if (pipe(p) == -1) 627 err(1, "bad pipe"); 628 switch(cpid = fork()) { 629 case -1: 630 err(1, "bad fork"); 631 /* NOTREACHED */ 632 case 0: 633 (void) close(p[0]); 634 (void) dup2(p[1], 1); 635 (void) close(p[1]); 636 execv(_PATH_BSHELL, __UNCONST(argv)); 637 exit(1); 638 default: 639 /* Read result in two stages, since m4's buffer is 640 * pushback-only. */ 641 (void) close(p[1]); 642 do { 643 char result[BUFSIZE]; 644 cc = read(p[0], result, sizeof result); 645 if (cc > 0) 646 addchars(result, cc); 647 } while (cc > 0 || (cc == -1 && errno == EINTR)); 648 649 (void) close(p[0]); 650 while ((pid = wait(&status)) != cpid && pid >= 0) 651 continue; 652 pbstr(getstring()); 653 } 654 } 655 656 void 657 getdivfile(const char *name) 658 { 659 FILE *f; 660 int c; 661 662 f = fopen(name, "r"); 663 if (!f) 664 return; 665 666 while ((c = getc(f))!= EOF) 667 putc(c, active); 668 (void) fclose(f); 669 } 670