1 /* $NetBSD: gnum4.c,v 1.2 2002/01/21 21:49:57 tv Exp $ */ 2 /* $OpenBSD: gnum4.c,v 1.15 2001/10/13 20:18:48 espie Exp $ */ 3 4 /* 5 * Copyright (c) 1999 Marc Espie 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * functions needed to support gnu-m4 extensions, including a fake freezing 31 */ 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/wait.h> 36 #include <ctype.h> 37 #include <errno.h> 38 #include <paths.h> 39 #include <regex.h> 40 #include <stddef.h> 41 #include <stdlib.h> 42 #include <stdio.h> 43 #include <string.h> 44 #include "mdef.h" 45 #include "stdd.h" 46 #include "extern.h" 47 48 49 int mimic_gnu = 0; 50 51 /* 52 * Support for include path search 53 * First search in the the current directory. 54 * If not found, and the path is not absolute, include path kicks in. 55 * First, -I options, in the order found on the command line. 56 * Then M4PATH env variable 57 */ 58 59 struct path_entry { 60 char *name; 61 struct path_entry *next; 62 } *first, *last; 63 64 static struct path_entry *new_path_entry __P((const char *)); 65 static void ensure_m4path __P((void)); 66 static struct input_file *dopath __P((struct input_file *, const char *)); 67 68 static struct path_entry * 69 new_path_entry(dirname) 70 const char *dirname; 71 { 72 struct path_entry *n; 73 74 n = malloc(sizeof(struct path_entry)); 75 if (!n) 76 errx(1, "out of memory"); 77 n->name = strdup(dirname); 78 if (!n->name) 79 errx(1, "out of memory"); 80 n->next = 0; 81 return n; 82 } 83 84 void 85 addtoincludepath(dirname) 86 const char *dirname; 87 { 88 struct path_entry *n; 89 90 n = new_path_entry(dirname); 91 92 if (last) { 93 last->next = n; 94 last = n; 95 } 96 else 97 last = first = n; 98 } 99 100 static void 101 ensure_m4path() 102 { 103 static int envpathdone = 0; 104 char *envpath; 105 char *sweep; 106 char *path; 107 108 if (envpathdone) 109 return; 110 envpathdone = TRUE; 111 envpath = getenv("M4PATH"); 112 if (!envpath) 113 return; 114 /* for portability: getenv result is read-only */ 115 envpath = strdup(envpath); 116 if (!envpath) 117 errx(1, "out of memory"); 118 for (sweep = envpath; 119 (path = strsep(&sweep, ":")) != NULL;) 120 addtoincludepath(path); 121 free(envpath); 122 } 123 124 static 125 struct input_file * 126 dopath(i, filename) 127 struct input_file *i; 128 const char *filename; 129 { 130 char path[MAXPATHLEN]; 131 struct path_entry *pe; 132 FILE *f; 133 134 for (pe = first; pe; pe = pe->next) { 135 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 136 if ((f = fopen(path, "r")) != 0) { 137 set_input(i, f, path); 138 return i; 139 } 140 } 141 return NULL; 142 } 143 144 struct input_file * 145 fopen_trypath(i, filename) 146 struct input_file *i; 147 const char *filename; 148 { 149 FILE *f; 150 151 f = fopen(filename, "r"); 152 if (f != NULL) { 153 set_input(i, f, filename); 154 return i; 155 } 156 if (filename[0] == '/') 157 return NULL; 158 159 ensure_m4path(); 160 161 return dopath(i, filename); 162 } 163 164 void 165 doindir(argv, argc) 166 const char *argv[]; 167 int argc; 168 { 169 ndptr p; 170 171 p = lookup(argv[2]); 172 if (p == NULL) 173 errx(1, "undefined macro %s", argv[2]); 174 argv[1] = p->defn; 175 eval(argv+1, argc-1, p->type); 176 } 177 178 void 179 dobuiltin(argv, argc) 180 const char *argv[]; 181 int argc; 182 { 183 int n; 184 argv[1] = NULL; 185 n = builtin_type(argv[2]); 186 if (n != -1) 187 eval(argv+1, argc-1, n); 188 else 189 errx(1, "unknown builtin %s", argv[2]); 190 } 191 192 193 /* We need some temporary buffer space, as pb pushes BACK and substitution 194 * proceeds forward... */ 195 static char *buffer; 196 static size_t bufsize = 0; 197 static size_t current = 0; 198 199 static void addchars __P((const char *, size_t)); 200 static void addchar __P((char)); 201 static char *twiddle __P((const char *)); 202 static char *getstring __P((void)); 203 static void exit_regerror __P((int, regex_t *)); 204 static void do_subst __P((const char *, regex_t *, const char *, regmatch_t *)); 205 static void do_regexpindex __P((const char *, regex_t *, regmatch_t *)); 206 static void do_regexp __P((const char *, regex_t *, const char *, regmatch_t *)); 207 static void add_sub __P((int, const char *, regex_t *, regmatch_t *)); 208 static void add_replace __P((const char *, regex_t *, const char *, regmatch_t *)); 209 #define addconstantstring(s) addchars((s), sizeof(s)-1) 210 211 static void 212 addchars(c, n) 213 const char *c; 214 size_t n; 215 { 216 if (n == 0) 217 return; 218 while (current + n > bufsize) { 219 if (bufsize == 0) 220 bufsize = 1024; 221 else 222 bufsize *= 2; 223 buffer = realloc(buffer, bufsize); 224 if (buffer == NULL) 225 errx(1, "out of memory"); 226 } 227 memcpy(buffer+current, c, n); 228 current += n; 229 } 230 231 static void 232 addchar(c) 233 char c; 234 { 235 if (current +1 > bufsize) { 236 if (bufsize == 0) 237 bufsize = 1024; 238 else 239 bufsize *= 2; 240 buffer = realloc(buffer, bufsize); 241 if (buffer == NULL) 242 errx(1, "out of memory"); 243 } 244 buffer[current++] = c; 245 } 246 247 static char * 248 getstring() 249 { 250 addchar('\0'); 251 current = 0; 252 return buffer; 253 } 254 255 256 static void 257 exit_regerror(er, re) 258 int er; 259 regex_t *re; 260 { 261 size_t errlen; 262 char *errbuf; 263 264 errlen = regerror(er, re, NULL, 0); 265 errbuf = xalloc(errlen); 266 regerror(er, re, errbuf, errlen); 267 errx(1, "regular expression error: %s", errbuf); 268 } 269 270 static void 271 add_sub(n, string, re, pm) 272 int n; 273 const char *string; 274 regex_t *re; 275 regmatch_t *pm; 276 { 277 if (n > re->re_nsub) 278 warnx("No subexpression %d", n); 279 /* Subexpressions that did not match are 280 * not an error. */ 281 else if (pm[n].rm_so != -1 && 282 pm[n].rm_eo != -1) { 283 addchars(string + pm[n].rm_so, 284 pm[n].rm_eo - pm[n].rm_so); 285 } 286 } 287 288 /* Add replacement string to the output buffer, recognizing special 289 * constructs and replacing them with substrings of the original string. 290 */ 291 static void 292 add_replace(string, re, replace, pm) 293 const char *string; 294 regex_t *re; 295 const char *replace; 296 regmatch_t *pm; 297 { 298 const char *p; 299 300 for (p = replace; *p != '\0'; p++) { 301 if (*p == '&' && !mimic_gnu) { 302 add_sub(0, string, re, pm); 303 continue; 304 } 305 if (*p == '\\') { 306 if (p[1] == '\\') { 307 addchar(p[1]); 308 p++; 309 continue; 310 } 311 if (p[1] == '&') { 312 if (mimic_gnu) 313 add_sub(0, string, re, pm); 314 else 315 addchar(p[1]); 316 p++; 317 continue; 318 } 319 if (isdigit(p[1])) { 320 add_sub(*(++p) - '0', string, re, pm); 321 continue; 322 } 323 } 324 addchar(*p); 325 } 326 } 327 328 static void 329 do_subst(string, re, replace, pm) 330 const char *string; 331 regex_t *re; 332 const char *replace; 333 regmatch_t *pm; 334 { 335 int error; 336 int flags = 0; 337 const char *last_match = NULL; 338 339 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 340 if (pm[0].rm_eo != 0) { 341 if (string[pm[0].rm_eo-1] == '\n') 342 flags = 0; 343 else 344 flags = REG_NOTBOL; 345 } 346 347 /* NULL length matches are special... We use the `vi-mode' 348 * rule: don't allow a NULL-match at the last match 349 * position. 350 */ 351 if (pm[0].rm_so == pm[0].rm_eo && 352 string + pm[0].rm_so == last_match) { 353 if (*string == '\0') 354 return; 355 addchar(*string); 356 if (*string++ == '\n') 357 flags = 0; 358 else 359 flags = REG_NOTBOL; 360 continue; 361 } 362 last_match = string + pm[0].rm_so; 363 addchars(string, pm[0].rm_so); 364 add_replace(string, re, replace, pm); 365 string += pm[0].rm_eo; 366 } 367 if (error != REG_NOMATCH) 368 exit_regerror(error, re); 369 pbstr(string); 370 } 371 372 static void 373 do_regexp(string, re, replace, pm) 374 const char *string; 375 regex_t *re; 376 const char *replace; 377 regmatch_t *pm; 378 { 379 int error; 380 381 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 382 case 0: 383 add_replace(string, re, replace, pm); 384 pbstr(getstring()); 385 break; 386 case REG_NOMATCH: 387 break; 388 default: 389 exit_regerror(error, re); 390 } 391 } 392 393 static void 394 do_regexpindex(string, re, pm) 395 const char *string; 396 regex_t *re; 397 regmatch_t *pm; 398 { 399 int error; 400 401 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 402 case 0: 403 pbunsigned(pm[0].rm_so); 404 break; 405 case REG_NOMATCH: 406 pbnum(-1); 407 break; 408 default: 409 exit_regerror(error, re); 410 } 411 } 412 413 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 414 * says. So we twiddle with the regexp before passing it to regcomp. 415 */ 416 static char * 417 twiddle(p) 418 const char *p; 419 { 420 /* This could use strcspn for speed... */ 421 while (*p != '\0') { 422 if (*p == '\\') { 423 switch(p[1]) { 424 case '(': 425 case ')': 426 case '|': 427 addchar(p[1]); 428 break; 429 case 'w': 430 addconstantstring("[_a-zA-Z0-9]"); 431 break; 432 case 'W': 433 addconstantstring("[^_a-zA-Z0-9]"); 434 break; 435 case '<': 436 addconstantstring("[[:<:]]"); 437 break; 438 case '>': 439 addconstantstring("[[:>:]]"); 440 break; 441 default: 442 addchars(p, 2); 443 break; 444 } 445 p+=2; 446 continue; 447 } 448 if (*p == '(' || *p == ')' || *p == '|') 449 addchar('\\'); 450 451 addchar(*p); 452 p++; 453 } 454 return getstring(); 455 } 456 457 /* patsubst(string, regexp, opt replacement) */ 458 /* argv[2]: string 459 * argv[3]: regexp 460 * argv[4]: opt rep 461 */ 462 void 463 dopatsubst(argv, argc) 464 const char *argv[]; 465 int argc; 466 { 467 int error; 468 regex_t re; 469 regmatch_t *pmatch; 470 471 if (argc <= 3) { 472 warnx("Too few arguments to patsubst"); 473 return; 474 } 475 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 476 REG_NEWLINE | REG_EXTENDED); 477 if (error != 0) 478 exit_regerror(error, &re); 479 480 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 481 do_subst(argv[2], &re, 482 argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch); 483 pbstr(getstring()); 484 free(pmatch); 485 regfree(&re); 486 } 487 488 void 489 doregexp(argv, argc) 490 const char *argv[]; 491 int argc; 492 { 493 int error; 494 regex_t re; 495 regmatch_t *pmatch; 496 497 if (argc <= 3) { 498 warnx("Too few arguments to regexp"); 499 return; 500 } 501 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 502 REG_EXTENDED); 503 if (error != 0) 504 exit_regerror(error, &re); 505 506 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 507 if (argv[4] == NULL || argc == 4) 508 do_regexpindex(argv[2], &re, pmatch); 509 else 510 do_regexp(argv[2], &re, argv[4], pmatch); 511 free(pmatch); 512 regfree(&re); 513 } 514 515 void 516 doesyscmd(cmd) 517 const char *cmd; 518 { 519 int p[2]; 520 pid_t pid, cpid; 521 char *argv[4]; 522 int cc; 523 int status; 524 525 /* Follow gnu m4 documentation: first flush buffers. */ 526 fflush(NULL); 527 528 argv[0] = "sh"; 529 argv[1] = "-c"; 530 argv[2] = (char *)cmd; 531 argv[3] = NULL; 532 533 /* Just set up standard output, share stderr and stdin with m4 */ 534 if (pipe(p) == -1) 535 err(1, "bad pipe"); 536 switch(cpid = fork()) { 537 case -1: 538 err(1, "bad fork"); 539 /* NOTREACHED */ 540 case 0: 541 (void) close(p[0]); 542 (void) dup2(p[1], 1); 543 (void) close(p[1]); 544 execv(_PATH_BSHELL, argv); 545 exit(1); 546 default: 547 /* Read result in two stages, since m4's buffer is 548 * pushback-only. */ 549 (void) close(p[1]); 550 do { 551 char result[BUFSIZE]; 552 cc = read(p[0], result, sizeof result); 553 if (cc > 0) 554 addchars(result, cc); 555 } while (cc > 0 || (cc == -1 && errno == EINTR)); 556 557 (void) close(p[0]); 558 while ((pid = wait(&status)) != cpid && pid >= 0) 559 continue; 560 pbstr(getstring()); 561 } 562 } 563