1 /* $OpenBSD: gnum4.c,v 1.18 2002/04/26 16:15:16 espie Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/usr.bin/m4/gnum4.c,v 1.9 2004/05/18 15:53:58 stefanf Exp $ 28 */ 29 30 /* 31 * functions needed to support gnu-m4 extensions, including a fake freezing 32 */ 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/wait.h> 37 #include <ctype.h> 38 #include <paths.h> 39 #include <regex.h> 40 #include <stddef.h> 41 #include <stdlib.h> 42 #include <stdio.h> 43 #include <string.h> 44 #include <err.h> 45 #include <errno.h> 46 #include <unistd.h> 47 #include "mdef.h" 48 #include "stdd.h" 49 #include "extern.h" 50 51 52 int mimic_gnu = 0; 53 54 /* 55 * Support for include path search 56 * First search in the the current directory. 57 * If not found, and the path is not absolute, include path kicks in. 58 * First, -I options, in the order found on the command line. 59 * Then M4PATH env variable 60 */ 61 62 struct path_entry { 63 char *name; 64 struct path_entry *next; 65 } *first, *last; 66 67 static struct path_entry *new_path_entry(const char *); 68 static void ensure_m4path(void); 69 static struct input_file *dopath(struct input_file *, const char *); 70 71 static struct path_entry * 72 new_path_entry(const char *dirname) 73 { 74 struct path_entry *n; 75 76 n = malloc(sizeof(struct path_entry)); 77 if (!n) 78 errx(1, "out of memory"); 79 n->name = strdup(dirname); 80 if (!n->name) 81 errx(1, "out of memory"); 82 n->next = NULL; 83 return n; 84 } 85 86 void 87 addtoincludepath(const char *dirname) 88 { 89 struct path_entry *n; 90 91 n = new_path_entry(dirname); 92 93 if (last) { 94 last->next = n; 95 last = n; 96 } 97 else 98 last = first = n; 99 } 100 101 static void 102 ensure_m4path(void) 103 { 104 static int envpathdone = 0; 105 char *envpath; 106 char *sweep; 107 char *path; 108 109 if (envpathdone) 110 return; 111 envpathdone = TRUE; 112 envpath = getenv("M4PATH"); 113 if (!envpath) 114 return; 115 /* for portability: getenv result is read-only */ 116 envpath = strdup(envpath); 117 if (!envpath) 118 errx(1, "out of memory"); 119 for (sweep = envpath; 120 (path = strsep(&sweep, ":")) != NULL;) 121 addtoincludepath(path); 122 free(envpath); 123 } 124 125 static 126 struct input_file * 127 dopath(struct input_file *i, const char *filename) 128 { 129 char path[MAXPATHLEN]; 130 struct path_entry *pe; 131 FILE *f; 132 133 for (pe = first; pe; pe = pe->next) { 134 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 135 if ((f = fopen(path, "r")) != NULL) { 136 set_input(i, f, path); 137 return i; 138 } 139 } 140 return NULL; 141 } 142 143 struct input_file * 144 fopen_trypath(struct input_file *i, const char *filename) 145 { 146 FILE *f; 147 148 f = fopen(filename, "r"); 149 if (f != NULL) { 150 set_input(i, f, filename); 151 return i; 152 } 153 if (filename[0] == '/') 154 return NULL; 155 156 ensure_m4path(); 157 158 return dopath(i, filename); 159 } 160 161 void 162 doindir(const char *argv[], int argc) 163 { 164 ndptr p; 165 166 p = lookup(argv[2]); 167 if (p == NULL) 168 errx(1, "undefined macro %s", argv[2]); 169 argv[1] = p->defn; 170 eval(argv+1, argc-1, p->type); 171 } 172 173 void 174 dobuiltin(const char *argv[], int argc) 175 { 176 int n; 177 argv[1] = NULL; 178 n = builtin_type(argv[2]); 179 if (n != -1) 180 eval(argv+1, argc-1, n); 181 else 182 errx(1, "unknown builtin %s", argv[2]); 183 } 184 185 186 /* We need some temporary buffer space, as pb pushes BACK and substitution 187 * proceeds forward... */ 188 static char *buffer; 189 static size_t bufsize = 0; 190 static size_t current = 0; 191 192 static void addchars(const char *, size_t); 193 static void addchar(int); 194 static char *twiddle(const char *); 195 static char *getstring(void); 196 static void exit_regerror(int, regex_t *); 197 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 198 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 199 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 200 static void add_sub(size_t, const char *, regex_t *, regmatch_t *); 201 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 202 #define addconstantstring(s) addchars((s), sizeof(s)-1) 203 204 static void 205 addchars(const char *c, size_t n) 206 { 207 if (n == 0) 208 return; 209 while (current + n > bufsize) { 210 if (bufsize == 0) 211 bufsize = 1024; 212 else 213 bufsize *= 2; 214 buffer = realloc(buffer, bufsize); 215 if (buffer == NULL) 216 errx(1, "out of memory"); 217 } 218 memcpy(buffer+current, c, n); 219 current += n; 220 } 221 222 static void 223 addchar(int c) 224 { 225 if (current +1 > bufsize) { 226 if (bufsize == 0) 227 bufsize = 1024; 228 else 229 bufsize *= 2; 230 buffer = realloc(buffer, bufsize); 231 if (buffer == NULL) 232 errx(1, "out of memory"); 233 } 234 buffer[current++] = c; 235 } 236 237 static char * 238 getstring(void) 239 { 240 addchar('\0'); 241 current = 0; 242 return buffer; 243 } 244 245 246 static void 247 exit_regerror(int er, regex_t *re) 248 { 249 size_t errlen; 250 char *errbuf; 251 252 errlen = regerror(er, re, NULL, 0); 253 errbuf = xalloc(errlen); 254 regerror(er, re, errbuf, errlen); 255 errx(1, "regular expression error: %s", errbuf); 256 } 257 258 static void 259 add_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm) 260 { 261 if (n > re->re_nsub) 262 warnx("No subexpression %zu", n); 263 /* Subexpressions that did not match are 264 * not an error. */ 265 else if (pm[n].rm_so != -1 && 266 pm[n].rm_eo != -1) { 267 addchars(string + pm[n].rm_so, 268 pm[n].rm_eo - pm[n].rm_so); 269 } 270 } 271 272 /* Add replacement string to the output buffer, recognizing special 273 * constructs and replacing them with substrings of the original string. 274 */ 275 static void 276 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 277 { 278 const char *p; 279 280 for (p = replace; *p != '\0'; p++) { 281 if (*p == '&' && !mimic_gnu) { 282 add_sub(0, string, re, pm); 283 continue; 284 } 285 if (*p == '\\') { 286 if (p[1] == '\\') { 287 addchar(p[1]); 288 p++; 289 continue; 290 } 291 if (p[1] == '&') { 292 if (mimic_gnu) 293 add_sub(0, string, re, pm); 294 else 295 addchar(p[1]); 296 p++; 297 continue; 298 } 299 if (isdigit(p[1])) { 300 add_sub(*(++p) - '0', string, re, pm); 301 continue; 302 } 303 } 304 addchar(*p); 305 } 306 } 307 308 static void 309 do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 310 { 311 int error; 312 int flags = 0; 313 const char *last_match = NULL; 314 315 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 316 if (pm[0].rm_eo != 0) { 317 if (string[pm[0].rm_eo-1] == '\n') 318 flags = 0; 319 else 320 flags = REG_NOTBOL; 321 } 322 323 /* NULL length matches are special... We use the `vi-mode' 324 * rule: don't allow a NULL-match at the last match 325 * position. 326 */ 327 if (pm[0].rm_so == pm[0].rm_eo && 328 string + pm[0].rm_so == last_match) { 329 if (*string == '\0') 330 return; 331 addchar(*string); 332 if (*string++ == '\n') 333 flags = 0; 334 else 335 flags = REG_NOTBOL; 336 continue; 337 } 338 last_match = string + pm[0].rm_so; 339 addchars(string, pm[0].rm_so); 340 add_replace(string, re, replace, pm); 341 string += pm[0].rm_eo; 342 } 343 if (error != REG_NOMATCH) 344 exit_regerror(error, re); 345 pbstr(string); 346 } 347 348 static void 349 do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 350 { 351 int error; 352 353 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 354 case 0: 355 add_replace(string, re, replace, pm); 356 pbstr(getstring()); 357 break; 358 case REG_NOMATCH: 359 break; 360 default: 361 exit_regerror(error, re); 362 } 363 } 364 365 static void 366 do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 367 { 368 int error; 369 370 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 371 case 0: 372 pbunsigned(pm[0].rm_so); 373 break; 374 case REG_NOMATCH: 375 pbnum(-1); 376 break; 377 default: 378 exit_regerror(error, re); 379 } 380 } 381 382 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 383 * says. So we twiddle with the regexp before passing it to regcomp. 384 */ 385 static char * 386 twiddle(const char *p) 387 { 388 /* This could use strcspn for speed... */ 389 while (*p != '\0') { 390 if (*p == '\\') { 391 switch(p[1]) { 392 case '(': 393 case ')': 394 case '|': 395 addchar(p[1]); 396 break; 397 case 'w': 398 addconstantstring("[_a-zA-Z0-9]"); 399 break; 400 case 'W': 401 addconstantstring("[^_a-zA-Z0-9]"); 402 break; 403 case '<': 404 addconstantstring("[[:<:]]"); 405 break; 406 case '>': 407 addconstantstring("[[:>:]]"); 408 break; 409 default: 410 addchars(p, 2); 411 break; 412 } 413 p+=2; 414 continue; 415 } 416 if (*p == '(' || *p == ')' || *p == '|') 417 addchar('\\'); 418 419 addchar(*p); 420 p++; 421 } 422 return getstring(); 423 } 424 425 /* patsubst(string, regexp, opt replacement) */ 426 /* argv[2]: string 427 * argv[3]: regexp 428 * argv[4]: opt rep 429 */ 430 void 431 dopatsubst(const char *argv[], int argc) 432 { 433 int error; 434 regex_t re; 435 regmatch_t *pmatch; 436 437 if (argc <= 3) { 438 warnx("Too few arguments to patsubst"); 439 return; 440 } 441 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 442 REG_NEWLINE | REG_EXTENDED); 443 if (error != 0) 444 exit_regerror(error, &re); 445 446 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 447 do_subst(argv[2], &re, 448 argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch); 449 pbstr(getstring()); 450 free(pmatch); 451 regfree(&re); 452 } 453 454 void 455 doregexp(const char *argv[], int argc) 456 { 457 int error; 458 regex_t re; 459 regmatch_t *pmatch; 460 461 if (argc <= 3) { 462 warnx("Too few arguments to regexp"); 463 return; 464 } 465 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 466 REG_EXTENDED); 467 if (error != 0) 468 exit_regerror(error, &re); 469 470 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 471 if (argv[4] == NULL || argc == 4) 472 do_regexpindex(argv[2], &re, pmatch); 473 else 474 do_regexp(argv[2], &re, argv[4], pmatch); 475 free(pmatch); 476 regfree(&re); 477 } 478 479 void 480 doesyscmd(const char *cmd) 481 { 482 int p[2]; 483 pid_t pid, cpid; 484 int cc; 485 int status; 486 487 /* Follow gnu m4 documentation: first flush buffers. */ 488 fflush(NULL); 489 490 /* Just set up standard output, share stderr and stdin with m4 */ 491 if (pipe(p) == -1) 492 err(1, "bad pipe"); 493 switch(cpid = fork()) { 494 case -1: 495 err(1, "bad fork"); 496 /* NOTREACHED */ 497 case 0: 498 (void) close(p[0]); 499 (void) dup2(p[1], 1); 500 (void) close(p[1]); 501 execl(_PATH_BSHELL, "sh", "-c", cmd, NULL); 502 exit(1); 503 default: 504 /* Read result in two stages, since m4's buffer is 505 * pushback-only. */ 506 (void) close(p[1]); 507 do { 508 char result[BUFSIZE]; 509 cc = read(p[0], result, sizeof result); 510 if (cc > 0) 511 addchars(result, cc); 512 } while (cc > 0 || (cc == -1 && errno == EINTR)); 513 514 (void) close(p[0]); 515 while ((pid = wait(&status)) != cpid && pid >= 0) 516 continue; 517 pbstr(getstring()); 518 } 519 } 520