1 /* $Vendor-Id: roff.c,v 1.120 2011/01/03 23:24:16 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <errno.h> 24 #include <ctype.h> 25 #include <limits.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <stdio.h> 29 30 #include "mandoc.h" 31 #include "roff.h" 32 #include "libroff.h" 33 #include "libmandoc.h" 34 35 #define RSTACK_MAX 128 36 37 #define ROFF_CTL(c) \ 38 ('.' == (c) || '\'' == (c)) 39 40 enum rofft { 41 ROFF_ad, 42 ROFF_am, 43 ROFF_ami, 44 ROFF_am1, 45 ROFF_de, 46 ROFF_dei, 47 ROFF_de1, 48 ROFF_ds, 49 ROFF_el, 50 ROFF_hy, 51 ROFF_ie, 52 ROFF_if, 53 ROFF_ig, 54 ROFF_ne, 55 ROFF_nh, 56 ROFF_nr, 57 ROFF_rm, 58 ROFF_so, 59 ROFF_tr, 60 ROFF_TS, 61 ROFF_TE, 62 ROFF_T_, 63 ROFF_cblock, 64 ROFF_ccond, /* FIXME: remove this. */ 65 ROFF_USERDEF, 66 ROFF_MAX 67 }; 68 69 enum roffrule { 70 ROFFRULE_ALLOW, 71 ROFFRULE_DENY 72 }; 73 74 struct roffstr { 75 char *name; /* key of symbol */ 76 char *string; /* current value */ 77 struct roffstr *next; /* next in list */ 78 }; 79 80 struct roff { 81 struct roffnode *last; /* leaf of stack */ 82 mandocmsg msg; /* err/warn/fatal messages */ 83 void *data; /* privdata for messages */ 84 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 85 int rstackpos; /* position in rstack */ 86 struct regset *regs; /* read/writable registers */ 87 struct roffstr *first_string; /* user-defined strings & macros */ 88 const char *current_string; /* value of last called user macro */ 89 struct tbl_node *first_tbl; /* first table parsed */ 90 struct tbl_node *last_tbl; /* last table parsed */ 91 struct tbl_node *tbl; /* current table being parsed */ 92 }; 93 94 struct roffnode { 95 enum rofft tok; /* type of node */ 96 struct roffnode *parent; /* up one in stack */ 97 int line; /* parse line */ 98 int col; /* parse col */ 99 char *name; /* node name, e.g. macro name */ 100 char *end; /* end-rules: custom token */ 101 int endspan; /* end-rules: next-line or infty */ 102 enum roffrule rule; /* current evaluation rule */ 103 }; 104 105 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 106 enum rofft tok, /* tok of macro */ \ 107 char **bufp, /* input buffer */ \ 108 size_t *szp, /* size of input buffer */ \ 109 int ln, /* parse line */ \ 110 int ppos, /* original pos in buffer */ \ 111 int pos, /* current pos in buffer */ \ 112 int *offs /* reset offset of buffer data */ 113 114 typedef enum rofferr (*roffproc)(ROFF_ARGS); 115 116 struct roffmac { 117 const char *name; /* macro name */ 118 roffproc proc; /* process new macro */ 119 roffproc text; /* process as child text of macro */ 120 roffproc sub; /* process as child of macro */ 121 int flags; 122 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 123 struct roffmac *next; 124 }; 125 126 static enum rofferr roff_block(ROFF_ARGS); 127 static enum rofferr roff_block_text(ROFF_ARGS); 128 static enum rofferr roff_block_sub(ROFF_ARGS); 129 static enum rofferr roff_cblock(ROFF_ARGS); 130 static enum rofferr roff_ccond(ROFF_ARGS); 131 static enum rofferr roff_cond(ROFF_ARGS); 132 static enum rofferr roff_cond_text(ROFF_ARGS); 133 static enum rofferr roff_cond_sub(ROFF_ARGS); 134 static enum rofferr roff_ds(ROFF_ARGS); 135 static enum roffrule roff_evalcond(const char *, int *); 136 static void roff_freestr(struct roff *); 137 static const char *roff_getstrn(const struct roff *, 138 const char *, size_t); 139 static enum rofferr roff_line_ignore(ROFF_ARGS); 140 static enum rofferr roff_line_error(ROFF_ARGS); 141 static enum rofferr roff_nr(ROFF_ARGS); 142 static int roff_res(struct roff *, 143 char **, size_t *, int); 144 static void roff_setstr(struct roff *, 145 const char *, const char *, int); 146 static enum rofferr roff_so(ROFF_ARGS); 147 static enum rofferr roff_TE(ROFF_ARGS); 148 static enum rofferr roff_TS(ROFF_ARGS); 149 static enum rofferr roff_T_(ROFF_ARGS); 150 static enum rofferr roff_userdef(ROFF_ARGS); 151 152 /* See roff_hash_find() */ 153 154 #define ASCII_HI 126 155 #define ASCII_LO 33 156 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 157 158 static struct roffmac *hash[HASHWIDTH]; 159 160 static struct roffmac roffs[ROFF_MAX] = { 161 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 162 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 163 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 164 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 165 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 166 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 167 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 168 { "ds", roff_ds, NULL, NULL, 0, NULL }, 169 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 170 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 171 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 172 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 173 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 174 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 175 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 176 { "nr", roff_nr, NULL, NULL, 0, NULL }, 177 { "rm", roff_line_error, NULL, NULL, 0, NULL }, 178 { "so", roff_so, NULL, NULL, 0, NULL }, 179 { "tr", roff_line_ignore, NULL, NULL, 0, NULL }, 180 { "TS", roff_TS, NULL, NULL, 0, NULL }, 181 { "TE", roff_TE, NULL, NULL, 0, NULL }, 182 { "T&", roff_T_, NULL, NULL, 0, NULL }, 183 { ".", roff_cblock, NULL, NULL, 0, NULL }, 184 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 185 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 186 }; 187 188 static void roff_free1(struct roff *); 189 static enum rofft roff_hash_find(const char *, size_t); 190 static void roff_hash_init(void); 191 static void roffnode_cleanscope(struct roff *); 192 static void roffnode_push(struct roff *, enum rofft, 193 const char *, int, int); 194 static void roffnode_pop(struct roff *); 195 static enum rofft roff_parse(struct roff *, const char *, int *); 196 static int roff_parse_nat(const char *, unsigned int *); 197 198 /* See roff_hash_find() */ 199 #define ROFF_HASH(p) (p[0] - ASCII_LO) 200 201 static void 202 roff_hash_init(void) 203 { 204 struct roffmac *n; 205 int buc, i; 206 207 for (i = 0; i < (int)ROFF_USERDEF; i++) { 208 assert(roffs[i].name[0] >= ASCII_LO); 209 assert(roffs[i].name[0] <= ASCII_HI); 210 211 buc = ROFF_HASH(roffs[i].name); 212 213 if (NULL != (n = hash[buc])) { 214 for ( ; n->next; n = n->next) 215 /* Do nothing. */ ; 216 n->next = &roffs[i]; 217 } else 218 hash[buc] = &roffs[i]; 219 } 220 } 221 222 223 /* 224 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 225 * the nil-terminated string name could be found. 226 */ 227 static enum rofft 228 roff_hash_find(const char *p, size_t s) 229 { 230 int buc; 231 struct roffmac *n; 232 233 /* 234 * libroff has an extremely simple hashtable, for the time 235 * being, which simply keys on the first character, which must 236 * be printable, then walks a chain. It works well enough until 237 * optimised. 238 */ 239 240 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 241 return(ROFF_MAX); 242 243 buc = ROFF_HASH(p); 244 245 if (NULL == (n = hash[buc])) 246 return(ROFF_MAX); 247 for ( ; n; n = n->next) 248 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 249 return((enum rofft)(n - roffs)); 250 251 return(ROFF_MAX); 252 } 253 254 255 /* 256 * Pop the current node off of the stack of roff instructions currently 257 * pending. 258 */ 259 static void 260 roffnode_pop(struct roff *r) 261 { 262 struct roffnode *p; 263 264 assert(r->last); 265 p = r->last; 266 267 if (ROFF_el == p->tok) 268 if (r->rstackpos > -1) 269 r->rstackpos--; 270 271 r->last = r->last->parent; 272 free(p->name); 273 free(p->end); 274 free(p); 275 } 276 277 278 /* 279 * Push a roff node onto the instruction stack. This must later be 280 * removed with roffnode_pop(). 281 */ 282 static void 283 roffnode_push(struct roff *r, enum rofft tok, const char *name, 284 int line, int col) 285 { 286 struct roffnode *p; 287 288 p = mandoc_calloc(1, sizeof(struct roffnode)); 289 p->tok = tok; 290 if (name) 291 p->name = mandoc_strdup(name); 292 p->parent = r->last; 293 p->line = line; 294 p->col = col; 295 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; 296 297 r->last = p; 298 } 299 300 301 static void 302 roff_free1(struct roff *r) 303 { 304 struct tbl_node *t; 305 306 while (r->first_tbl) { 307 t = r->first_tbl; 308 r->first_tbl = t->next; 309 tbl_free(t); 310 } 311 312 r->first_tbl = r->last_tbl = r->tbl = NULL; 313 314 while (r->last) 315 roffnode_pop(r); 316 317 roff_freestr(r); 318 } 319 320 321 void 322 roff_reset(struct roff *r) 323 { 324 325 roff_free1(r); 326 } 327 328 329 void 330 roff_free(struct roff *r) 331 { 332 333 roff_free1(r); 334 free(r); 335 } 336 337 338 struct roff * 339 roff_alloc(struct regset *regs, void *data, const mandocmsg msg) 340 { 341 struct roff *r; 342 343 r = mandoc_calloc(1, sizeof(struct roff)); 344 r->regs = regs; 345 r->msg = msg; 346 r->data = data; 347 r->rstackpos = -1; 348 349 roff_hash_init(); 350 return(r); 351 } 352 353 354 /* 355 * Pre-filter each and every line for reserved words (one beginning with 356 * `\*', e.g., `\*(ab'). These must be handled before the actual line 357 * is processed. 358 */ 359 static int 360 roff_res(struct roff *r, char **bufp, size_t *szp, int pos) 361 { 362 const char *stesc; /* start of an escape sequence ('\\') */ 363 const char *stnam; /* start of the name, after "[(*" */ 364 const char *cp; /* end of the name, e.g. before ']' */ 365 const char *res; /* the string to be substituted */ 366 int i, maxl; 367 size_t nsz; 368 char *n; 369 370 /* Search for a leading backslash and save a pointer to it. */ 371 372 cp = *bufp + pos; 373 while (NULL != (cp = strchr(cp, '\\'))) { 374 stesc = cp++; 375 376 /* 377 * The second character must be an asterisk. 378 * If it isn't, skip it anyway: It is escaped, 379 * so it can't start another escape sequence. 380 */ 381 382 if ('\0' == *cp) 383 return(1); 384 if ('*' != *cp++) 385 continue; 386 387 /* 388 * The third character decides the length 389 * of the name of the string. 390 * Save a pointer to the name. 391 */ 392 393 switch (*cp) { 394 case ('\0'): 395 return(1); 396 case ('('): 397 cp++; 398 maxl = 2; 399 break; 400 case ('['): 401 cp++; 402 maxl = 0; 403 break; 404 default: 405 maxl = 1; 406 break; 407 } 408 stnam = cp; 409 410 /* Advance to the end of the name. */ 411 412 for (i = 0; 0 == maxl || i < maxl; i++, cp++) { 413 if ('\0' == *cp) 414 return(1); /* Error. */ 415 if (0 == maxl && ']' == *cp) 416 break; 417 } 418 419 /* 420 * Retrieve the replacement string; if it is 421 * undefined, resume searching for escapes. 422 */ 423 424 res = roff_getstrn(r, stnam, (size_t)i); 425 426 if (NULL == res) { 427 cp -= maxl ? 1 : 0; 428 continue; 429 } 430 431 /* Replace the escape sequence by the string. */ 432 433 nsz = *szp + strlen(res) + 1; 434 n = mandoc_malloc(nsz); 435 436 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); 437 strlcat(n, res, nsz); 438 strlcat(n, cp + (maxl ? 0 : 1), nsz); 439 440 free(*bufp); 441 442 *bufp = n; 443 *szp = nsz; 444 return(0); 445 } 446 447 return(1); 448 } 449 450 451 enum rofferr 452 roff_parseln(struct roff *r, int ln, char **bufp, 453 size_t *szp, int pos, int *offs) 454 { 455 enum rofft t; 456 enum rofferr e; 457 int ppos; 458 459 /* 460 * Run the reserved-word filter only if we have some reserved 461 * words to fill in. 462 */ 463 464 if (r->first_string && ! roff_res(r, bufp, szp, pos)) 465 return(ROFF_REPARSE); 466 467 /* 468 * First, if a scope is open and we're not a macro, pass the 469 * text through the macro's filter. If a scope isn't open and 470 * we're not a macro, just let it through. 471 */ 472 473 if (r->last && ! ROFF_CTL((*bufp)[pos])) { 474 t = r->last->tok; 475 assert(roffs[t].text); 476 e = (*roffs[t].text) 477 (r, t, bufp, szp, ln, pos, pos, offs); 478 assert(ROFF_IGN == e || ROFF_CONT == e); 479 if (ROFF_CONT == e && r->tbl) 480 return(tbl_read(r->tbl, ln, *bufp, *offs)); 481 return(e); 482 } else if ( ! ROFF_CTL((*bufp)[pos])) { 483 if (r->tbl) 484 return(tbl_read(r->tbl, ln, *bufp, *offs)); 485 return(ROFF_CONT); 486 } 487 488 /* 489 * If a scope is open, go to the child handler for that macro, 490 * as it may want to preprocess before doing anything with it. 491 */ 492 493 if (r->last) { 494 t = r->last->tok; 495 assert(roffs[t].sub); 496 return((*roffs[t].sub) 497 (r, t, bufp, szp, 498 ln, pos, pos, offs)); 499 } 500 501 /* 502 * Lastly, as we've no scope open, try to look up and execute 503 * the new macro. If no macro is found, simply return and let 504 * the compilers handle it. 505 */ 506 507 ppos = pos; 508 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 509 return(ROFF_CONT); 510 511 assert(roffs[t].proc); 512 return((*roffs[t].proc) 513 (r, t, bufp, szp, 514 ln, ppos, pos, offs)); 515 } 516 517 518 void 519 roff_endparse(struct roff *r) 520 { 521 522 if (r->last) 523 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data, 524 r->last->line, r->last->col, NULL); 525 526 if (r->tbl) { 527 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data, 528 r->tbl->line, r->tbl->pos, NULL); 529 tbl_end(r->tbl); 530 r->tbl = NULL; 531 } 532 } 533 534 535 /* 536 * Parse a roff node's type from the input buffer. This must be in the 537 * form of ".foo xxx" in the usual way. 538 */ 539 static enum rofft 540 roff_parse(struct roff *r, const char *buf, int *pos) 541 { 542 const char *mac; 543 size_t maclen; 544 enum rofft t; 545 546 assert(ROFF_CTL(buf[*pos])); 547 (*pos)++; 548 549 while (' ' == buf[*pos] || '\t' == buf[*pos]) 550 (*pos)++; 551 552 if ('\0' == buf[*pos]) 553 return(ROFF_MAX); 554 555 mac = buf + *pos; 556 maclen = strcspn(mac, " \\\t\0"); 557 558 t = (r->current_string = roff_getstrn(r, mac, maclen)) 559 ? ROFF_USERDEF : roff_hash_find(mac, maclen); 560 561 *pos += maclen; 562 while (buf[*pos] && ' ' == buf[*pos]) 563 (*pos)++; 564 565 return(t); 566 } 567 568 569 static int 570 roff_parse_nat(const char *buf, unsigned int *res) 571 { 572 char *ep; 573 long lval; 574 575 errno = 0; 576 lval = strtol(buf, &ep, 10); 577 if (buf[0] == '\0' || *ep != '\0') 578 return(0); 579 if ((errno == ERANGE && 580 (lval == LONG_MAX || lval == LONG_MIN)) || 581 (lval > INT_MAX || lval < 0)) 582 return(0); 583 584 *res = (unsigned int)lval; 585 return(1); 586 } 587 588 589 /* ARGSUSED */ 590 static enum rofferr 591 roff_cblock(ROFF_ARGS) 592 { 593 594 /* 595 * A block-close `..' should only be invoked as a child of an 596 * ignore macro, otherwise raise a warning and just ignore it. 597 */ 598 599 if (NULL == r->last) { 600 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 601 return(ROFF_IGN); 602 } 603 604 switch (r->last->tok) { 605 case (ROFF_am): 606 /* FALLTHROUGH */ 607 case (ROFF_ami): 608 /* FALLTHROUGH */ 609 case (ROFF_am1): 610 /* FALLTHROUGH */ 611 case (ROFF_de): 612 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 613 /* FALLTHROUGH */ 614 case (ROFF_dei): 615 /* FALLTHROUGH */ 616 case (ROFF_ig): 617 break; 618 default: 619 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 620 return(ROFF_IGN); 621 } 622 623 if ((*bufp)[pos]) 624 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL); 625 626 roffnode_pop(r); 627 roffnode_cleanscope(r); 628 return(ROFF_IGN); 629 630 } 631 632 633 static void 634 roffnode_cleanscope(struct roff *r) 635 { 636 637 while (r->last) { 638 if (--r->last->endspan < 0) 639 break; 640 roffnode_pop(r); 641 } 642 } 643 644 645 /* ARGSUSED */ 646 static enum rofferr 647 roff_ccond(ROFF_ARGS) 648 { 649 650 if (NULL == r->last) { 651 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 652 return(ROFF_IGN); 653 } 654 655 switch (r->last->tok) { 656 case (ROFF_el): 657 /* FALLTHROUGH */ 658 case (ROFF_ie): 659 /* FALLTHROUGH */ 660 case (ROFF_if): 661 break; 662 default: 663 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 664 return(ROFF_IGN); 665 } 666 667 if (r->last->endspan > -1) { 668 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 669 return(ROFF_IGN); 670 } 671 672 if ((*bufp)[pos]) 673 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL); 674 675 roffnode_pop(r); 676 roffnode_cleanscope(r); 677 return(ROFF_IGN); 678 } 679 680 681 /* ARGSUSED */ 682 static enum rofferr 683 roff_block(ROFF_ARGS) 684 { 685 int sv; 686 size_t sz; 687 char *name; 688 689 name = NULL; 690 691 if (ROFF_ig != tok) { 692 if ('\0' == (*bufp)[pos]) { 693 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL); 694 return(ROFF_IGN); 695 } 696 697 /* 698 * Re-write `de1', since we don't really care about 699 * groff's strange compatibility mode, into `de'. 700 */ 701 702 if (ROFF_de1 == tok) 703 tok = ROFF_de; 704 if (ROFF_de == tok) 705 name = *bufp + pos; 706 else 707 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, 708 roffs[tok].name); 709 710 while ((*bufp)[pos] && ' ' != (*bufp)[pos]) 711 pos++; 712 713 while (' ' == (*bufp)[pos]) 714 (*bufp)[pos++] = '\0'; 715 } 716 717 roffnode_push(r, tok, name, ln, ppos); 718 719 /* 720 * At the beginning of a `de' macro, clear the existing string 721 * with the same name, if there is one. New content will be 722 * added from roff_block_text() in multiline mode. 723 */ 724 725 if (ROFF_de == tok) 726 roff_setstr(r, name, "", 0); 727 728 if ('\0' == (*bufp)[pos]) 729 return(ROFF_IGN); 730 731 /* If present, process the custom end-of-line marker. */ 732 733 sv = pos; 734 while ((*bufp)[pos] && 735 ' ' != (*bufp)[pos] && 736 '\t' != (*bufp)[pos]) 737 pos++; 738 739 /* 740 * Note: groff does NOT like escape characters in the input. 741 * Instead of detecting this, we're just going to let it fly and 742 * to hell with it. 743 */ 744 745 assert(pos > sv); 746 sz = (size_t)(pos - sv); 747 748 if (1 == sz && '.' == (*bufp)[sv]) 749 return(ROFF_IGN); 750 751 r->last->end = mandoc_malloc(sz + 1); 752 753 memcpy(r->last->end, *bufp + sv, sz); 754 r->last->end[(int)sz] = '\0'; 755 756 if ((*bufp)[pos]) 757 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL); 758 759 return(ROFF_IGN); 760 } 761 762 763 /* ARGSUSED */ 764 static enum rofferr 765 roff_block_sub(ROFF_ARGS) 766 { 767 enum rofft t; 768 int i, j; 769 770 /* 771 * First check whether a custom macro exists at this level. If 772 * it does, then check against it. This is some of groff's 773 * stranger behaviours. If we encountered a custom end-scope 774 * tag and that tag also happens to be a "real" macro, then we 775 * need to try interpreting it again as a real macro. If it's 776 * not, then return ignore. Else continue. 777 */ 778 779 if (r->last->end) { 780 i = pos + 1; 781 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 782 i++; 783 784 for (j = 0; r->last->end[j]; j++, i++) 785 if ((*bufp)[i] != r->last->end[j]) 786 break; 787 788 if ('\0' == r->last->end[j] && 789 ('\0' == (*bufp)[i] || 790 ' ' == (*bufp)[i] || 791 '\t' == (*bufp)[i])) { 792 roffnode_pop(r); 793 roffnode_cleanscope(r); 794 795 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 796 return(ROFF_RERUN); 797 return(ROFF_IGN); 798 } 799 } 800 801 /* 802 * If we have no custom end-query or lookup failed, then try 803 * pulling it out of the hashtable. 804 */ 805 806 ppos = pos; 807 t = roff_parse(r, *bufp, &pos); 808 809 /* 810 * Macros other than block-end are only significant 811 * in `de' blocks; elsewhere, simply throw them away. 812 */ 813 if (ROFF_cblock != t) { 814 if (ROFF_de == tok) 815 roff_setstr(r, r->last->name, *bufp + ppos, 1); 816 return(ROFF_IGN); 817 } 818 819 assert(roffs[t].proc); 820 return((*roffs[t].proc)(r, t, bufp, szp, 821 ln, ppos, pos, offs)); 822 } 823 824 825 /* ARGSUSED */ 826 static enum rofferr 827 roff_block_text(ROFF_ARGS) 828 { 829 830 if (ROFF_de == tok) 831 roff_setstr(r, r->last->name, *bufp + pos, 1); 832 833 return(ROFF_IGN); 834 } 835 836 837 /* ARGSUSED */ 838 static enum rofferr 839 roff_cond_sub(ROFF_ARGS) 840 { 841 enum rofft t; 842 enum roffrule rr; 843 844 ppos = pos; 845 rr = r->last->rule; 846 847 /* 848 * Clean out scope. If we've closed ourselves, then don't 849 * continue. 850 */ 851 852 roffnode_cleanscope(r); 853 854 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { 855 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1]) 856 return(roff_ccond 857 (r, ROFF_ccond, bufp, szp, 858 ln, pos, pos + 2, offs)); 859 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 860 } 861 862 /* 863 * A denied conditional must evaluate its children if and only 864 * if they're either structurally required (such as loops and 865 * conditionals) or a closing macro. 866 */ 867 if (ROFFRULE_DENY == rr) 868 if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) 869 if (ROFF_ccond != t) 870 return(ROFF_IGN); 871 872 assert(roffs[t].proc); 873 return((*roffs[t].proc)(r, t, bufp, szp, 874 ln, ppos, pos, offs)); 875 } 876 877 878 /* ARGSUSED */ 879 static enum rofferr 880 roff_cond_text(ROFF_ARGS) 881 { 882 char *ep, *st; 883 enum roffrule rr; 884 885 rr = r->last->rule; 886 887 /* 888 * We display the value of the text if out current evaluation 889 * scope permits us to do so. 890 */ 891 892 /* FIXME: use roff_ccond? */ 893 894 st = &(*bufp)[pos]; 895 if (NULL == (ep = strstr(st, "\\}"))) { 896 roffnode_cleanscope(r); 897 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 898 } 899 900 if (ep == st || (ep > st && '\\' != *(ep - 1))) 901 roffnode_pop(r); 902 903 roffnode_cleanscope(r); 904 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 905 } 906 907 908 static enum roffrule 909 roff_evalcond(const char *v, int *pos) 910 { 911 912 switch (v[*pos]) { 913 case ('n'): 914 (*pos)++; 915 return(ROFFRULE_ALLOW); 916 case ('e'): 917 /* FALLTHROUGH */ 918 case ('o'): 919 /* FALLTHROUGH */ 920 case ('t'): 921 (*pos)++; 922 return(ROFFRULE_DENY); 923 default: 924 break; 925 } 926 927 while (v[*pos] && ' ' != v[*pos]) 928 (*pos)++; 929 return(ROFFRULE_DENY); 930 } 931 932 /* ARGSUSED */ 933 static enum rofferr 934 roff_line_ignore(ROFF_ARGS) 935 { 936 937 return(ROFF_IGN); 938 } 939 940 /* ARGSUSED */ 941 static enum rofferr 942 roff_line_error(ROFF_ARGS) 943 { 944 945 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name); 946 return(ROFF_IGN); 947 } 948 949 /* ARGSUSED */ 950 static enum rofferr 951 roff_cond(ROFF_ARGS) 952 { 953 int sv; 954 enum roffrule rule; 955 956 /* Stack overflow! */ 957 958 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) { 959 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL); 960 return(ROFF_ERR); 961 } 962 963 /* First, evaluate the conditional. */ 964 965 if (ROFF_el == tok) { 966 /* 967 * An `.el' will get the value of the current rstack 968 * entry set in prior `ie' calls or defaults to DENY. 969 */ 970 if (r->rstackpos < 0) 971 rule = ROFFRULE_DENY; 972 else 973 rule = r->rstack[r->rstackpos]; 974 } else 975 rule = roff_evalcond(*bufp, &pos); 976 977 sv = pos; 978 979 while (' ' == (*bufp)[pos]) 980 pos++; 981 982 /* 983 * Roff is weird. If we have just white-space after the 984 * conditional, it's considered the BODY and we exit without 985 * really doing anything. Warn about this. It's probably 986 * wrong. 987 */ 988 989 if ('\0' == (*bufp)[pos] && sv != pos) { 990 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL); 991 return(ROFF_IGN); 992 } 993 994 roffnode_push(r, tok, NULL, ln, ppos); 995 996 r->last->rule = rule; 997 998 if (ROFF_ie == tok) { 999 /* 1000 * An if-else will put the NEGATION of the current 1001 * evaluated conditional into the stack. 1002 */ 1003 r->rstackpos++; 1004 if (ROFFRULE_DENY == r->last->rule) 1005 r->rstack[r->rstackpos] = ROFFRULE_ALLOW; 1006 else 1007 r->rstack[r->rstackpos] = ROFFRULE_DENY; 1008 } 1009 1010 /* If the parent has false as its rule, then so do we. */ 1011 1012 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1013 r->last->rule = ROFFRULE_DENY; 1014 1015 /* 1016 * Determine scope. If we're invoked with "\{" trailing the 1017 * conditional, then we're in a multiline scope. Else our scope 1018 * expires on the next line. 1019 */ 1020 1021 r->last->endspan = 1; 1022 1023 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1024 r->last->endspan = -1; 1025 pos += 2; 1026 } 1027 1028 /* 1029 * If there are no arguments on the line, the next-line scope is 1030 * assumed. 1031 */ 1032 1033 if ('\0' == (*bufp)[pos]) 1034 return(ROFF_IGN); 1035 1036 /* Otherwise re-run the roff parser after recalculating. */ 1037 1038 *offs = pos; 1039 return(ROFF_RERUN); 1040 } 1041 1042 1043 /* ARGSUSED */ 1044 static enum rofferr 1045 roff_ds(ROFF_ARGS) 1046 { 1047 char *name, *string; 1048 1049 /* 1050 * A symbol is named by the first word following the macro 1051 * invocation up to a space. Its value is anything after the 1052 * name's trailing whitespace and optional double-quote. Thus, 1053 * 1054 * [.ds foo "bar " ] 1055 * 1056 * will have `bar " ' as its value. 1057 */ 1058 1059 name = *bufp + pos; 1060 if ('\0' == *name) 1061 return(ROFF_IGN); 1062 1063 string = name; 1064 /* Read until end of name. */ 1065 while (*string && ' ' != *string) 1066 string++; 1067 1068 /* Nil-terminate name. */ 1069 if (*string) 1070 *(string++) = '\0'; 1071 1072 /* Read past spaces. */ 1073 while (*string && ' ' == *string) 1074 string++; 1075 1076 /* Read passed initial double-quote. */ 1077 if (*string && '"' == *string) 1078 string++; 1079 1080 /* The rest is the value. */ 1081 roff_setstr(r, name, string, 0); 1082 return(ROFF_IGN); 1083 } 1084 1085 1086 /* ARGSUSED */ 1087 static enum rofferr 1088 roff_nr(ROFF_ARGS) 1089 { 1090 const char *key, *val; 1091 struct reg *rg; 1092 1093 key = &(*bufp)[pos]; 1094 rg = r->regs->regs; 1095 1096 /* Parse register request. */ 1097 while ((*bufp)[pos] && ' ' != (*bufp)[pos]) 1098 pos++; 1099 1100 /* 1101 * Set our nil terminator. Because this line is going to be 1102 * ignored anyway, we can munge it as we please. 1103 */ 1104 if ((*bufp)[pos]) 1105 (*bufp)[pos++] = '\0'; 1106 1107 /* Skip whitespace to register token. */ 1108 while ((*bufp)[pos] && ' ' == (*bufp)[pos]) 1109 pos++; 1110 1111 val = &(*bufp)[pos]; 1112 1113 /* Process register token. */ 1114 1115 if (0 == strcmp(key, "nS")) { 1116 rg[(int)REG_nS].set = 1; 1117 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u)) 1118 rg[(int)REG_nS].v.u = 0; 1119 } 1120 1121 return(ROFF_IGN); 1122 } 1123 1124 /* ARGSUSED */ 1125 static enum rofferr 1126 roff_TE(ROFF_ARGS) 1127 { 1128 1129 if (NULL == r->tbl) 1130 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 1131 else 1132 tbl_end(r->tbl); 1133 1134 r->tbl = NULL; 1135 return(ROFF_IGN); 1136 } 1137 1138 /* ARGSUSED */ 1139 static enum rofferr 1140 roff_T_(ROFF_ARGS) 1141 { 1142 1143 if (NULL == r->tbl) 1144 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL); 1145 else 1146 tbl_restart(ppos, ln, r->tbl); 1147 1148 return(ROFF_IGN); 1149 } 1150 1151 /* ARGSUSED */ 1152 static enum rofferr 1153 roff_TS(ROFF_ARGS) 1154 { 1155 struct tbl_node *t; 1156 1157 if (r->tbl) { 1158 (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL); 1159 tbl_end(r->tbl); 1160 } 1161 1162 t = tbl_alloc(ppos, ln, r->data, r->msg); 1163 1164 if (r->last_tbl) 1165 r->last_tbl->next = t; 1166 else 1167 r->first_tbl = r->last_tbl = t; 1168 1169 r->tbl = r->last_tbl = t; 1170 return(ROFF_IGN); 1171 } 1172 1173 /* ARGSUSED */ 1174 static enum rofferr 1175 roff_so(ROFF_ARGS) 1176 { 1177 char *name; 1178 1179 (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL); 1180 1181 /* 1182 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1183 * opening anything that's not in our cwd or anything beneath 1184 * it. Thus, explicitly disallow traversing up the file-system 1185 * or using absolute paths. 1186 */ 1187 1188 name = *bufp + pos; 1189 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1190 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL); 1191 return(ROFF_ERR); 1192 } 1193 1194 *offs = pos; 1195 return(ROFF_SO); 1196 } 1197 1198 /* ARGSUSED */ 1199 static enum rofferr 1200 roff_userdef(ROFF_ARGS) 1201 { 1202 const char *arg[9]; 1203 char *cp, *n1, *n2; 1204 int i; 1205 1206 /* 1207 * Collect pointers to macro argument strings 1208 * and null-terminate them. 1209 */ 1210 cp = *bufp + pos; 1211 for (i = 0; i < 9; i++) 1212 arg[i] = '\0' == *cp ? "" : 1213 mandoc_getarg(&cp, r->msg, r->data, ln, &pos); 1214 1215 /* 1216 * Expand macro arguments. 1217 */ 1218 *szp = 0; 1219 n1 = cp = mandoc_strdup(r->current_string); 1220 while (NULL != (cp = strstr(cp, "\\$"))) { 1221 i = cp[2] - '1'; 1222 if (0 > i || 8 < i) { 1223 /* Not an argument invocation. */ 1224 cp += 2; 1225 continue; 1226 } 1227 1228 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1229 n2 = mandoc_malloc(*szp); 1230 1231 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1232 strlcat(n2, arg[i], *szp); 1233 strlcat(n2, cp + 3, *szp); 1234 1235 cp = n2 + (cp - n1); 1236 free(n1); 1237 n1 = n2; 1238 } 1239 1240 /* 1241 * Replace the macro invocation 1242 * by the expanded macro. 1243 */ 1244 free(*bufp); 1245 *bufp = n1; 1246 if (0 == *szp) 1247 *szp = strlen(*bufp) + 1; 1248 1249 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1250 ROFF_REPARSE : ROFF_APPEND); 1251 } 1252 1253 /* 1254 * Store *string into the user-defined string called *name. 1255 * In multiline mode, append to an existing entry and append '\n'; 1256 * else replace the existing entry, if there is one. 1257 * To clear an existing entry, call with (*r, *name, NULL, 0). 1258 */ 1259 static void 1260 roff_setstr(struct roff *r, const char *name, const char *string, 1261 int multiline) 1262 { 1263 struct roffstr *n; 1264 char *c; 1265 size_t oldch, newch; 1266 1267 /* Search for an existing string with the same name. */ 1268 n = r->first_string; 1269 while (n && strcmp(name, n->name)) 1270 n = n->next; 1271 1272 if (NULL == n) { 1273 /* Create a new string table entry. */ 1274 n = mandoc_malloc(sizeof(struct roffstr)); 1275 n->name = mandoc_strdup(name); 1276 n->string = NULL; 1277 n->next = r->first_string; 1278 r->first_string = n; 1279 } else if (0 == multiline) { 1280 /* In multiline mode, append; else replace. */ 1281 free(n->string); 1282 n->string = NULL; 1283 } 1284 1285 if (NULL == string) 1286 return; 1287 1288 /* 1289 * One additional byte for the '\n' in multiline mode, 1290 * and one for the terminating '\0'. 1291 */ 1292 newch = strlen(string) + (multiline ? 2 : 1); 1293 if (NULL == n->string) { 1294 n->string = mandoc_malloc(newch); 1295 *n->string = '\0'; 1296 oldch = 0; 1297 } else { 1298 oldch = strlen(n->string); 1299 n->string = mandoc_realloc(n->string, oldch + newch); 1300 } 1301 1302 /* Skip existing content in the destination buffer. */ 1303 c = n->string + oldch; 1304 1305 /* Append new content to the destination buffer. */ 1306 while (*string) { 1307 /* 1308 * Rudimentary roff copy mode: 1309 * Handle escaped backslashes. 1310 */ 1311 if ('\\' == *string && '\\' == *(string + 1)) 1312 string++; 1313 *c++ = *string++; 1314 } 1315 1316 /* Append terminating bytes. */ 1317 if (multiline) 1318 *c++ = '\n'; 1319 *c = '\0'; 1320 } 1321 1322 1323 static const char * 1324 roff_getstrn(const struct roff *r, const char *name, size_t len) 1325 { 1326 const struct roffstr *n; 1327 1328 n = r->first_string; 1329 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len])) 1330 n = n->next; 1331 1332 return(n ? n->string : NULL); 1333 } 1334 1335 1336 static void 1337 roff_freestr(struct roff *r) 1338 { 1339 struct roffstr *n, *nn; 1340 1341 for (n = r->first_string; n; n = nn) { 1342 free(n->name); 1343 free(n->string); 1344 nn = n->next; 1345 free(n); 1346 } 1347 1348 r->first_string = NULL; 1349 } 1350 1351 const struct tbl_span * 1352 roff_span(const struct roff *r) 1353 { 1354 1355 return(r->tbl ? tbl_span(r->tbl) : NULL); 1356 } 1357