1 /* $Id: roff.c,v 1.142 2011/05/26 11:58:25 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "libroff.h" 29 #include "libmandoc.h" 30 31 /* Maximum number of nested if-else conditionals. */ 32 #define RSTACK_MAX 128 33 34 enum rofft { 35 ROFF_ad, 36 ROFF_am, 37 ROFF_ami, 38 ROFF_am1, 39 ROFF_de, 40 ROFF_dei, 41 ROFF_de1, 42 ROFF_ds, 43 ROFF_el, 44 ROFF_hy, 45 ROFF_ie, 46 ROFF_if, 47 ROFF_ig, 48 ROFF_it, 49 ROFF_ne, 50 ROFF_nh, 51 ROFF_nr, 52 ROFF_ns, 53 ROFF_ps, 54 ROFF_rm, 55 ROFF_so, 56 ROFF_ta, 57 ROFF_tr, 58 ROFF_TS, 59 ROFF_TE, 60 ROFF_T_, 61 ROFF_EQ, 62 ROFF_EN, 63 ROFF_cblock, 64 ROFF_ccond, 65 ROFF_USERDEF, 66 ROFF_MAX 67 }; 68 69 enum roffrule { 70 ROFFRULE_ALLOW, 71 ROFFRULE_DENY 72 }; 73 74 struct roffstr { 75 char *name; /* key of symbol */ 76 char *string; /* current value */ 77 struct roffstr *next; /* next in list */ 78 }; 79 80 struct roff { 81 struct mparse *parse; /* parse point */ 82 struct roffnode *last; /* leaf of stack */ 83 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 84 int rstackpos; /* position in rstack */ 85 struct regset *regs; /* read/writable registers */ 86 struct roffstr *first_string; /* user-defined strings & macros */ 87 const char *current_string; /* value of last called user macro */ 88 struct tbl_node *first_tbl; /* first table parsed */ 89 struct tbl_node *last_tbl; /* last table parsed */ 90 struct tbl_node *tbl; /* current table being parsed */ 91 struct eqn_node *last_eqn; /* last equation parsed */ 92 struct eqn_node *first_eqn; /* first equation parsed */ 93 struct eqn_node *eqn; /* current equation being parsed */ 94 }; 95 96 struct roffnode { 97 enum rofft tok; /* type of node */ 98 struct roffnode *parent; /* up one in stack */ 99 int line; /* parse line */ 100 int col; /* parse col */ 101 char *name; /* node name, e.g. macro name */ 102 char *end; /* end-rules: custom token */ 103 int endspan; /* end-rules: next-line or infty */ 104 enum roffrule rule; /* current evaluation rule */ 105 }; 106 107 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 108 enum rofft tok, /* tok of macro */ \ 109 char **bufp, /* input buffer */ \ 110 size_t *szp, /* size of input buffer */ \ 111 int ln, /* parse line */ \ 112 int ppos, /* original pos in buffer */ \ 113 int pos, /* current pos in buffer */ \ 114 int *offs /* reset offset of buffer data */ 115 116 typedef enum rofferr (*roffproc)(ROFF_ARGS); 117 118 struct roffmac { 119 const char *name; /* macro name */ 120 roffproc proc; /* process new macro */ 121 roffproc text; /* process as child text of macro */ 122 roffproc sub; /* process as child of macro */ 123 int flags; 124 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 125 struct roffmac *next; 126 }; 127 128 struct predef { 129 const char *name; /* predefined input name */ 130 const char *str; /* replacement symbol */ 131 }; 132 133 #define PREDEF(__name, __str) \ 134 { (__name), (__str) }, 135 136 static enum rofferr roff_block(ROFF_ARGS); 137 static enum rofferr roff_block_text(ROFF_ARGS); 138 static enum rofferr roff_block_sub(ROFF_ARGS); 139 static enum rofferr roff_cblock(ROFF_ARGS); 140 static enum rofferr roff_ccond(ROFF_ARGS); 141 static enum rofferr roff_cond(ROFF_ARGS); 142 static enum rofferr roff_cond_text(ROFF_ARGS); 143 static enum rofferr roff_cond_sub(ROFF_ARGS); 144 static enum rofferr roff_ds(ROFF_ARGS); 145 static enum roffrule roff_evalcond(const char *, int *); 146 static void roff_freestr(struct roff *); 147 static char *roff_getname(struct roff *, char **, int, int); 148 static const char *roff_getstrn(const struct roff *, 149 const char *, size_t); 150 static enum rofferr roff_line_ignore(ROFF_ARGS); 151 static enum rofferr roff_nr(ROFF_ARGS); 152 static int roff_res(struct roff *, 153 char **, size_t *, int, int); 154 static enum rofferr roff_rm(ROFF_ARGS); 155 static void roff_setstr(struct roff *, 156 const char *, const char *, int); 157 static enum rofferr roff_so(ROFF_ARGS); 158 static enum rofferr roff_TE(ROFF_ARGS); 159 static enum rofferr roff_TS(ROFF_ARGS); 160 static enum rofferr roff_EQ(ROFF_ARGS); 161 static enum rofferr roff_EN(ROFF_ARGS); 162 static enum rofferr roff_T_(ROFF_ARGS); 163 static enum rofferr roff_userdef(ROFF_ARGS); 164 165 /* See roff_hash_find() */ 166 167 #define ASCII_HI 126 168 #define ASCII_LO 33 169 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 170 171 static struct roffmac *hash[HASHWIDTH]; 172 173 static struct roffmac roffs[ROFF_MAX] = { 174 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 175 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 176 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 177 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 178 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 179 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 180 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 181 { "ds", roff_ds, NULL, NULL, 0, NULL }, 182 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 183 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 184 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 185 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 186 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 187 { "it", roff_line_ignore, NULL, NULL, 0, NULL }, 188 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 189 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 190 { "nr", roff_nr, NULL, NULL, 0, NULL }, 191 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 192 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 193 { "rm", roff_rm, NULL, NULL, 0, NULL }, 194 { "so", roff_so, NULL, NULL, 0, NULL }, 195 { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, 196 { "tr", roff_line_ignore, NULL, NULL, 0, NULL }, 197 { "TS", roff_TS, NULL, NULL, 0, NULL }, 198 { "TE", roff_TE, NULL, NULL, 0, NULL }, 199 { "T&", roff_T_, NULL, NULL, 0, NULL }, 200 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 201 { "EN", roff_EN, NULL, NULL, 0, NULL }, 202 { ".", roff_cblock, NULL, NULL, 0, NULL }, 203 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 204 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 205 }; 206 207 /* Array of injected predefined strings. */ 208 #define PREDEFS_MAX 38 209 static const struct predef predefs[PREDEFS_MAX] = { 210 #include "predefs.in" 211 }; 212 213 static void roff_free1(struct roff *); 214 static enum rofft roff_hash_find(const char *, size_t); 215 static void roff_hash_init(void); 216 static void roffnode_cleanscope(struct roff *); 217 static void roffnode_push(struct roff *, enum rofft, 218 const char *, int, int); 219 static void roffnode_pop(struct roff *); 220 static enum rofft roff_parse(struct roff *, const char *, int *); 221 222 /* See roff_hash_find() */ 223 #define ROFF_HASH(p) (p[0] - ASCII_LO) 224 225 static void 226 roff_hash_init(void) 227 { 228 struct roffmac *n; 229 int buc, i; 230 231 for (i = 0; i < (int)ROFF_USERDEF; i++) { 232 assert(roffs[i].name[0] >= ASCII_LO); 233 assert(roffs[i].name[0] <= ASCII_HI); 234 235 buc = ROFF_HASH(roffs[i].name); 236 237 if (NULL != (n = hash[buc])) { 238 for ( ; n->next; n = n->next) 239 /* Do nothing. */ ; 240 n->next = &roffs[i]; 241 } else 242 hash[buc] = &roffs[i]; 243 } 244 } 245 246 /* 247 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 248 * the nil-terminated string name could be found. 249 */ 250 static enum rofft 251 roff_hash_find(const char *p, size_t s) 252 { 253 int buc; 254 struct roffmac *n; 255 256 /* 257 * libroff has an extremely simple hashtable, for the time 258 * being, which simply keys on the first character, which must 259 * be printable, then walks a chain. It works well enough until 260 * optimised. 261 */ 262 263 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 264 return(ROFF_MAX); 265 266 buc = ROFF_HASH(p); 267 268 if (NULL == (n = hash[buc])) 269 return(ROFF_MAX); 270 for ( ; n; n = n->next) 271 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 272 return((enum rofft)(n - roffs)); 273 274 return(ROFF_MAX); 275 } 276 277 278 /* 279 * Pop the current node off of the stack of roff instructions currently 280 * pending. 281 */ 282 static void 283 roffnode_pop(struct roff *r) 284 { 285 struct roffnode *p; 286 287 assert(r->last); 288 p = r->last; 289 290 r->last = r->last->parent; 291 free(p->name); 292 free(p->end); 293 free(p); 294 } 295 296 297 /* 298 * Push a roff node onto the instruction stack. This must later be 299 * removed with roffnode_pop(). 300 */ 301 static void 302 roffnode_push(struct roff *r, enum rofft tok, const char *name, 303 int line, int col) 304 { 305 struct roffnode *p; 306 307 p = mandoc_calloc(1, sizeof(struct roffnode)); 308 p->tok = tok; 309 if (name) 310 p->name = mandoc_strdup(name); 311 p->parent = r->last; 312 p->line = line; 313 p->col = col; 314 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; 315 316 r->last = p; 317 } 318 319 320 static void 321 roff_free1(struct roff *r) 322 { 323 struct tbl_node *t; 324 struct eqn_node *e; 325 326 while (NULL != (t = r->first_tbl)) { 327 r->first_tbl = t->next; 328 tbl_free(t); 329 } 330 331 r->first_tbl = r->last_tbl = r->tbl = NULL; 332 333 while (NULL != (e = r->first_eqn)) { 334 r->first_eqn = e->next; 335 eqn_free(e); 336 } 337 338 r->first_eqn = r->last_eqn = r->eqn = NULL; 339 340 while (r->last) 341 roffnode_pop(r); 342 343 roff_freestr(r); 344 } 345 346 347 void 348 roff_reset(struct roff *r) 349 { 350 351 roff_free1(r); 352 } 353 354 355 void 356 roff_free(struct roff *r) 357 { 358 359 roff_free1(r); 360 free(r); 361 } 362 363 364 struct roff * 365 roff_alloc(struct regset *regs, struct mparse *parse) 366 { 367 struct roff *r; 368 int i; 369 370 r = mandoc_calloc(1, sizeof(struct roff)); 371 r->regs = regs; 372 r->parse = parse; 373 r->rstackpos = -1; 374 375 roff_hash_init(); 376 377 for (i = 0; i < PREDEFS_MAX; i++) 378 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 379 380 return(r); 381 } 382 383 384 /* 385 * Pre-filter each and every line for reserved words (one beginning with 386 * `\*', e.g., `\*(ab'). These must be handled before the actual line 387 * is processed. 388 */ 389 static int 390 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) 391 { 392 const char *stesc; /* start of an escape sequence ('\\') */ 393 const char *stnam; /* start of the name, after "[(*" */ 394 const char *cp; /* end of the name, e.g. before ']' */ 395 const char *res; /* the string to be substituted */ 396 int i, maxl; 397 size_t nsz; 398 char *n; 399 400 /* Search for a leading backslash and save a pointer to it. */ 401 402 cp = *bufp + pos; 403 while (NULL != (cp = strchr(cp, '\\'))) { 404 stesc = cp++; 405 406 /* 407 * The second character must be an asterisk. 408 * If it isn't, skip it anyway: It is escaped, 409 * so it can't start another escape sequence. 410 */ 411 412 if ('\0' == *cp) 413 return(1); 414 if ('*' != *cp++) 415 continue; 416 417 /* 418 * The third character decides the length 419 * of the name of the string. 420 * Save a pointer to the name. 421 */ 422 423 switch (*cp) { 424 case ('\0'): 425 return(1); 426 case ('('): 427 cp++; 428 maxl = 2; 429 break; 430 case ('['): 431 cp++; 432 maxl = 0; 433 break; 434 default: 435 maxl = 1; 436 break; 437 } 438 stnam = cp; 439 440 /* Advance to the end of the name. */ 441 442 for (i = 0; 0 == maxl || i < maxl; i++, cp++) { 443 if ('\0' == *cp) 444 return(1); /* Error. */ 445 if (0 == maxl && ']' == *cp) 446 break; 447 } 448 449 /* 450 * Retrieve the replacement string; if it is 451 * undefined, resume searching for escapes. 452 */ 453 454 res = roff_getstrn(r, stnam, (size_t)i); 455 456 if (NULL == res) { 457 /* TODO: keep track of the correct position. */ 458 mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL); 459 res = ""; 460 } 461 462 /* Replace the escape sequence by the string. */ 463 464 nsz = *szp + strlen(res) + 1; 465 n = mandoc_malloc(nsz); 466 467 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); 468 strlcat(n, res, nsz); 469 strlcat(n, cp + (maxl ? 0 : 1), nsz); 470 471 free(*bufp); 472 473 *bufp = n; 474 *szp = nsz; 475 return(0); 476 } 477 478 return(1); 479 } 480 481 482 enum rofferr 483 roff_parseln(struct roff *r, int ln, char **bufp, 484 size_t *szp, int pos, int *offs) 485 { 486 enum rofft t; 487 enum rofferr e; 488 int ppos, ctl; 489 490 /* 491 * Run the reserved-word filter only if we have some reserved 492 * words to fill in. 493 */ 494 495 if (r->first_string && ! roff_res(r, bufp, szp, ln, pos)) 496 return(ROFF_REPARSE); 497 498 ppos = pos; 499 ctl = mandoc_getcontrol(*bufp, &pos); 500 501 /* 502 * First, if a scope is open and we're not a macro, pass the 503 * text through the macro's filter. If a scope isn't open and 504 * we're not a macro, just let it through. 505 * Finally, if there's an equation scope open, divert it into it 506 * no matter our state. 507 */ 508 509 if (r->last && ! ctl) { 510 t = r->last->tok; 511 assert(roffs[t].text); 512 e = (*roffs[t].text) 513 (r, t, bufp, szp, ln, pos, pos, offs); 514 assert(ROFF_IGN == e || ROFF_CONT == e); 515 if (ROFF_CONT != e) 516 return(e); 517 if (r->eqn) 518 return(eqn_read(&r->eqn, ln, *bufp, pos)); 519 if (r->tbl) 520 return(tbl_read(r->tbl, ln, *bufp, pos)); 521 return(ROFF_CONT); 522 } else if ( ! ctl) { 523 if (r->eqn) 524 return(eqn_read(&r->eqn, ln, *bufp, pos)); 525 if (r->tbl) 526 return(tbl_read(r->tbl, ln, *bufp, pos)); 527 return(ROFF_CONT); 528 } else if (r->eqn) 529 return(eqn_read(&r->eqn, ln, *bufp, ppos)); 530 531 /* 532 * If a scope is open, go to the child handler for that macro, 533 * as it may want to preprocess before doing anything with it. 534 * Don't do so if an equation is open. 535 */ 536 537 if (r->last) { 538 t = r->last->tok; 539 assert(roffs[t].sub); 540 return((*roffs[t].sub) 541 (r, t, bufp, szp, 542 ln, ppos, pos, offs)); 543 } 544 545 /* 546 * Lastly, as we've no scope open, try to look up and execute 547 * the new macro. If no macro is found, simply return and let 548 * the compilers handle it. 549 */ 550 551 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 552 return(ROFF_CONT); 553 554 assert(roffs[t].proc); 555 return((*roffs[t].proc) 556 (r, t, bufp, szp, 557 ln, ppos, pos, offs)); 558 } 559 560 561 void 562 roff_endparse(struct roff *r) 563 { 564 565 if (r->last) 566 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 567 r->last->line, r->last->col, NULL); 568 569 if (r->eqn) { 570 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 571 r->eqn->eqn.line, r->eqn->eqn.pos, NULL); 572 eqn_end(r->eqn); 573 r->eqn = NULL; 574 } 575 576 if (r->tbl) { 577 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 578 r->tbl->line, r->tbl->pos, NULL); 579 tbl_end(r->tbl); 580 r->tbl = NULL; 581 } 582 } 583 584 /* 585 * Parse a roff node's type from the input buffer. This must be in the 586 * form of ".foo xxx" in the usual way. 587 */ 588 static enum rofft 589 roff_parse(struct roff *r, const char *buf, int *pos) 590 { 591 const char *mac; 592 size_t maclen; 593 enum rofft t; 594 595 if ('\0' == buf[*pos] || '"' == buf[*pos]) 596 return(ROFF_MAX); 597 598 mac = buf + *pos; 599 maclen = strcspn(mac, " \\\t\0"); 600 601 t = (r->current_string = roff_getstrn(r, mac, maclen)) 602 ? ROFF_USERDEF : roff_hash_find(mac, maclen); 603 604 *pos += (int)maclen; 605 606 while (buf[*pos] && ' ' == buf[*pos]) 607 (*pos)++; 608 609 return(t); 610 } 611 612 /* ARGSUSED */ 613 static enum rofferr 614 roff_cblock(ROFF_ARGS) 615 { 616 617 /* 618 * A block-close `..' should only be invoked as a child of an 619 * ignore macro, otherwise raise a warning and just ignore it. 620 */ 621 622 if (NULL == r->last) { 623 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 624 return(ROFF_IGN); 625 } 626 627 switch (r->last->tok) { 628 case (ROFF_am): 629 /* FALLTHROUGH */ 630 case (ROFF_ami): 631 /* FALLTHROUGH */ 632 case (ROFF_am1): 633 /* FALLTHROUGH */ 634 case (ROFF_de): 635 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 636 /* FALLTHROUGH */ 637 case (ROFF_dei): 638 /* FALLTHROUGH */ 639 case (ROFF_ig): 640 break; 641 default: 642 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 643 return(ROFF_IGN); 644 } 645 646 if ((*bufp)[pos]) 647 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 648 649 roffnode_pop(r); 650 roffnode_cleanscope(r); 651 return(ROFF_IGN); 652 653 } 654 655 656 static void 657 roffnode_cleanscope(struct roff *r) 658 { 659 660 while (r->last) { 661 if (--r->last->endspan < 0) 662 break; 663 roffnode_pop(r); 664 } 665 } 666 667 668 /* ARGSUSED */ 669 static enum rofferr 670 roff_ccond(ROFF_ARGS) 671 { 672 673 if (NULL == r->last) { 674 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 675 return(ROFF_IGN); 676 } 677 678 switch (r->last->tok) { 679 case (ROFF_el): 680 /* FALLTHROUGH */ 681 case (ROFF_ie): 682 /* FALLTHROUGH */ 683 case (ROFF_if): 684 break; 685 default: 686 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 687 return(ROFF_IGN); 688 } 689 690 if (r->last->endspan > -1) { 691 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 692 return(ROFF_IGN); 693 } 694 695 if ((*bufp)[pos]) 696 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 697 698 roffnode_pop(r); 699 roffnode_cleanscope(r); 700 return(ROFF_IGN); 701 } 702 703 704 /* ARGSUSED */ 705 static enum rofferr 706 roff_block(ROFF_ARGS) 707 { 708 int sv; 709 size_t sz; 710 char *name; 711 712 name = NULL; 713 714 if (ROFF_ig != tok) { 715 if ('\0' == (*bufp)[pos]) { 716 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 717 return(ROFF_IGN); 718 } 719 720 /* 721 * Re-write `de1', since we don't really care about 722 * groff's strange compatibility mode, into `de'. 723 */ 724 725 if (ROFF_de1 == tok) 726 tok = ROFF_de; 727 if (ROFF_de == tok) 728 name = *bufp + pos; 729 else 730 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, 731 roffs[tok].name); 732 733 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 734 pos++; 735 736 while (isspace((unsigned char)(*bufp)[pos])) 737 (*bufp)[pos++] = '\0'; 738 } 739 740 roffnode_push(r, tok, name, ln, ppos); 741 742 /* 743 * At the beginning of a `de' macro, clear the existing string 744 * with the same name, if there is one. New content will be 745 * added from roff_block_text() in multiline mode. 746 */ 747 748 if (ROFF_de == tok) 749 roff_setstr(r, name, "", 0); 750 751 if ('\0' == (*bufp)[pos]) 752 return(ROFF_IGN); 753 754 /* If present, process the custom end-of-line marker. */ 755 756 sv = pos; 757 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 758 pos++; 759 760 /* 761 * Note: groff does NOT like escape characters in the input. 762 * Instead of detecting this, we're just going to let it fly and 763 * to hell with it. 764 */ 765 766 assert(pos > sv); 767 sz = (size_t)(pos - sv); 768 769 if (1 == sz && '.' == (*bufp)[sv]) 770 return(ROFF_IGN); 771 772 r->last->end = mandoc_malloc(sz + 1); 773 774 memcpy(r->last->end, *bufp + sv, sz); 775 r->last->end[(int)sz] = '\0'; 776 777 if ((*bufp)[pos]) 778 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 779 780 return(ROFF_IGN); 781 } 782 783 784 /* ARGSUSED */ 785 static enum rofferr 786 roff_block_sub(ROFF_ARGS) 787 { 788 enum rofft t; 789 int i, j; 790 791 /* 792 * First check whether a custom macro exists at this level. If 793 * it does, then check against it. This is some of groff's 794 * stranger behaviours. If we encountered a custom end-scope 795 * tag and that tag also happens to be a "real" macro, then we 796 * need to try interpreting it again as a real macro. If it's 797 * not, then return ignore. Else continue. 798 */ 799 800 if (r->last->end) { 801 for (i = pos, j = 0; r->last->end[j]; j++, i++) 802 if ((*bufp)[i] != r->last->end[j]) 803 break; 804 805 if ('\0' == r->last->end[j] && 806 ('\0' == (*bufp)[i] || 807 ' ' == (*bufp)[i] || 808 '\t' == (*bufp)[i])) { 809 roffnode_pop(r); 810 roffnode_cleanscope(r); 811 812 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 813 i++; 814 815 pos = i; 816 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 817 return(ROFF_RERUN); 818 return(ROFF_IGN); 819 } 820 } 821 822 /* 823 * If we have no custom end-query or lookup failed, then try 824 * pulling it out of the hashtable. 825 */ 826 827 t = roff_parse(r, *bufp, &pos); 828 829 /* 830 * Macros other than block-end are only significant 831 * in `de' blocks; elsewhere, simply throw them away. 832 */ 833 if (ROFF_cblock != t) { 834 if (ROFF_de == tok) 835 roff_setstr(r, r->last->name, *bufp + ppos, 1); 836 return(ROFF_IGN); 837 } 838 839 assert(roffs[t].proc); 840 return((*roffs[t].proc)(r, t, bufp, szp, 841 ln, ppos, pos, offs)); 842 } 843 844 845 /* ARGSUSED */ 846 static enum rofferr 847 roff_block_text(ROFF_ARGS) 848 { 849 850 if (ROFF_de == tok) 851 roff_setstr(r, r->last->name, *bufp + pos, 1); 852 853 return(ROFF_IGN); 854 } 855 856 857 /* ARGSUSED */ 858 static enum rofferr 859 roff_cond_sub(ROFF_ARGS) 860 { 861 enum rofft t; 862 enum roffrule rr; 863 char *ep; 864 865 rr = r->last->rule; 866 roffnode_cleanscope(r); 867 868 /* 869 * If the macro is unknown, first check if it contains a closing 870 * delimiter `\}'. If it does, close out our scope and return 871 * the currently-scoped rule (ignore or continue). Else, drop 872 * into the currently-scoped rule. 873 */ 874 875 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { 876 ep = &(*bufp)[pos]; 877 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 878 ep++; 879 if ('}' != *ep) 880 continue; 881 *ep = '&'; 882 roff_ccond(r, ROFF_ccond, bufp, szp, 883 ln, pos, pos + 2, offs); 884 break; 885 } 886 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 887 } 888 889 /* 890 * A denied conditional must evaluate its children if and only 891 * if they're either structurally required (such as loops and 892 * conditionals) or a closing macro. 893 */ 894 895 if (ROFFRULE_DENY == rr) 896 if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) 897 if (ROFF_ccond != t) 898 return(ROFF_IGN); 899 900 assert(roffs[t].proc); 901 return((*roffs[t].proc)(r, t, bufp, szp, 902 ln, ppos, pos, offs)); 903 } 904 905 /* ARGSUSED */ 906 static enum rofferr 907 roff_cond_text(ROFF_ARGS) 908 { 909 char *ep; 910 enum roffrule rr; 911 912 rr = r->last->rule; 913 roffnode_cleanscope(r); 914 915 ep = &(*bufp)[pos]; 916 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 917 ep++; 918 if ('}' != *ep) 919 continue; 920 *ep = '&'; 921 roff_ccond(r, ROFF_ccond, bufp, szp, 922 ln, pos, pos + 2, offs); 923 } 924 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 925 } 926 927 static enum roffrule 928 roff_evalcond(const char *v, int *pos) 929 { 930 931 switch (v[*pos]) { 932 case ('n'): 933 (*pos)++; 934 return(ROFFRULE_ALLOW); 935 case ('e'): 936 /* FALLTHROUGH */ 937 case ('o'): 938 /* FALLTHROUGH */ 939 case ('t'): 940 (*pos)++; 941 return(ROFFRULE_DENY); 942 default: 943 break; 944 } 945 946 while (v[*pos] && ' ' != v[*pos]) 947 (*pos)++; 948 return(ROFFRULE_DENY); 949 } 950 951 /* ARGSUSED */ 952 static enum rofferr 953 roff_line_ignore(ROFF_ARGS) 954 { 955 956 if (ROFF_it == tok) 957 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it"); 958 959 return(ROFF_IGN); 960 } 961 962 /* ARGSUSED */ 963 static enum rofferr 964 roff_cond(ROFF_ARGS) 965 { 966 int sv; 967 enum roffrule rule; 968 969 /* 970 * An `.el' has no conditional body: it will consume the value 971 * of the current rstack entry set in prior `ie' calls or 972 * defaults to DENY. 973 * 974 * If we're not an `el', however, then evaluate the conditional. 975 */ 976 977 rule = ROFF_el == tok ? 978 (r->rstackpos < 0 ? 979 ROFFRULE_DENY : r->rstack[r->rstackpos--]) : 980 roff_evalcond(*bufp, &pos); 981 982 sv = pos; 983 while (' ' == (*bufp)[pos]) 984 pos++; 985 986 /* 987 * Roff is weird. If we have just white-space after the 988 * conditional, it's considered the BODY and we exit without 989 * really doing anything. Warn about this. It's probably 990 * wrong. 991 */ 992 993 if ('\0' == (*bufp)[pos] && sv != pos) { 994 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 995 return(ROFF_IGN); 996 } 997 998 roffnode_push(r, tok, NULL, ln, ppos); 999 1000 r->last->rule = rule; 1001 1002 /* 1003 * An if-else will put the NEGATION of the current evaluated 1004 * conditional into the stack of rules. 1005 */ 1006 1007 if (ROFF_ie == tok) { 1008 if (r->rstackpos == RSTACK_MAX - 1) { 1009 mandoc_msg(MANDOCERR_MEM, 1010 r->parse, ln, ppos, NULL); 1011 return(ROFF_ERR); 1012 } 1013 r->rstack[++r->rstackpos] = 1014 ROFFRULE_DENY == r->last->rule ? 1015 ROFFRULE_ALLOW : ROFFRULE_DENY; 1016 } 1017 1018 /* If the parent has false as its rule, then so do we. */ 1019 1020 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1021 r->last->rule = ROFFRULE_DENY; 1022 1023 /* 1024 * Determine scope. If we're invoked with "\{" trailing the 1025 * conditional, then we're in a multiline scope. Else our scope 1026 * expires on the next line. 1027 */ 1028 1029 r->last->endspan = 1; 1030 1031 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1032 r->last->endspan = -1; 1033 pos += 2; 1034 } 1035 1036 /* 1037 * If there are no arguments on the line, the next-line scope is 1038 * assumed. 1039 */ 1040 1041 if ('\0' == (*bufp)[pos]) 1042 return(ROFF_IGN); 1043 1044 /* Otherwise re-run the roff parser after recalculating. */ 1045 1046 *offs = pos; 1047 return(ROFF_RERUN); 1048 } 1049 1050 1051 /* ARGSUSED */ 1052 static enum rofferr 1053 roff_ds(ROFF_ARGS) 1054 { 1055 char *name, *string; 1056 1057 /* 1058 * A symbol is named by the first word following the macro 1059 * invocation up to a space. Its value is anything after the 1060 * name's trailing whitespace and optional double-quote. Thus, 1061 * 1062 * [.ds foo "bar " ] 1063 * 1064 * will have `bar " ' as its value. 1065 */ 1066 1067 string = *bufp + pos; 1068 name = roff_getname(r, &string, ln, pos); 1069 if ('\0' == *name) 1070 return(ROFF_IGN); 1071 1072 /* Read past initial double-quote. */ 1073 if ('"' == *string) 1074 string++; 1075 1076 /* The rest is the value. */ 1077 roff_setstr(r, name, string, 0); 1078 return(ROFF_IGN); 1079 } 1080 1081 1082 /* ARGSUSED */ 1083 static enum rofferr 1084 roff_nr(ROFF_ARGS) 1085 { 1086 const char *key; 1087 char *val; 1088 int iv; 1089 struct reg *rg; 1090 1091 val = *bufp + pos; 1092 key = roff_getname(r, &val, ln, pos); 1093 rg = r->regs->regs; 1094 1095 if (0 == strcmp(key, "nS")) { 1096 rg[(int)REG_nS].set = 1; 1097 if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0) 1098 rg[REG_nS].v.u = (unsigned)iv; 1099 else 1100 rg[(int)REG_nS].v.u = 0u; 1101 } 1102 1103 return(ROFF_IGN); 1104 } 1105 1106 /* ARGSUSED */ 1107 static enum rofferr 1108 roff_rm(ROFF_ARGS) 1109 { 1110 const char *name; 1111 char *cp; 1112 1113 cp = *bufp + pos; 1114 while ('\0' != *cp) { 1115 name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); 1116 if ('\0' != *name) 1117 roff_setstr(r, name, NULL, 0); 1118 } 1119 return(ROFF_IGN); 1120 } 1121 1122 /* ARGSUSED */ 1123 static enum rofferr 1124 roff_TE(ROFF_ARGS) 1125 { 1126 1127 if (NULL == r->tbl) 1128 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1129 else 1130 tbl_end(r->tbl); 1131 1132 r->tbl = NULL; 1133 return(ROFF_IGN); 1134 } 1135 1136 /* ARGSUSED */ 1137 static enum rofferr 1138 roff_T_(ROFF_ARGS) 1139 { 1140 1141 if (NULL == r->tbl) 1142 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1143 else 1144 tbl_restart(ppos, ln, r->tbl); 1145 1146 return(ROFF_IGN); 1147 } 1148 1149 /* ARGSUSED */ 1150 static enum rofferr 1151 roff_EQ(ROFF_ARGS) 1152 { 1153 struct eqn_node *e; 1154 1155 assert(NULL == r->eqn); 1156 e = eqn_alloc(ppos, ln); 1157 1158 if (r->last_eqn) 1159 r->last_eqn->next = e; 1160 else 1161 r->first_eqn = r->last_eqn = e; 1162 1163 r->eqn = r->last_eqn = e; 1164 return(ROFF_IGN); 1165 } 1166 1167 /* ARGSUSED */ 1168 static enum rofferr 1169 roff_EN(ROFF_ARGS) 1170 { 1171 1172 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1173 return(ROFF_IGN); 1174 } 1175 1176 /* ARGSUSED */ 1177 static enum rofferr 1178 roff_TS(ROFF_ARGS) 1179 { 1180 struct tbl_node *t; 1181 1182 if (r->tbl) { 1183 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); 1184 tbl_end(r->tbl); 1185 } 1186 1187 t = tbl_alloc(ppos, ln, r->parse); 1188 1189 if (r->last_tbl) 1190 r->last_tbl->next = t; 1191 else 1192 r->first_tbl = r->last_tbl = t; 1193 1194 r->tbl = r->last_tbl = t; 1195 return(ROFF_IGN); 1196 } 1197 1198 /* ARGSUSED */ 1199 static enum rofferr 1200 roff_so(ROFF_ARGS) 1201 { 1202 char *name; 1203 1204 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); 1205 1206 /* 1207 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1208 * opening anything that's not in our cwd or anything beneath 1209 * it. Thus, explicitly disallow traversing up the file-system 1210 * or using absolute paths. 1211 */ 1212 1213 name = *bufp + pos; 1214 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1215 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); 1216 return(ROFF_ERR); 1217 } 1218 1219 *offs = pos; 1220 return(ROFF_SO); 1221 } 1222 1223 /* ARGSUSED */ 1224 static enum rofferr 1225 roff_userdef(ROFF_ARGS) 1226 { 1227 const char *arg[9]; 1228 char *cp, *n1, *n2; 1229 int i; 1230 1231 /* 1232 * Collect pointers to macro argument strings 1233 * and null-terminate them. 1234 */ 1235 cp = *bufp + pos; 1236 for (i = 0; i < 9; i++) 1237 arg[i] = '\0' == *cp ? "" : 1238 mandoc_getarg(r->parse, &cp, ln, &pos); 1239 1240 /* 1241 * Expand macro arguments. 1242 */ 1243 *szp = 0; 1244 n1 = cp = mandoc_strdup(r->current_string); 1245 while (NULL != (cp = strstr(cp, "\\$"))) { 1246 i = cp[2] - '1'; 1247 if (0 > i || 8 < i) { 1248 /* Not an argument invocation. */ 1249 cp += 2; 1250 continue; 1251 } 1252 1253 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1254 n2 = mandoc_malloc(*szp); 1255 1256 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1257 strlcat(n2, arg[i], *szp); 1258 strlcat(n2, cp + 3, *szp); 1259 1260 cp = n2 + (cp - n1); 1261 free(n1); 1262 n1 = n2; 1263 } 1264 1265 /* 1266 * Replace the macro invocation 1267 * by the expanded macro. 1268 */ 1269 free(*bufp); 1270 *bufp = n1; 1271 if (0 == *szp) 1272 *szp = strlen(*bufp) + 1; 1273 1274 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1275 ROFF_REPARSE : ROFF_APPEND); 1276 } 1277 1278 static char * 1279 roff_getname(struct roff *r, char **cpp, int ln, int pos) 1280 { 1281 char *name, *cp; 1282 1283 name = *cpp; 1284 if ('\0' == *name) 1285 return(name); 1286 1287 /* Read until end of name. */ 1288 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { 1289 if ('\\' != *cp) 1290 continue; 1291 cp++; 1292 if ('\\' == *cp) 1293 continue; 1294 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); 1295 *cp = '\0'; 1296 name = cp; 1297 } 1298 1299 /* Nil-terminate name. */ 1300 if ('\0' != *cp) 1301 *(cp++) = '\0'; 1302 1303 /* Read past spaces. */ 1304 while (' ' == *cp) 1305 cp++; 1306 1307 *cpp = cp; 1308 return(name); 1309 } 1310 1311 /* 1312 * Store *string into the user-defined string called *name. 1313 * In multiline mode, append to an existing entry and append '\n'; 1314 * else replace the existing entry, if there is one. 1315 * To clear an existing entry, call with (*r, *name, NULL, 0). 1316 */ 1317 static void 1318 roff_setstr(struct roff *r, const char *name, const char *string, 1319 int multiline) 1320 { 1321 struct roffstr *n; 1322 char *c; 1323 size_t oldch, newch; 1324 1325 /* Search for an existing string with the same name. */ 1326 n = r->first_string; 1327 while (n && strcmp(name, n->name)) 1328 n = n->next; 1329 1330 if (NULL == n) { 1331 /* Create a new string table entry. */ 1332 n = mandoc_malloc(sizeof(struct roffstr)); 1333 n->name = mandoc_strdup(name); 1334 n->string = NULL; 1335 n->next = r->first_string; 1336 r->first_string = n; 1337 } else if (0 == multiline) { 1338 /* In multiline mode, append; else replace. */ 1339 free(n->string); 1340 n->string = NULL; 1341 } 1342 1343 if (NULL == string) 1344 return; 1345 1346 /* 1347 * One additional byte for the '\n' in multiline mode, 1348 * and one for the terminating '\0'. 1349 */ 1350 newch = strlen(string) + (multiline ? 2u : 1u); 1351 if (NULL == n->string) { 1352 n->string = mandoc_malloc(newch); 1353 *n->string = '\0'; 1354 oldch = 0; 1355 } else { 1356 oldch = strlen(n->string); 1357 n->string = mandoc_realloc(n->string, oldch + newch); 1358 } 1359 1360 /* Skip existing content in the destination buffer. */ 1361 c = n->string + (int)oldch; 1362 1363 /* Append new content to the destination buffer. */ 1364 while (*string) { 1365 /* 1366 * Rudimentary roff copy mode: 1367 * Handle escaped backslashes. 1368 */ 1369 if ('\\' == *string && '\\' == *(string + 1)) 1370 string++; 1371 *c++ = *string++; 1372 } 1373 1374 /* Append terminating bytes. */ 1375 if (multiline) 1376 *c++ = '\n'; 1377 *c = '\0'; 1378 } 1379 1380 static const char * 1381 roff_getstrn(const struct roff *r, const char *name, size_t len) 1382 { 1383 const struct roffstr *n; 1384 1385 n = r->first_string; 1386 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len])) 1387 n = n->next; 1388 1389 return(n ? n->string : NULL); 1390 } 1391 1392 static void 1393 roff_freestr(struct roff *r) 1394 { 1395 struct roffstr *n, *nn; 1396 1397 for (n = r->first_string; n; n = nn) { 1398 free(n->name); 1399 free(n->string); 1400 nn = n->next; 1401 free(n); 1402 } 1403 1404 r->first_string = NULL; 1405 } 1406 1407 const struct tbl_span * 1408 roff_span(const struct roff *r) 1409 { 1410 1411 return(r->tbl ? tbl_span(r->tbl) : NULL); 1412 } 1413 1414 const struct eqn * 1415 roff_eqn(const struct roff *r) 1416 { 1417 1418 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 1419 } 1420