1 /* $Id: roff.c,v 1.178 2013/07/13 12:52:07 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 28 #include "mandoc.h" 29 #include "libroff.h" 30 #include "libmandoc.h" 31 32 /* Maximum number of nested if-else conditionals. */ 33 #define RSTACK_MAX 128 34 35 /* Maximum number of string expansions per line, to break infinite loops. */ 36 #define EXPAND_LIMIT 1000 37 38 enum rofft { 39 ROFF_ad, 40 ROFF_am, 41 ROFF_ami, 42 ROFF_am1, 43 ROFF_cc, 44 ROFF_de, 45 ROFF_dei, 46 ROFF_de1, 47 ROFF_ds, 48 ROFF_el, 49 ROFF_hy, 50 ROFF_ie, 51 ROFF_if, 52 ROFF_ig, 53 ROFF_it, 54 ROFF_ne, 55 ROFF_nh, 56 ROFF_nr, 57 ROFF_ns, 58 ROFF_ps, 59 ROFF_rm, 60 ROFF_so, 61 ROFF_ta, 62 ROFF_tr, 63 ROFF_Dd, 64 ROFF_TH, 65 ROFF_TS, 66 ROFF_TE, 67 ROFF_T_, 68 ROFF_EQ, 69 ROFF_EN, 70 ROFF_cblock, 71 ROFF_ccond, 72 ROFF_USERDEF, 73 ROFF_MAX 74 }; 75 76 enum roffrule { 77 ROFFRULE_ALLOW, 78 ROFFRULE_DENY 79 }; 80 81 /* 82 * A single register entity. If "set" is zero, the value of the 83 * register should be the default one, which is per-register. 84 * Registers are assumed to be unsigned ints for now. 85 */ 86 struct reg { 87 int set; /* whether set or not */ 88 unsigned int u; /* unsigned integer */ 89 }; 90 91 /* 92 * An incredibly-simple string buffer. 93 */ 94 struct roffstr { 95 char *p; /* nil-terminated buffer */ 96 size_t sz; /* saved strlen(p) */ 97 }; 98 99 /* 100 * A key-value roffstr pair as part of a singly-linked list. 101 */ 102 struct roffkv { 103 struct roffstr key; 104 struct roffstr val; 105 struct roffkv *next; /* next in list */ 106 }; 107 108 struct roff { 109 enum mparset parsetype; /* requested parse type */ 110 struct mparse *parse; /* parse point */ 111 struct roffnode *last; /* leaf of stack */ 112 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 113 char control; /* control character */ 114 int rstackpos; /* position in rstack */ 115 struct reg regs[REG__MAX]; 116 struct roffkv *strtab; /* user-defined strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* last equation parsed */ 124 struct eqn_node *first_eqn; /* first equation parsed */ 125 struct eqn_node *eqn; /* current equation being parsed */ 126 }; 127 128 struct roffnode { 129 enum rofft tok; /* type of node */ 130 struct roffnode *parent; /* up one in stack */ 131 int line; /* parse line */ 132 int col; /* parse col */ 133 char *name; /* node name, e.g. macro name */ 134 char *end; /* end-rules: custom token */ 135 int endspan; /* end-rules: next-line or infty */ 136 enum roffrule rule; /* current evaluation rule */ 137 }; 138 139 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 140 enum rofft tok, /* tok of macro */ \ 141 char **bufp, /* input buffer */ \ 142 size_t *szp, /* size of input buffer */ \ 143 int ln, /* parse line */ \ 144 int ppos, /* original pos in buffer */ \ 145 int pos, /* current pos in buffer */ \ 146 int *offs /* reset offset of buffer data */ 147 148 typedef enum rofferr (*roffproc)(ROFF_ARGS); 149 150 struct roffmac { 151 const char *name; /* macro name */ 152 roffproc proc; /* process new macro */ 153 roffproc text; /* process as child text of macro */ 154 roffproc sub; /* process as child of macro */ 155 int flags; 156 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 157 struct roffmac *next; 158 }; 159 160 struct predef { 161 const char *name; /* predefined input name */ 162 const char *str; /* replacement symbol */ 163 }; 164 165 #define PREDEF(__name, __str) \ 166 { (__name), (__str) }, 167 168 static enum rofft roffhash_find(const char *, size_t); 169 static void roffhash_init(void); 170 static void roffnode_cleanscope(struct roff *); 171 static void roffnode_pop(struct roff *); 172 static void roffnode_push(struct roff *, enum rofft, 173 const char *, int, int); 174 static enum rofferr roff_block(ROFF_ARGS); 175 static enum rofferr roff_block_text(ROFF_ARGS); 176 static enum rofferr roff_block_sub(ROFF_ARGS); 177 static enum rofferr roff_cblock(ROFF_ARGS); 178 static enum rofferr roff_cc(ROFF_ARGS); 179 static enum rofferr roff_ccond(ROFF_ARGS); 180 static enum rofferr roff_cond(ROFF_ARGS); 181 static enum rofferr roff_cond_text(ROFF_ARGS); 182 static enum rofferr roff_cond_sub(ROFF_ARGS); 183 static enum rofferr roff_ds(ROFF_ARGS); 184 static enum roffrule roff_evalcond(const char *, int *); 185 static void roff_free1(struct roff *); 186 static void roff_freestr(struct roffkv *); 187 static char *roff_getname(struct roff *, char **, int, int); 188 static const char *roff_getstrn(const struct roff *, 189 const char *, size_t); 190 static enum rofferr roff_it(ROFF_ARGS); 191 static enum rofferr roff_line_ignore(ROFF_ARGS); 192 static enum rofferr roff_nr(ROFF_ARGS); 193 static void roff_openeqn(struct roff *, const char *, 194 int, int, const char *); 195 static enum rofft roff_parse(struct roff *, const char *, int *); 196 static enum rofferr roff_parsetext(char **, size_t *, int, int *); 197 static enum rofferr roff_res(struct roff *, 198 char **, size_t *, int, int); 199 static enum rofferr roff_rm(ROFF_ARGS); 200 static void roff_setstr(struct roff *, 201 const char *, const char *, int); 202 static void roff_setstrn(struct roffkv **, const char *, 203 size_t, const char *, size_t, int); 204 static enum rofferr roff_so(ROFF_ARGS); 205 static enum rofferr roff_tr(ROFF_ARGS); 206 static enum rofferr roff_Dd(ROFF_ARGS); 207 static enum rofferr roff_TH(ROFF_ARGS); 208 static enum rofferr roff_TE(ROFF_ARGS); 209 static enum rofferr roff_TS(ROFF_ARGS); 210 static enum rofferr roff_EQ(ROFF_ARGS); 211 static enum rofferr roff_EN(ROFF_ARGS); 212 static enum rofferr roff_T_(ROFF_ARGS); 213 static enum rofferr roff_userdef(ROFF_ARGS); 214 215 /* See roffhash_find() */ 216 217 #define ASCII_HI 126 218 #define ASCII_LO 33 219 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 220 221 static struct roffmac *hash[HASHWIDTH]; 222 223 static struct roffmac roffs[ROFF_MAX] = { 224 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 225 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 226 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 227 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 228 { "cc", roff_cc, NULL, NULL, 0, NULL }, 229 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 230 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 231 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 232 { "ds", roff_ds, NULL, NULL, 0, NULL }, 233 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 234 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 235 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 236 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 237 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 238 { "it", roff_it, NULL, NULL, 0, NULL }, 239 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 240 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 241 { "nr", roff_nr, NULL, NULL, 0, NULL }, 242 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 243 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 244 { "rm", roff_rm, NULL, NULL, 0, NULL }, 245 { "so", roff_so, NULL, NULL, 0, NULL }, 246 { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, 247 { "tr", roff_tr, NULL, NULL, 0, NULL }, 248 { "Dd", roff_Dd, NULL, NULL, 0, NULL }, 249 { "TH", roff_TH, NULL, NULL, 0, NULL }, 250 { "TS", roff_TS, NULL, NULL, 0, NULL }, 251 { "TE", roff_TE, NULL, NULL, 0, NULL }, 252 { "T&", roff_T_, NULL, NULL, 0, NULL }, 253 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 254 { "EN", roff_EN, NULL, NULL, 0, NULL }, 255 { ".", roff_cblock, NULL, NULL, 0, NULL }, 256 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 257 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 258 }; 259 260 const char *const __mdoc_reserved[] = { 261 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 262 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", 263 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", 264 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", 265 "Ds", "Dt", "Dv", "Dx", "D1", 266 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em", 267 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex", 268 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", 269 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP", 270 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", 271 "Oc", "Oo", "Op", "Os", "Ot", "Ox", 272 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq", 273 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv", 274 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq", 275 "Ss", "St", "Sx", "Sy", 276 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", 277 "%A", "%B", "%D", "%I", "%J", "%N", "%O", 278 "%P", "%Q", "%R", "%T", "%U", "%V", 279 NULL 280 }; 281 282 const char *const __man_reserved[] = { 283 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT", 284 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR", 285 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT", 286 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY", 287 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS", 288 NULL 289 }; 290 291 /* Array of injected predefined strings. */ 292 #define PREDEFS_MAX 38 293 static const struct predef predefs[PREDEFS_MAX] = { 294 #include "predefs.in" 295 }; 296 297 /* See roffhash_find() */ 298 #define ROFF_HASH(p) (p[0] - ASCII_LO) 299 300 static int roffit_lines; /* number of lines to delay */ 301 static char *roffit_macro; /* nil-terminated macro line */ 302 303 static void 304 roffhash_init(void) 305 { 306 struct roffmac *n; 307 int buc, i; 308 309 for (i = 0; i < (int)ROFF_USERDEF; i++) { 310 assert(roffs[i].name[0] >= ASCII_LO); 311 assert(roffs[i].name[0] <= ASCII_HI); 312 313 buc = ROFF_HASH(roffs[i].name); 314 315 if (NULL != (n = hash[buc])) { 316 for ( ; n->next; n = n->next) 317 /* Do nothing. */ ; 318 n->next = &roffs[i]; 319 } else 320 hash[buc] = &roffs[i]; 321 } 322 } 323 324 /* 325 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 326 * the nil-terminated string name could be found. 327 */ 328 static enum rofft 329 roffhash_find(const char *p, size_t s) 330 { 331 int buc; 332 struct roffmac *n; 333 334 /* 335 * libroff has an extremely simple hashtable, for the time 336 * being, which simply keys on the first character, which must 337 * be printable, then walks a chain. It works well enough until 338 * optimised. 339 */ 340 341 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 342 return(ROFF_MAX); 343 344 buc = ROFF_HASH(p); 345 346 if (NULL == (n = hash[buc])) 347 return(ROFF_MAX); 348 for ( ; n; n = n->next) 349 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 350 return((enum rofft)(n - roffs)); 351 352 return(ROFF_MAX); 353 } 354 355 356 /* 357 * Pop the current node off of the stack of roff instructions currently 358 * pending. 359 */ 360 static void 361 roffnode_pop(struct roff *r) 362 { 363 struct roffnode *p; 364 365 assert(r->last); 366 p = r->last; 367 368 r->last = r->last->parent; 369 free(p->name); 370 free(p->end); 371 free(p); 372 } 373 374 375 /* 376 * Push a roff node onto the instruction stack. This must later be 377 * removed with roffnode_pop(). 378 */ 379 static void 380 roffnode_push(struct roff *r, enum rofft tok, const char *name, 381 int line, int col) 382 { 383 struct roffnode *p; 384 385 p = mandoc_calloc(1, sizeof(struct roffnode)); 386 p->tok = tok; 387 if (name) 388 p->name = mandoc_strdup(name); 389 p->parent = r->last; 390 p->line = line; 391 p->col = col; 392 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; 393 394 r->last = p; 395 } 396 397 398 static void 399 roff_free1(struct roff *r) 400 { 401 struct tbl_node *tbl; 402 struct eqn_node *e; 403 int i; 404 405 while (NULL != (tbl = r->first_tbl)) { 406 r->first_tbl = tbl->next; 407 tbl_free(tbl); 408 } 409 410 r->first_tbl = r->last_tbl = r->tbl = NULL; 411 412 while (NULL != (e = r->first_eqn)) { 413 r->first_eqn = e->next; 414 eqn_free(e); 415 } 416 417 r->first_eqn = r->last_eqn = r->eqn = NULL; 418 419 while (r->last) 420 roffnode_pop(r); 421 422 roff_freestr(r->strtab); 423 roff_freestr(r->xmbtab); 424 425 r->strtab = r->xmbtab = NULL; 426 427 if (r->xtab) 428 for (i = 0; i < 128; i++) 429 free(r->xtab[i].p); 430 431 free(r->xtab); 432 r->xtab = NULL; 433 } 434 435 void 436 roff_reset(struct roff *r) 437 { 438 int i; 439 440 roff_free1(r); 441 442 r->control = 0; 443 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX); 444 445 for (i = 0; i < PREDEFS_MAX; i++) 446 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 447 } 448 449 450 void 451 roff_free(struct roff *r) 452 { 453 454 roff_free1(r); 455 free(r); 456 } 457 458 459 struct roff * 460 roff_alloc(enum mparset type, struct mparse *parse) 461 { 462 struct roff *r; 463 int i; 464 465 r = mandoc_calloc(1, sizeof(struct roff)); 466 r->parsetype = type; 467 r->parse = parse; 468 r->rstackpos = -1; 469 470 roffhash_init(); 471 472 for (i = 0; i < PREDEFS_MAX; i++) 473 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 474 475 return(r); 476 } 477 478 /* 479 * Pre-filter each and every line for reserved words (one beginning with 480 * `\*', e.g., `\*(ab'). These must be handled before the actual line 481 * is processed. 482 * This also checks the syntax of regular escapes. 483 */ 484 static enum rofferr 485 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) 486 { 487 enum mandoc_esc esc; 488 const char *stesc; /* start of an escape sequence ('\\') */ 489 const char *stnam; /* start of the name, after "[(*" */ 490 const char *cp; /* end of the name, e.g. before ']' */ 491 const char *res; /* the string to be substituted */ 492 int i, maxl, expand_count; 493 size_t nsz; 494 char *n; 495 496 expand_count = 0; 497 498 again: 499 cp = *bufp + pos; 500 while (NULL != (cp = strchr(cp, '\\'))) { 501 stesc = cp++; 502 503 /* 504 * The second character must be an asterisk. 505 * If it isn't, skip it anyway: It is escaped, 506 * so it can't start another escape sequence. 507 */ 508 509 if ('\0' == *cp) 510 return(ROFF_CONT); 511 512 if ('*' != *cp) { 513 res = cp; 514 esc = mandoc_escape(&cp, NULL, NULL); 515 if (ESCAPE_ERROR != esc) 516 continue; 517 cp = res; 518 mandoc_msg 519 (MANDOCERR_BADESCAPE, r->parse, 520 ln, (int)(stesc - *bufp), NULL); 521 return(ROFF_CONT); 522 } 523 524 cp++; 525 526 /* 527 * The third character decides the length 528 * of the name of the string. 529 * Save a pointer to the name. 530 */ 531 532 switch (*cp) { 533 case ('\0'): 534 return(ROFF_CONT); 535 case ('('): 536 cp++; 537 maxl = 2; 538 break; 539 case ('['): 540 cp++; 541 maxl = 0; 542 break; 543 default: 544 maxl = 1; 545 break; 546 } 547 stnam = cp; 548 549 /* Advance to the end of the name. */ 550 551 for (i = 0; 0 == maxl || i < maxl; i++, cp++) { 552 if ('\0' == *cp) { 553 mandoc_msg 554 (MANDOCERR_BADESCAPE, 555 r->parse, ln, 556 (int)(stesc - *bufp), NULL); 557 return(ROFF_CONT); 558 } 559 if (0 == maxl && ']' == *cp) 560 break; 561 } 562 563 /* 564 * Retrieve the replacement string; if it is 565 * undefined, resume searching for escapes. 566 */ 567 568 res = roff_getstrn(r, stnam, (size_t)i); 569 570 if (NULL == res) { 571 mandoc_msg 572 (MANDOCERR_BADESCAPE, r->parse, 573 ln, (int)(stesc - *bufp), NULL); 574 res = ""; 575 } 576 577 /* Replace the escape sequence by the string. */ 578 579 pos = stesc - *bufp; 580 581 nsz = *szp + strlen(res) + 1; 582 n = mandoc_malloc(nsz); 583 584 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); 585 strlcat(n, res, nsz); 586 strlcat(n, cp + (maxl ? 0 : 1), nsz); 587 588 free(*bufp); 589 590 *bufp = n; 591 *szp = nsz; 592 593 if (EXPAND_LIMIT >= ++expand_count) 594 goto again; 595 596 /* Just leave the string unexpanded. */ 597 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); 598 return(ROFF_IGN); 599 } 600 return(ROFF_CONT); 601 } 602 603 /* 604 * Process text streams: 605 * Convert all breakable hyphens into ASCII_HYPH. 606 * Decrement and spring input line trap. 607 */ 608 static enum rofferr 609 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs) 610 { 611 size_t sz; 612 const char *start; 613 char *p; 614 int isz; 615 enum mandoc_esc esc; 616 617 start = p = *bufp + pos; 618 619 while ('\0' != *p) { 620 sz = strcspn(p, "-\\"); 621 p += sz; 622 623 if ('\0' == *p) 624 break; 625 626 if ('\\' == *p) { 627 /* Skip over escapes. */ 628 p++; 629 esc = mandoc_escape 630 ((const char **)&p, NULL, NULL); 631 if (ESCAPE_ERROR == esc) 632 break; 633 continue; 634 } else if (p == start) { 635 p++; 636 continue; 637 } 638 639 if (isalpha((unsigned char)p[-1]) && 640 isalpha((unsigned char)p[1])) 641 *p = ASCII_HYPH; 642 p++; 643 } 644 645 /* Spring the input line trap. */ 646 if (1 == roffit_lines) { 647 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro); 648 if (-1 == isz) { 649 perror(NULL); 650 exit((int)MANDOCLEVEL_SYSERR); 651 } 652 free(*bufp); 653 *bufp = p; 654 *szp = isz + 1; 655 *offs = 0; 656 free(roffit_macro); 657 roffit_lines = 0; 658 return(ROFF_REPARSE); 659 } else if (1 < roffit_lines) 660 --roffit_lines; 661 return(ROFF_CONT); 662 } 663 664 enum rofferr 665 roff_parseln(struct roff *r, int ln, char **bufp, 666 size_t *szp, int pos, int *offs) 667 { 668 enum rofft t; 669 enum rofferr e; 670 int ppos, ctl; 671 672 /* 673 * Run the reserved-word filter only if we have some reserved 674 * words to fill in. 675 */ 676 677 e = roff_res(r, bufp, szp, ln, pos); 678 if (ROFF_IGN == e) 679 return(e); 680 assert(ROFF_CONT == e); 681 682 ppos = pos; 683 ctl = roff_getcontrol(r, *bufp, &pos); 684 685 /* 686 * First, if a scope is open and we're not a macro, pass the 687 * text through the macro's filter. If a scope isn't open and 688 * we're not a macro, just let it through. 689 * Finally, if there's an equation scope open, divert it into it 690 * no matter our state. 691 */ 692 693 if (r->last && ! ctl) { 694 t = r->last->tok; 695 assert(roffs[t].text); 696 e = (*roffs[t].text) 697 (r, t, bufp, szp, ln, pos, pos, offs); 698 assert(ROFF_IGN == e || ROFF_CONT == e); 699 if (ROFF_CONT != e) 700 return(e); 701 if (r->eqn) 702 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 703 if (r->tbl) 704 return(tbl_read(r->tbl, ln, *bufp, pos)); 705 return(roff_parsetext(bufp, szp, pos, offs)); 706 } else if ( ! ctl) { 707 if (r->eqn) 708 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 709 if (r->tbl) 710 return(tbl_read(r->tbl, ln, *bufp, pos)); 711 return(roff_parsetext(bufp, szp, pos, offs)); 712 } else if (r->eqn) 713 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); 714 715 /* 716 * If a scope is open, go to the child handler for that macro, 717 * as it may want to preprocess before doing anything with it. 718 * Don't do so if an equation is open. 719 */ 720 721 if (r->last) { 722 t = r->last->tok; 723 assert(roffs[t].sub); 724 return((*roffs[t].sub) 725 (r, t, bufp, szp, 726 ln, ppos, pos, offs)); 727 } 728 729 /* 730 * Lastly, as we've no scope open, try to look up and execute 731 * the new macro. If no macro is found, simply return and let 732 * the compilers handle it. 733 */ 734 735 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 736 return(ROFF_CONT); 737 738 assert(roffs[t].proc); 739 return((*roffs[t].proc) 740 (r, t, bufp, szp, 741 ln, ppos, pos, offs)); 742 } 743 744 745 void 746 roff_endparse(struct roff *r) 747 { 748 749 if (r->last) 750 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 751 r->last->line, r->last->col, NULL); 752 753 if (r->eqn) { 754 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 755 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); 756 eqn_end(&r->eqn); 757 } 758 759 if (r->tbl) { 760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 761 r->tbl->line, r->tbl->pos, NULL); 762 tbl_end(&r->tbl); 763 } 764 } 765 766 /* 767 * Parse a roff node's type from the input buffer. This must be in the 768 * form of ".foo xxx" in the usual way. 769 */ 770 static enum rofft 771 roff_parse(struct roff *r, const char *buf, int *pos) 772 { 773 const char *mac; 774 size_t maclen; 775 enum rofft t; 776 777 if ('\0' == buf[*pos] || '"' == buf[*pos] || 778 '\t' == buf[*pos] || ' ' == buf[*pos]) 779 return(ROFF_MAX); 780 781 /* 782 * We stop the macro parse at an escape, tab, space, or nil. 783 * However, `\}' is also a valid macro, so make sure we don't 784 * clobber it by seeing the `\' as the end of token. 785 */ 786 787 mac = buf + *pos; 788 maclen = strcspn(mac + 1, " \\\t\0") + 1; 789 790 t = (r->current_string = roff_getstrn(r, mac, maclen)) 791 ? ROFF_USERDEF : roffhash_find(mac, maclen); 792 793 *pos += (int)maclen; 794 795 while (buf[*pos] && ' ' == buf[*pos]) 796 (*pos)++; 797 798 return(t); 799 } 800 801 /* ARGSUSED */ 802 static enum rofferr 803 roff_cblock(ROFF_ARGS) 804 { 805 806 /* 807 * A block-close `..' should only be invoked as a child of an 808 * ignore macro, otherwise raise a warning and just ignore it. 809 */ 810 811 if (NULL == r->last) { 812 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 813 return(ROFF_IGN); 814 } 815 816 switch (r->last->tok) { 817 case (ROFF_am): 818 /* FALLTHROUGH */ 819 case (ROFF_ami): 820 /* FALLTHROUGH */ 821 case (ROFF_am1): 822 /* FALLTHROUGH */ 823 case (ROFF_de): 824 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 825 /* FALLTHROUGH */ 826 case (ROFF_dei): 827 /* FALLTHROUGH */ 828 case (ROFF_ig): 829 break; 830 default: 831 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 832 return(ROFF_IGN); 833 } 834 835 if ((*bufp)[pos]) 836 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 837 838 roffnode_pop(r); 839 roffnode_cleanscope(r); 840 return(ROFF_IGN); 841 842 } 843 844 845 static void 846 roffnode_cleanscope(struct roff *r) 847 { 848 849 while (r->last) { 850 if (--r->last->endspan != 0) 851 break; 852 roffnode_pop(r); 853 } 854 } 855 856 857 /* ARGSUSED */ 858 static enum rofferr 859 roff_ccond(ROFF_ARGS) 860 { 861 862 if (NULL == r->last) { 863 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 864 return(ROFF_IGN); 865 } 866 867 switch (r->last->tok) { 868 case (ROFF_el): 869 /* FALLTHROUGH */ 870 case (ROFF_ie): 871 /* FALLTHROUGH */ 872 case (ROFF_if): 873 break; 874 default: 875 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 876 return(ROFF_IGN); 877 } 878 879 if (r->last->endspan > -1) { 880 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 881 return(ROFF_IGN); 882 } 883 884 if ((*bufp)[pos]) 885 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 886 887 roffnode_pop(r); 888 roffnode_cleanscope(r); 889 return(ROFF_IGN); 890 } 891 892 893 /* ARGSUSED */ 894 static enum rofferr 895 roff_block(ROFF_ARGS) 896 { 897 int sv; 898 size_t sz; 899 char *name; 900 901 name = NULL; 902 903 if (ROFF_ig != tok) { 904 if ('\0' == (*bufp)[pos]) { 905 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 906 return(ROFF_IGN); 907 } 908 909 /* 910 * Re-write `de1', since we don't really care about 911 * groff's strange compatibility mode, into `de'. 912 */ 913 914 if (ROFF_de1 == tok) 915 tok = ROFF_de; 916 if (ROFF_de == tok) 917 name = *bufp + pos; 918 else 919 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, 920 roffs[tok].name); 921 922 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 923 pos++; 924 925 while (isspace((unsigned char)(*bufp)[pos])) 926 (*bufp)[pos++] = '\0'; 927 } 928 929 roffnode_push(r, tok, name, ln, ppos); 930 931 /* 932 * At the beginning of a `de' macro, clear the existing string 933 * with the same name, if there is one. New content will be 934 * added from roff_block_text() in multiline mode. 935 */ 936 937 if (ROFF_de == tok) 938 roff_setstr(r, name, "", 0); 939 940 if ('\0' == (*bufp)[pos]) 941 return(ROFF_IGN); 942 943 /* If present, process the custom end-of-line marker. */ 944 945 sv = pos; 946 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 947 pos++; 948 949 /* 950 * Note: groff does NOT like escape characters in the input. 951 * Instead of detecting this, we're just going to let it fly and 952 * to hell with it. 953 */ 954 955 assert(pos > sv); 956 sz = (size_t)(pos - sv); 957 958 if (1 == sz && '.' == (*bufp)[sv]) 959 return(ROFF_IGN); 960 961 r->last->end = mandoc_malloc(sz + 1); 962 963 memcpy(r->last->end, *bufp + sv, sz); 964 r->last->end[(int)sz] = '\0'; 965 966 if ((*bufp)[pos]) 967 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 968 969 return(ROFF_IGN); 970 } 971 972 973 /* ARGSUSED */ 974 static enum rofferr 975 roff_block_sub(ROFF_ARGS) 976 { 977 enum rofft t; 978 int i, j; 979 980 /* 981 * First check whether a custom macro exists at this level. If 982 * it does, then check against it. This is some of groff's 983 * stranger behaviours. If we encountered a custom end-scope 984 * tag and that tag also happens to be a "real" macro, then we 985 * need to try interpreting it again as a real macro. If it's 986 * not, then return ignore. Else continue. 987 */ 988 989 if (r->last->end) { 990 for (i = pos, j = 0; r->last->end[j]; j++, i++) 991 if ((*bufp)[i] != r->last->end[j]) 992 break; 993 994 if ('\0' == r->last->end[j] && 995 ('\0' == (*bufp)[i] || 996 ' ' == (*bufp)[i] || 997 '\t' == (*bufp)[i])) { 998 roffnode_pop(r); 999 roffnode_cleanscope(r); 1000 1001 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 1002 i++; 1003 1004 pos = i; 1005 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 1006 return(ROFF_RERUN); 1007 return(ROFF_IGN); 1008 } 1009 } 1010 1011 /* 1012 * If we have no custom end-query or lookup failed, then try 1013 * pulling it out of the hashtable. 1014 */ 1015 1016 t = roff_parse(r, *bufp, &pos); 1017 1018 /* 1019 * Macros other than block-end are only significant 1020 * in `de' blocks; elsewhere, simply throw them away. 1021 */ 1022 if (ROFF_cblock != t) { 1023 if (ROFF_de == tok) 1024 roff_setstr(r, r->last->name, *bufp + ppos, 1); 1025 return(ROFF_IGN); 1026 } 1027 1028 assert(roffs[t].proc); 1029 return((*roffs[t].proc)(r, t, bufp, szp, 1030 ln, ppos, pos, offs)); 1031 } 1032 1033 1034 /* ARGSUSED */ 1035 static enum rofferr 1036 roff_block_text(ROFF_ARGS) 1037 { 1038 1039 if (ROFF_de == tok) 1040 roff_setstr(r, r->last->name, *bufp + pos, 1); 1041 1042 return(ROFF_IGN); 1043 } 1044 1045 1046 /* ARGSUSED */ 1047 static enum rofferr 1048 roff_cond_sub(ROFF_ARGS) 1049 { 1050 enum rofft t; 1051 enum roffrule rr; 1052 char *ep; 1053 1054 rr = r->last->rule; 1055 roffnode_cleanscope(r); 1056 t = roff_parse(r, *bufp, &pos); 1057 1058 /* 1059 * Fully handle known macros when they are structurally 1060 * required or when the conditional evaluated to true. 1061 */ 1062 1063 if ((ROFF_MAX != t) && 1064 (ROFF_ccond == t || ROFFRULE_ALLOW == rr || 1065 ROFFMAC_STRUCT & roffs[t].flags)) { 1066 assert(roffs[t].proc); 1067 return((*roffs[t].proc)(r, t, bufp, szp, 1068 ln, ppos, pos, offs)); 1069 } 1070 1071 /* Always check for the closing delimiter `\}'. */ 1072 1073 ep = &(*bufp)[pos]; 1074 while (NULL != (ep = strchr(ep, '\\'))) { 1075 if ('}' != *(++ep)) 1076 continue; 1077 1078 /* 1079 * If we're at the end of line, then just chop 1080 * off the \} and resize the buffer. 1081 * If we aren't, then convert it to spaces. 1082 */ 1083 1084 if ('\0' == *(ep + 1)) { 1085 *--ep = '\0'; 1086 *szp -= 2; 1087 } else 1088 *(ep - 1) = *ep = ' '; 1089 1090 roff_ccond(r, ROFF_ccond, bufp, szp, 1091 ln, pos, pos + 2, offs); 1092 break; 1093 } 1094 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1095 } 1096 1097 /* ARGSUSED */ 1098 static enum rofferr 1099 roff_cond_text(ROFF_ARGS) 1100 { 1101 char *ep; 1102 enum roffrule rr; 1103 1104 rr = r->last->rule; 1105 roffnode_cleanscope(r); 1106 1107 ep = &(*bufp)[pos]; 1108 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 1109 ep++; 1110 if ('}' != *ep) 1111 continue; 1112 *ep = '&'; 1113 roff_ccond(r, ROFF_ccond, bufp, szp, 1114 ln, pos, pos + 2, offs); 1115 } 1116 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1117 } 1118 1119 static enum roffrule 1120 roff_evalcond(const char *v, int *pos) 1121 { 1122 1123 switch (v[*pos]) { 1124 case ('n'): 1125 (*pos)++; 1126 return(ROFFRULE_ALLOW); 1127 case ('e'): 1128 /* FALLTHROUGH */ 1129 case ('o'): 1130 /* FALLTHROUGH */ 1131 case ('t'): 1132 (*pos)++; 1133 return(ROFFRULE_DENY); 1134 default: 1135 break; 1136 } 1137 1138 while (v[*pos] && ' ' != v[*pos]) 1139 (*pos)++; 1140 return(ROFFRULE_DENY); 1141 } 1142 1143 /* ARGSUSED */ 1144 static enum rofferr 1145 roff_line_ignore(ROFF_ARGS) 1146 { 1147 1148 return(ROFF_IGN); 1149 } 1150 1151 /* ARGSUSED */ 1152 static enum rofferr 1153 roff_cond(ROFF_ARGS) 1154 { 1155 1156 roffnode_push(r, tok, NULL, ln, ppos); 1157 1158 /* 1159 * An `.el' has no conditional body: it will consume the value 1160 * of the current rstack entry set in prior `ie' calls or 1161 * defaults to DENY. 1162 * 1163 * If we're not an `el', however, then evaluate the conditional. 1164 */ 1165 1166 r->last->rule = ROFF_el == tok ? 1167 (r->rstackpos < 0 ? 1168 ROFFRULE_DENY : r->rstack[r->rstackpos--]) : 1169 roff_evalcond(*bufp, &pos); 1170 1171 /* 1172 * An if-else will put the NEGATION of the current evaluated 1173 * conditional into the stack of rules. 1174 */ 1175 1176 if (ROFF_ie == tok) { 1177 if (r->rstackpos == RSTACK_MAX - 1) { 1178 mandoc_msg(MANDOCERR_MEM, 1179 r->parse, ln, ppos, NULL); 1180 return(ROFF_ERR); 1181 } 1182 r->rstack[++r->rstackpos] = 1183 ROFFRULE_DENY == r->last->rule ? 1184 ROFFRULE_ALLOW : ROFFRULE_DENY; 1185 } 1186 1187 /* If the parent has false as its rule, then so do we. */ 1188 1189 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1190 r->last->rule = ROFFRULE_DENY; 1191 1192 /* 1193 * Determine scope. 1194 * If there is nothing on the line after the conditional, 1195 * not even whitespace, use next-line scope. 1196 */ 1197 1198 if ('\0' == (*bufp)[pos]) { 1199 r->last->endspan = 2; 1200 goto out; 1201 } 1202 1203 while (' ' == (*bufp)[pos]) 1204 pos++; 1205 1206 /* An opening brace requests multiline scope. */ 1207 1208 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1209 r->last->endspan = -1; 1210 pos += 2; 1211 goto out; 1212 } 1213 1214 /* 1215 * Anything else following the conditional causes 1216 * single-line scope. Warn if the scope contains 1217 * nothing but trailing whitespace. 1218 */ 1219 1220 if ('\0' == (*bufp)[pos]) 1221 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 1222 1223 r->last->endspan = 1; 1224 1225 out: 1226 *offs = pos; 1227 return(ROFF_RERUN); 1228 } 1229 1230 1231 /* ARGSUSED */ 1232 static enum rofferr 1233 roff_ds(ROFF_ARGS) 1234 { 1235 char *name, *string; 1236 1237 /* 1238 * A symbol is named by the first word following the macro 1239 * invocation up to a space. Its value is anything after the 1240 * name's trailing whitespace and optional double-quote. Thus, 1241 * 1242 * [.ds foo "bar " ] 1243 * 1244 * will have `bar " ' as its value. 1245 */ 1246 1247 string = *bufp + pos; 1248 name = roff_getname(r, &string, ln, pos); 1249 if ('\0' == *name) 1250 return(ROFF_IGN); 1251 1252 /* Read past initial double-quote. */ 1253 if ('"' == *string) 1254 string++; 1255 1256 /* The rest is the value. */ 1257 roff_setstr(r, name, string, 0); 1258 return(ROFF_IGN); 1259 } 1260 1261 int 1262 roff_regisset(const struct roff *r, enum regs reg) 1263 { 1264 1265 return(r->regs[(int)reg].set); 1266 } 1267 1268 unsigned int 1269 roff_regget(const struct roff *r, enum regs reg) 1270 { 1271 1272 return(r->regs[(int)reg].u); 1273 } 1274 1275 void 1276 roff_regunset(struct roff *r, enum regs reg) 1277 { 1278 1279 r->regs[(int)reg].set = 0; 1280 } 1281 1282 /* ARGSUSED */ 1283 static enum rofferr 1284 roff_nr(ROFF_ARGS) 1285 { 1286 const char *key; 1287 char *val; 1288 int iv; 1289 1290 val = *bufp + pos; 1291 key = roff_getname(r, &val, ln, pos); 1292 1293 if (0 == strcmp(key, "nS")) { 1294 r->regs[(int)REG_nS].set = 1; 1295 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0) 1296 r->regs[(int)REG_nS].u = (unsigned)iv; 1297 else 1298 r->regs[(int)REG_nS].u = 0u; 1299 } 1300 1301 return(ROFF_IGN); 1302 } 1303 1304 /* ARGSUSED */ 1305 static enum rofferr 1306 roff_rm(ROFF_ARGS) 1307 { 1308 const char *name; 1309 char *cp; 1310 1311 cp = *bufp + pos; 1312 while ('\0' != *cp) { 1313 name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); 1314 if ('\0' != *name) 1315 roff_setstr(r, name, NULL, 0); 1316 } 1317 return(ROFF_IGN); 1318 } 1319 1320 /* ARGSUSED */ 1321 static enum rofferr 1322 roff_it(ROFF_ARGS) 1323 { 1324 char *cp; 1325 size_t len; 1326 int iv; 1327 1328 /* Parse the number of lines. */ 1329 cp = *bufp + pos; 1330 len = strcspn(cp, " \t"); 1331 cp[len] = '\0'; 1332 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) { 1333 mandoc_msg(MANDOCERR_NUMERIC, r->parse, 1334 ln, ppos, *bufp + 1); 1335 return(ROFF_IGN); 1336 } 1337 cp += len + 1; 1338 1339 /* Arm the input line trap. */ 1340 roffit_lines = iv; 1341 roffit_macro = mandoc_strdup(cp); 1342 return(ROFF_IGN); 1343 } 1344 1345 /* ARGSUSED */ 1346 static enum rofferr 1347 roff_Dd(ROFF_ARGS) 1348 { 1349 const char *const *cp; 1350 1351 if (MPARSE_MDOC != r->parsetype) 1352 for (cp = __mdoc_reserved; *cp; cp++) 1353 roff_setstr(r, *cp, NULL, 0); 1354 1355 return(ROFF_CONT); 1356 } 1357 1358 /* ARGSUSED */ 1359 static enum rofferr 1360 roff_TH(ROFF_ARGS) 1361 { 1362 const char *const *cp; 1363 1364 if (MPARSE_MDOC != r->parsetype) 1365 for (cp = __man_reserved; *cp; cp++) 1366 roff_setstr(r, *cp, NULL, 0); 1367 1368 return(ROFF_CONT); 1369 } 1370 1371 /* ARGSUSED */ 1372 static enum rofferr 1373 roff_TE(ROFF_ARGS) 1374 { 1375 1376 if (NULL == r->tbl) 1377 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1378 else 1379 tbl_end(&r->tbl); 1380 1381 return(ROFF_IGN); 1382 } 1383 1384 /* ARGSUSED */ 1385 static enum rofferr 1386 roff_T_(ROFF_ARGS) 1387 { 1388 1389 if (NULL == r->tbl) 1390 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1391 else 1392 tbl_restart(ppos, ln, r->tbl); 1393 1394 return(ROFF_IGN); 1395 } 1396 1397 #if 0 1398 static int 1399 roff_closeeqn(struct roff *r) 1400 { 1401 1402 return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0); 1403 } 1404 #endif 1405 1406 static void 1407 roff_openeqn(struct roff *r, const char *name, int line, 1408 int offs, const char *buf) 1409 { 1410 struct eqn_node *e; 1411 int poff; 1412 1413 assert(NULL == r->eqn); 1414 e = eqn_alloc(name, offs, line, r->parse); 1415 1416 if (r->last_eqn) 1417 r->last_eqn->next = e; 1418 else 1419 r->first_eqn = r->last_eqn = e; 1420 1421 r->eqn = r->last_eqn = e; 1422 1423 if (buf) { 1424 poff = 0; 1425 eqn_read(&r->eqn, line, buf, offs, &poff); 1426 } 1427 } 1428 1429 /* ARGSUSED */ 1430 static enum rofferr 1431 roff_EQ(ROFF_ARGS) 1432 { 1433 1434 roff_openeqn(r, *bufp + pos, ln, ppos, NULL); 1435 return(ROFF_IGN); 1436 } 1437 1438 /* ARGSUSED */ 1439 static enum rofferr 1440 roff_EN(ROFF_ARGS) 1441 { 1442 1443 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1444 return(ROFF_IGN); 1445 } 1446 1447 /* ARGSUSED */ 1448 static enum rofferr 1449 roff_TS(ROFF_ARGS) 1450 { 1451 struct tbl_node *tbl; 1452 1453 if (r->tbl) { 1454 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); 1455 tbl_end(&r->tbl); 1456 } 1457 1458 tbl = tbl_alloc(ppos, ln, r->parse); 1459 1460 if (r->last_tbl) 1461 r->last_tbl->next = tbl; 1462 else 1463 r->first_tbl = r->last_tbl = tbl; 1464 1465 r->tbl = r->last_tbl = tbl; 1466 return(ROFF_IGN); 1467 } 1468 1469 /* ARGSUSED */ 1470 static enum rofferr 1471 roff_cc(ROFF_ARGS) 1472 { 1473 const char *p; 1474 1475 p = *bufp + pos; 1476 1477 if ('\0' == *p || '.' == (r->control = *p++)) 1478 r->control = 0; 1479 1480 if ('\0' != *p) 1481 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1482 1483 return(ROFF_IGN); 1484 } 1485 1486 /* ARGSUSED */ 1487 static enum rofferr 1488 roff_tr(ROFF_ARGS) 1489 { 1490 const char *p, *first, *second; 1491 size_t fsz, ssz; 1492 enum mandoc_esc esc; 1493 1494 p = *bufp + pos; 1495 1496 if ('\0' == *p) { 1497 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1498 return(ROFF_IGN); 1499 } 1500 1501 while ('\0' != *p) { 1502 fsz = ssz = 1; 1503 1504 first = p++; 1505 if ('\\' == *first) { 1506 esc = mandoc_escape(&p, NULL, NULL); 1507 if (ESCAPE_ERROR == esc) { 1508 mandoc_msg 1509 (MANDOCERR_BADESCAPE, r->parse, 1510 ln, (int)(p - *bufp), NULL); 1511 return(ROFF_IGN); 1512 } 1513 fsz = (size_t)(p - first); 1514 } 1515 1516 second = p++; 1517 if ('\\' == *second) { 1518 esc = mandoc_escape(&p, NULL, NULL); 1519 if (ESCAPE_ERROR == esc) { 1520 mandoc_msg 1521 (MANDOCERR_BADESCAPE, r->parse, 1522 ln, (int)(p - *bufp), NULL); 1523 return(ROFF_IGN); 1524 } 1525 ssz = (size_t)(p - second); 1526 } else if ('\0' == *second) { 1527 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, 1528 ln, (int)(p - *bufp), NULL); 1529 second = " "; 1530 p--; 1531 } 1532 1533 if (fsz > 1) { 1534 roff_setstrn(&r->xmbtab, first, 1535 fsz, second, ssz, 0); 1536 continue; 1537 } 1538 1539 if (NULL == r->xtab) 1540 r->xtab = mandoc_calloc 1541 (128, sizeof(struct roffstr)); 1542 1543 free(r->xtab[(int)*first].p); 1544 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 1545 r->xtab[(int)*first].sz = ssz; 1546 } 1547 1548 return(ROFF_IGN); 1549 } 1550 1551 /* ARGSUSED */ 1552 static enum rofferr 1553 roff_so(ROFF_ARGS) 1554 { 1555 char *name; 1556 1557 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); 1558 1559 /* 1560 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1561 * opening anything that's not in our cwd or anything beneath 1562 * it. Thus, explicitly disallow traversing up the file-system 1563 * or using absolute paths. 1564 */ 1565 1566 name = *bufp + pos; 1567 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1568 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); 1569 return(ROFF_ERR); 1570 } 1571 1572 *offs = pos; 1573 return(ROFF_SO); 1574 } 1575 1576 /* ARGSUSED */ 1577 static enum rofferr 1578 roff_userdef(ROFF_ARGS) 1579 { 1580 const char *arg[9]; 1581 char *cp, *n1, *n2; 1582 int i; 1583 1584 /* 1585 * Collect pointers to macro argument strings 1586 * and null-terminate them. 1587 */ 1588 cp = *bufp + pos; 1589 for (i = 0; i < 9; i++) 1590 arg[i] = '\0' == *cp ? "" : 1591 mandoc_getarg(r->parse, &cp, ln, &pos); 1592 1593 /* 1594 * Expand macro arguments. 1595 */ 1596 *szp = 0; 1597 n1 = cp = mandoc_strdup(r->current_string); 1598 while (NULL != (cp = strstr(cp, "\\$"))) { 1599 i = cp[2] - '1'; 1600 if (0 > i || 8 < i) { 1601 /* Not an argument invocation. */ 1602 cp += 2; 1603 continue; 1604 } 1605 1606 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1607 n2 = mandoc_malloc(*szp); 1608 1609 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1610 strlcat(n2, arg[i], *szp); 1611 strlcat(n2, cp + 3, *szp); 1612 1613 cp = n2 + (cp - n1); 1614 free(n1); 1615 n1 = n2; 1616 } 1617 1618 /* 1619 * Replace the macro invocation 1620 * by the expanded macro. 1621 */ 1622 free(*bufp); 1623 *bufp = n1; 1624 if (0 == *szp) 1625 *szp = strlen(*bufp) + 1; 1626 1627 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1628 ROFF_REPARSE : ROFF_APPEND); 1629 } 1630 1631 static char * 1632 roff_getname(struct roff *r, char **cpp, int ln, int pos) 1633 { 1634 char *name, *cp; 1635 1636 name = *cpp; 1637 if ('\0' == *name) 1638 return(name); 1639 1640 /* Read until end of name. */ 1641 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { 1642 if ('\\' != *cp) 1643 continue; 1644 cp++; 1645 if ('\\' == *cp) 1646 continue; 1647 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); 1648 *cp = '\0'; 1649 name = cp; 1650 } 1651 1652 /* Nil-terminate name. */ 1653 if ('\0' != *cp) 1654 *(cp++) = '\0'; 1655 1656 /* Read past spaces. */ 1657 while (' ' == *cp) 1658 cp++; 1659 1660 *cpp = cp; 1661 return(name); 1662 } 1663 1664 /* 1665 * Store *string into the user-defined string called *name. 1666 * In multiline mode, append to an existing entry and append '\n'; 1667 * else replace the existing entry, if there is one. 1668 * To clear an existing entry, call with (*r, *name, NULL, 0). 1669 */ 1670 static void 1671 roff_setstr(struct roff *r, const char *name, const char *string, 1672 int multiline) 1673 { 1674 1675 roff_setstrn(&r->strtab, name, strlen(name), string, 1676 string ? strlen(string) : 0, multiline); 1677 } 1678 1679 static void 1680 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 1681 const char *string, size_t stringsz, int multiline) 1682 { 1683 struct roffkv *n; 1684 char *c; 1685 int i; 1686 size_t oldch, newch; 1687 1688 /* Search for an existing string with the same name. */ 1689 n = *r; 1690 1691 while (n && strcmp(name, n->key.p)) 1692 n = n->next; 1693 1694 if (NULL == n) { 1695 /* Create a new string table entry. */ 1696 n = mandoc_malloc(sizeof(struct roffkv)); 1697 n->key.p = mandoc_strndup(name, namesz); 1698 n->key.sz = namesz; 1699 n->val.p = NULL; 1700 n->val.sz = 0; 1701 n->next = *r; 1702 *r = n; 1703 } else if (0 == multiline) { 1704 /* In multiline mode, append; else replace. */ 1705 free(n->val.p); 1706 n->val.p = NULL; 1707 n->val.sz = 0; 1708 } 1709 1710 if (NULL == string) 1711 return; 1712 1713 /* 1714 * One additional byte for the '\n' in multiline mode, 1715 * and one for the terminating '\0'. 1716 */ 1717 newch = stringsz + (multiline ? 2u : 1u); 1718 1719 if (NULL == n->val.p) { 1720 n->val.p = mandoc_malloc(newch); 1721 *n->val.p = '\0'; 1722 oldch = 0; 1723 } else { 1724 oldch = n->val.sz; 1725 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 1726 } 1727 1728 /* Skip existing content in the destination buffer. */ 1729 c = n->val.p + (int)oldch; 1730 1731 /* Append new content to the destination buffer. */ 1732 i = 0; 1733 while (i < (int)stringsz) { 1734 /* 1735 * Rudimentary roff copy mode: 1736 * Handle escaped backslashes. 1737 */ 1738 if ('\\' == string[i] && '\\' == string[i + 1]) 1739 i++; 1740 *c++ = string[i++]; 1741 } 1742 1743 /* Append terminating bytes. */ 1744 if (multiline) 1745 *c++ = '\n'; 1746 1747 *c = '\0'; 1748 n->val.sz = (int)(c - n->val.p); 1749 } 1750 1751 static const char * 1752 roff_getstrn(const struct roff *r, const char *name, size_t len) 1753 { 1754 const struct roffkv *n; 1755 1756 for (n = r->strtab; n; n = n->next) 1757 if (0 == strncmp(name, n->key.p, len) && 1758 '\0' == n->key.p[(int)len]) 1759 return(n->val.p); 1760 1761 return(NULL); 1762 } 1763 1764 static void 1765 roff_freestr(struct roffkv *r) 1766 { 1767 struct roffkv *n, *nn; 1768 1769 for (n = r; n; n = nn) { 1770 free(n->key.p); 1771 free(n->val.p); 1772 nn = n->next; 1773 free(n); 1774 } 1775 } 1776 1777 const struct tbl_span * 1778 roff_span(const struct roff *r) 1779 { 1780 1781 return(r->tbl ? tbl_span(r->tbl) : NULL); 1782 } 1783 1784 const struct eqn * 1785 roff_eqn(const struct roff *r) 1786 { 1787 1788 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 1789 } 1790 1791 /* 1792 * Duplicate an input string, making the appropriate character 1793 * conversations (as stipulated by `tr') along the way. 1794 * Returns a heap-allocated string with all the replacements made. 1795 */ 1796 char * 1797 roff_strdup(const struct roff *r, const char *p) 1798 { 1799 const struct roffkv *cp; 1800 char *res; 1801 const char *pp; 1802 size_t ssz, sz; 1803 enum mandoc_esc esc; 1804 1805 if (NULL == r->xmbtab && NULL == r->xtab) 1806 return(mandoc_strdup(p)); 1807 else if ('\0' == *p) 1808 return(mandoc_strdup("")); 1809 1810 /* 1811 * Step through each character looking for term matches 1812 * (remember that a `tr' can be invoked with an escape, which is 1813 * a glyph but the escape is multi-character). 1814 * We only do this if the character hash has been initialised 1815 * and the string is >0 length. 1816 */ 1817 1818 res = NULL; 1819 ssz = 0; 1820 1821 while ('\0' != *p) { 1822 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 1823 sz = r->xtab[(int)*p].sz; 1824 res = mandoc_realloc(res, ssz + sz + 1); 1825 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 1826 ssz += sz; 1827 p++; 1828 continue; 1829 } else if ('\\' != *p) { 1830 res = mandoc_realloc(res, ssz + 2); 1831 res[ssz++] = *p++; 1832 continue; 1833 } 1834 1835 /* Search for term matches. */ 1836 for (cp = r->xmbtab; cp; cp = cp->next) 1837 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 1838 break; 1839 1840 if (NULL != cp) { 1841 /* 1842 * A match has been found. 1843 * Append the match to the array and move 1844 * forward by its keysize. 1845 */ 1846 res = mandoc_realloc 1847 (res, ssz + cp->val.sz + 1); 1848 memcpy(res + ssz, cp->val.p, cp->val.sz); 1849 ssz += cp->val.sz; 1850 p += (int)cp->key.sz; 1851 continue; 1852 } 1853 1854 /* 1855 * Handle escapes carefully: we need to copy 1856 * over just the escape itself, or else we might 1857 * do replacements within the escape itself. 1858 * Make sure to pass along the bogus string. 1859 */ 1860 pp = p++; 1861 esc = mandoc_escape(&p, NULL, NULL); 1862 if (ESCAPE_ERROR == esc) { 1863 sz = strlen(pp); 1864 res = mandoc_realloc(res, ssz + sz + 1); 1865 memcpy(res + ssz, pp, sz); 1866 break; 1867 } 1868 /* 1869 * We bail out on bad escapes. 1870 * No need to warn: we already did so when 1871 * roff_res() was called. 1872 */ 1873 sz = (int)(p - pp); 1874 res = mandoc_realloc(res, ssz + sz + 1); 1875 memcpy(res + ssz, pp, sz); 1876 ssz += sz; 1877 } 1878 1879 res[(int)ssz] = '\0'; 1880 return(res); 1881 } 1882 1883 /* 1884 * Find out whether a line is a macro line or not. 1885 * If it is, adjust the current position and return one; if it isn't, 1886 * return zero and don't change the current position. 1887 * If the control character has been set with `.cc', then let that grain 1888 * precedence. 1889 * This is slighly contrary to groff, where using the non-breaking 1890 * control character when `cc' has been invoked will cause the 1891 * non-breaking macro contents to be printed verbatim. 1892 */ 1893 int 1894 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 1895 { 1896 int pos; 1897 1898 pos = *ppos; 1899 1900 if (0 != r->control && cp[pos] == r->control) 1901 pos++; 1902 else if (0 != r->control) 1903 return(0); 1904 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 1905 pos += 2; 1906 else if ('.' == cp[pos] || '\'' == cp[pos]) 1907 pos++; 1908 else 1909 return(0); 1910 1911 while (' ' == cp[pos] || '\t' == cp[pos]) 1912 pos++; 1913 1914 *ppos = pos; 1915 return(1); 1916 } 1917