1 /* $Id: mdoc.c,v 1.223 2014/08/06 15:09:05 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <time.h> 31 32 #include "mdoc.h" 33 #include "mandoc.h" 34 #include "mandoc_aux.h" 35 #include "libmdoc.h" 36 #include "libmandoc.h" 37 38 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 39 "Ap", "Dd", "Dt", "Os", 40 "Sh", "Ss", "Pp", "D1", 41 "Dl", "Bd", "Ed", "Bl", 42 "El", "It", "Ad", "An", 43 "Ar", "Cd", "Cm", "Dv", 44 "Er", "Ev", "Ex", "Fa", 45 "Fd", "Fl", "Fn", "Ft", 46 "Ic", "In", "Li", "Nd", 47 "Nm", "Op", "Ot", "Pa", 48 "Rv", "St", "Va", "Vt", 49 "Xr", "%A", "%B", "%D", 50 "%I", "%J", "%N", "%O", 51 "%P", "%R", "%T", "%V", 52 "Ac", "Ao", "Aq", "At", 53 "Bc", "Bf", "Bo", "Bq", 54 "Bsx", "Bx", "Db", "Dc", 55 "Do", "Dq", "Ec", "Ef", 56 "Em", "Eo", "Fx", "Ms", 57 "No", "Ns", "Nx", "Ox", 58 "Pc", "Pf", "Po", "Pq", 59 "Qc", "Ql", "Qo", "Qq", 60 "Re", "Rs", "Sc", "So", 61 "Sq", "Sm", "Sx", "Sy", 62 "Tn", "Ux", "Xc", "Xo", 63 "Fo", "Fc", "Oo", "Oc", 64 "Bk", "Ek", "Bt", "Hf", 65 "Fr", "Ud", "Lb", "Lp", 66 "Lk", "Mt", "Brq", "Bro", 67 "Brc", "%C", "Es", "En", 68 "Dx", "%Q", "br", "sp", 69 "%U", "Ta", "ll", "text", 70 }; 71 72 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 73 "split", "nosplit", "ragged", 74 "unfilled", "literal", "file", 75 "offset", "bullet", "dash", 76 "hyphen", "item", "enum", 77 "tag", "diag", "hang", 78 "ohang", "inset", "column", 79 "width", "compact", "std", 80 "filled", "words", "emphasis", 81 "symbolic", "nested", "centered" 82 }; 83 84 const char * const *mdoc_macronames = __mdoc_macronames; 85 const char * const *mdoc_argnames = __mdoc_argnames; 86 87 static void mdoc_node_free(struct mdoc_node *); 88 static void mdoc_node_unlink(struct mdoc *, 89 struct mdoc_node *); 90 static void mdoc_free1(struct mdoc *); 91 static void mdoc_alloc1(struct mdoc *); 92 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 93 enum mdoct, enum mdoc_type); 94 static int node_append(struct mdoc *, 95 struct mdoc_node *); 96 #if 0 97 static int mdoc_preptext(struct mdoc *, int, char *, int); 98 #endif 99 static int mdoc_ptext(struct mdoc *, int, char *, int); 100 static int mdoc_pmacro(struct mdoc *, int, char *, int); 101 102 103 const struct mdoc_node * 104 mdoc_node(const struct mdoc *mdoc) 105 { 106 107 return(mdoc->first); 108 } 109 110 const struct mdoc_meta * 111 mdoc_meta(const struct mdoc *mdoc) 112 { 113 114 return(&mdoc->meta); 115 } 116 117 /* 118 * Frees volatile resources (parse tree, meta-data, fields). 119 */ 120 static void 121 mdoc_free1(struct mdoc *mdoc) 122 { 123 124 if (mdoc->first) 125 mdoc_node_delete(mdoc, mdoc->first); 126 free(mdoc->meta.msec); 127 free(mdoc->meta.vol); 128 free(mdoc->meta.arch); 129 free(mdoc->meta.date); 130 free(mdoc->meta.title); 131 free(mdoc->meta.os); 132 free(mdoc->meta.name); 133 } 134 135 /* 136 * Allocate all volatile resources (parse tree, meta-data, fields). 137 */ 138 static void 139 mdoc_alloc1(struct mdoc *mdoc) 140 { 141 142 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 143 mdoc->flags = 0; 144 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 145 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 146 mdoc->first = mdoc->last; 147 mdoc->last->type = MDOC_ROOT; 148 mdoc->last->tok = MDOC_MAX; 149 mdoc->next = MDOC_NEXT_CHILD; 150 } 151 152 /* 153 * Free up volatile resources (see mdoc_free1()) then re-initialises the 154 * data with mdoc_alloc1(). After invocation, parse data has been reset 155 * and the parser is ready for re-invocation on a new tree; however, 156 * cross-parse non-volatile data is kept intact. 157 */ 158 void 159 mdoc_reset(struct mdoc *mdoc) 160 { 161 162 mdoc_free1(mdoc); 163 mdoc_alloc1(mdoc); 164 } 165 166 /* 167 * Completely free up all volatile and non-volatile parse resources. 168 * After invocation, the pointer is no longer usable. 169 */ 170 void 171 mdoc_free(struct mdoc *mdoc) 172 { 173 174 mdoc_free1(mdoc); 175 free(mdoc); 176 } 177 178 /* 179 * Allocate volatile and non-volatile parse resources. 180 */ 181 struct mdoc * 182 mdoc_alloc(struct roff *roff, struct mparse *parse, 183 const char *defos, int quick) 184 { 185 struct mdoc *p; 186 187 p = mandoc_calloc(1, sizeof(struct mdoc)); 188 189 p->parse = parse; 190 p->defos = defos; 191 p->quick = quick; 192 p->roff = roff; 193 194 mdoc_hash_init(); 195 mdoc_alloc1(p); 196 return(p); 197 } 198 199 int 200 mdoc_endparse(struct mdoc *mdoc) 201 { 202 203 return(mdoc_macroend(mdoc)); 204 } 205 206 int 207 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 208 { 209 struct mdoc_node *n; 210 211 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 212 n->eqn = ep; 213 214 if ( ! node_append(mdoc, n)) 215 return(0); 216 217 mdoc->next = MDOC_NEXT_SIBLING; 218 return(1); 219 } 220 221 int 222 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 223 { 224 struct mdoc_node *n; 225 226 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 227 n->span = sp; 228 229 if ( ! node_append(mdoc, n)) 230 return(0); 231 232 mdoc->next = MDOC_NEXT_SIBLING; 233 return(1); 234 } 235 236 /* 237 * Main parse routine. Parses a single line -- really just hands off to 238 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 239 */ 240 int 241 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 242 { 243 244 mdoc->flags |= MDOC_NEWLINE; 245 246 /* 247 * Let the roff nS register switch SYNOPSIS mode early, 248 * such that the parser knows at all times 249 * whether this mode is on or off. 250 * Note that this mode is also switched by the Sh macro. 251 */ 252 if (roff_getreg(mdoc->roff, "nS")) 253 mdoc->flags |= MDOC_SYNOPSIS; 254 else 255 mdoc->flags &= ~MDOC_SYNOPSIS; 256 257 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 258 mdoc_pmacro(mdoc, ln, buf, offs) : 259 mdoc_ptext(mdoc, ln, buf, offs)); 260 } 261 262 int 263 mdoc_macro(MACRO_PROT_ARGS) 264 { 265 assert(tok < MDOC_MAX); 266 267 if (mdoc->flags & MDOC_PBODY) { 268 if (tok == MDOC_Dt) { 269 mandoc_vmsg(MANDOCERR_DT_LATE, 270 mdoc->parse, line, ppos, 271 "Dt %s", buf + *pos); 272 return(1); 273 } 274 } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) { 275 if (mdoc->meta.title == NULL) { 276 mandoc_vmsg(MANDOCERR_DT_NOTITLE, 277 mdoc->parse, line, ppos, "%s %s", 278 mdoc_macronames[tok], buf + *pos); 279 mdoc->meta.title = mandoc_strdup("UNTITLED"); 280 } 281 if (NULL == mdoc->meta.vol) 282 mdoc->meta.vol = mandoc_strdup("LOCAL"); 283 mdoc->flags |= MDOC_PBODY; 284 } 285 286 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 287 } 288 289 290 static int 291 node_append(struct mdoc *mdoc, struct mdoc_node *p) 292 { 293 294 assert(mdoc->last); 295 assert(mdoc->first); 296 assert(MDOC_ROOT != p->type); 297 298 switch (mdoc->next) { 299 case MDOC_NEXT_SIBLING: 300 mdoc->last->next = p; 301 p->prev = mdoc->last; 302 p->parent = mdoc->last->parent; 303 break; 304 case MDOC_NEXT_CHILD: 305 mdoc->last->child = p; 306 p->parent = mdoc->last; 307 break; 308 default: 309 abort(); 310 /* NOTREACHED */ 311 } 312 313 p->parent->nchild++; 314 315 /* 316 * Copy over the normalised-data pointer of our parent. Not 317 * everybody has one, but copying a null pointer is fine. 318 */ 319 320 switch (p->type) { 321 case MDOC_BODY: 322 if (ENDBODY_NOT != p->end) 323 break; 324 /* FALLTHROUGH */ 325 case MDOC_TAIL: 326 /* FALLTHROUGH */ 327 case MDOC_HEAD: 328 p->norm = p->parent->norm; 329 break; 330 default: 331 break; 332 } 333 334 if ( ! mdoc_valid_pre(mdoc, p)) 335 return(0); 336 337 switch (p->type) { 338 case MDOC_HEAD: 339 assert(MDOC_BLOCK == p->parent->type); 340 p->parent->head = p; 341 break; 342 case MDOC_TAIL: 343 assert(MDOC_BLOCK == p->parent->type); 344 p->parent->tail = p; 345 break; 346 case MDOC_BODY: 347 if (p->end) 348 break; 349 assert(MDOC_BLOCK == p->parent->type); 350 p->parent->body = p; 351 break; 352 default: 353 break; 354 } 355 356 mdoc->last = p; 357 358 switch (p->type) { 359 case MDOC_TBL: 360 /* FALLTHROUGH */ 361 case MDOC_TEXT: 362 if ( ! mdoc_valid_post(mdoc)) 363 return(0); 364 break; 365 default: 366 break; 367 } 368 369 return(1); 370 } 371 372 static struct mdoc_node * 373 node_alloc(struct mdoc *mdoc, int line, int pos, 374 enum mdoct tok, enum mdoc_type type) 375 { 376 struct mdoc_node *p; 377 378 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 379 p->sec = mdoc->lastsec; 380 p->line = line; 381 p->pos = pos; 382 p->lastline = line; 383 p->tok = tok; 384 p->type = type; 385 386 /* Flag analysis. */ 387 388 if (MDOC_SYNOPSIS & mdoc->flags) 389 p->flags |= MDOC_SYNPRETTY; 390 else 391 p->flags &= ~MDOC_SYNPRETTY; 392 if (MDOC_NEWLINE & mdoc->flags) 393 p->flags |= MDOC_LINE; 394 mdoc->flags &= ~MDOC_NEWLINE; 395 396 return(p); 397 } 398 399 int 400 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 401 { 402 struct mdoc_node *p; 403 404 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 405 if ( ! node_append(mdoc, p)) 406 return(0); 407 mdoc->next = MDOC_NEXT_CHILD; 408 return(1); 409 } 410 411 int 412 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 413 { 414 struct mdoc_node *p; 415 416 assert(mdoc->first); 417 assert(mdoc->last); 418 419 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 420 if ( ! node_append(mdoc, p)) 421 return(0); 422 mdoc->next = MDOC_NEXT_CHILD; 423 return(1); 424 } 425 426 int 427 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 428 { 429 struct mdoc_node *p; 430 431 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 432 if ( ! node_append(mdoc, p)) 433 return(0); 434 mdoc->next = MDOC_NEXT_CHILD; 435 return(1); 436 } 437 438 int 439 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 440 struct mdoc_node *body, enum mdoc_endbody end) 441 { 442 struct mdoc_node *p; 443 444 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 445 p->pending = body; 446 p->norm = body->norm; 447 p->end = end; 448 if ( ! node_append(mdoc, p)) 449 return(0); 450 mdoc->next = MDOC_NEXT_SIBLING; 451 return(1); 452 } 453 454 int 455 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 456 enum mdoct tok, struct mdoc_arg *args) 457 { 458 struct mdoc_node *p; 459 460 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 461 p->args = args; 462 if (p->args) 463 (args->refcnt)++; 464 465 switch (tok) { 466 case MDOC_Bd: 467 /* FALLTHROUGH */ 468 case MDOC_Bf: 469 /* FALLTHROUGH */ 470 case MDOC_Bl: 471 /* FALLTHROUGH */ 472 case MDOC_En: 473 /* FALLTHROUGH */ 474 case MDOC_Rs: 475 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 476 break; 477 default: 478 break; 479 } 480 481 if ( ! node_append(mdoc, p)) 482 return(0); 483 mdoc->next = MDOC_NEXT_CHILD; 484 return(1); 485 } 486 487 int 488 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 489 enum mdoct tok, struct mdoc_arg *args) 490 { 491 struct mdoc_node *p; 492 493 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 494 p->args = args; 495 if (p->args) 496 (args->refcnt)++; 497 498 switch (tok) { 499 case MDOC_An: 500 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 501 break; 502 default: 503 break; 504 } 505 506 if ( ! node_append(mdoc, p)) 507 return(0); 508 mdoc->next = MDOC_NEXT_CHILD; 509 return(1); 510 } 511 512 int 513 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 514 { 515 struct mdoc_node *n; 516 517 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 518 n->string = roff_strdup(mdoc->roff, p); 519 520 if ( ! node_append(mdoc, n)) 521 return(0); 522 523 mdoc->next = MDOC_NEXT_SIBLING; 524 return(1); 525 } 526 527 void 528 mdoc_word_append(struct mdoc *mdoc, const char *p) 529 { 530 struct mdoc_node *n; 531 char *addstr, *newstr; 532 533 n = mdoc->last; 534 addstr = roff_strdup(mdoc->roff, p); 535 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 536 free(addstr); 537 free(n->string); 538 n->string = newstr; 539 mdoc->next = MDOC_NEXT_SIBLING; 540 } 541 542 static void 543 mdoc_node_free(struct mdoc_node *p) 544 { 545 546 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 547 free(p->norm); 548 if (p->string) 549 free(p->string); 550 if (p->args) 551 mdoc_argv_free(p->args); 552 free(p); 553 } 554 555 static void 556 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 557 { 558 559 /* Adjust siblings. */ 560 561 if (n->prev) 562 n->prev->next = n->next; 563 if (n->next) 564 n->next->prev = n->prev; 565 566 /* Adjust parent. */ 567 568 if (n->parent) { 569 n->parent->nchild--; 570 if (n->parent->child == n) 571 n->parent->child = n->prev ? n->prev : n->next; 572 if (n->parent->last == n) 573 n->parent->last = n->prev ? n->prev : NULL; 574 } 575 576 /* Adjust parse point, if applicable. */ 577 578 if (mdoc && mdoc->last == n) { 579 if (n->prev) { 580 mdoc->last = n->prev; 581 mdoc->next = MDOC_NEXT_SIBLING; 582 } else { 583 mdoc->last = n->parent; 584 mdoc->next = MDOC_NEXT_CHILD; 585 } 586 } 587 588 if (mdoc && mdoc->first == n) 589 mdoc->first = NULL; 590 } 591 592 void 593 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 594 { 595 596 while (p->child) { 597 assert(p->nchild); 598 mdoc_node_delete(mdoc, p->child); 599 } 600 assert(0 == p->nchild); 601 602 mdoc_node_unlink(mdoc, p); 603 mdoc_node_free(p); 604 } 605 606 int 607 mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 608 { 609 610 mdoc_node_unlink(mdoc, p); 611 return(node_append(mdoc, p)); 612 } 613 614 #if 0 615 /* 616 * Pre-treat a text line. 617 * Text lines can consist of equations, which must be handled apart from 618 * the regular text. 619 * Thus, use this function to step through a line checking if it has any 620 * equations embedded in it. 621 * This must handle multiple equations AND equations that do not end at 622 * the end-of-line, i.e., will re-enter in the next roff parse. 623 */ 624 static int 625 mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 626 { 627 char *start, *end; 628 char delim; 629 630 while ('\0' != buf[offs]) { 631 /* Mark starting position if eqn is set. */ 632 start = NULL; 633 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 634 if (NULL != (start = strchr(buf + offs, delim))) 635 *start++ = '\0'; 636 637 /* Parse text as normal. */ 638 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 639 return(0); 640 641 /* Continue only if an equation exists. */ 642 if (NULL == start) 643 break; 644 645 /* Read past the end of the equation. */ 646 offs += start - (buf + offs); 647 assert(start == &buf[offs]); 648 if (NULL != (end = strchr(buf + offs, delim))) { 649 *end++ = '\0'; 650 while (' ' == *end) 651 end++; 652 } 653 654 /* Parse the equation itself. */ 655 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 656 657 /* Process a finished equation? */ 658 if (roff_closeeqn(mdoc->roff)) 659 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 660 return(0); 661 offs += (end - (buf + offs)); 662 } 663 664 return(1); 665 } 666 #endif 667 668 /* 669 * Parse free-form text, that is, a line that does not begin with the 670 * control character. 671 */ 672 static int 673 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 674 { 675 char *c, *ws, *end; 676 struct mdoc_node *n; 677 678 assert(mdoc->last); 679 n = mdoc->last; 680 681 /* 682 * Divert directly to list processing if we're encountering a 683 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 684 * (a MDOC_BODY means it's already open, in which case we should 685 * process within its context in the normal way). 686 */ 687 688 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 689 LIST_column == n->norm->Bl.type) { 690 /* `Bl' is open without any children. */ 691 mdoc->flags |= MDOC_FREECOL; 692 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 693 } 694 695 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 696 NULL != n->parent && 697 MDOC_Bl == n->parent->tok && 698 LIST_column == n->parent->norm->Bl.type) { 699 /* `Bl' has block-level `It' children. */ 700 mdoc->flags |= MDOC_FREECOL; 701 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 702 } 703 704 /* 705 * Search for the beginning of unescaped trailing whitespace (ws) 706 * and for the first character not to be output (end). 707 */ 708 709 /* FIXME: replace with strcspn(). */ 710 ws = NULL; 711 for (c = end = buf + offs; *c; c++) { 712 switch (*c) { 713 case ' ': 714 if (NULL == ws) 715 ws = c; 716 continue; 717 case '\t': 718 /* 719 * Always warn about trailing tabs, 720 * even outside literal context, 721 * where they should be put on the next line. 722 */ 723 if (NULL == ws) 724 ws = c; 725 /* 726 * Strip trailing tabs in literal context only; 727 * outside, they affect the next line. 728 */ 729 if (MDOC_LITERAL & mdoc->flags) 730 continue; 731 break; 732 case '\\': 733 /* Skip the escaped character, too, if any. */ 734 if (c[1]) 735 c++; 736 /* FALLTHROUGH */ 737 default: 738 ws = NULL; 739 break; 740 } 741 end = c + 1; 742 } 743 *end = '\0'; 744 745 if (ws) 746 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 747 line, (int)(ws-buf), NULL); 748 749 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 750 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 751 line, (int)(c - buf), NULL); 752 753 /* 754 * Insert a `sp' in the case of a blank line. Technically, 755 * blank lines aren't allowed, but enough manuals assume this 756 * behaviour that we want to work around it. 757 */ 758 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 759 return(0); 760 761 mdoc->next = MDOC_NEXT_SIBLING; 762 763 return(mdoc_valid_post(mdoc)); 764 } 765 766 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 767 return(0); 768 769 if (MDOC_LITERAL & mdoc->flags) 770 return(1); 771 772 /* 773 * End-of-sentence check. If the last character is an unescaped 774 * EOS character, then flag the node as being the end of a 775 * sentence. The front-end will know how to interpret this. 776 */ 777 778 assert(buf < end); 779 780 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 781 mdoc->last->flags |= MDOC_EOS; 782 783 return(1); 784 } 785 786 /* 787 * Parse a macro line, that is, a line beginning with the control 788 * character. 789 */ 790 static int 791 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 792 { 793 enum mdoct tok; 794 int i, sv; 795 char mac[5]; 796 struct mdoc_node *n; 797 798 /* Empty post-control lines are ignored. */ 799 800 if ('"' == buf[offs]) { 801 mandoc_msg(MANDOCERR_COMMENT_BAD, mdoc->parse, 802 ln, offs, NULL); 803 return(1); 804 } else if ('\0' == buf[offs]) 805 return(1); 806 807 sv = offs; 808 809 /* 810 * Copy the first word into a nil-terminated buffer. 811 * Stop copying when a tab, space, or eoln is encountered. 812 */ 813 814 i = 0; 815 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] && 816 '\t' != buf[offs]) 817 mac[i++] = buf[offs++]; 818 819 mac[i] = '\0'; 820 821 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 822 823 if (MDOC_MAX == tok) { 824 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 825 ln, sv, buf + sv - 1); 826 return(1); 827 } 828 829 /* Disregard the first trailing tab, if applicable. */ 830 831 if ('\t' == buf[offs]) 832 offs++; 833 834 /* Jump to the next non-whitespace word. */ 835 836 while (buf[offs] && ' ' == buf[offs]) 837 offs++; 838 839 /* 840 * Trailing whitespace. Note that tabs are allowed to be passed 841 * into the parser as "text", so we only warn about spaces here. 842 */ 843 844 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 845 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 846 ln, offs - 1, NULL); 847 848 /* 849 * If an initial macro or a list invocation, divert directly 850 * into macro processing. 851 */ 852 853 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) 854 return(mdoc_macro(mdoc, tok, ln, sv, &offs, buf)); 855 856 n = mdoc->last; 857 assert(mdoc->last); 858 859 /* 860 * If the first macro of a `Bl -column', open an `It' block 861 * context around the parsed macro. 862 */ 863 864 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 865 LIST_column == n->norm->Bl.type) { 866 mdoc->flags |= MDOC_FREECOL; 867 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); 868 } 869 870 /* 871 * If we're following a block-level `It' within a `Bl -column' 872 * context (perhaps opened in the above block or in ptext()), 873 * then open an `It' block context around the parsed macro. 874 */ 875 876 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 877 NULL != n->parent && 878 MDOC_Bl == n->parent->tok && 879 LIST_column == n->parent->norm->Bl.type) { 880 mdoc->flags |= MDOC_FREECOL; 881 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); 882 } 883 884 /* Normal processing of a macro. */ 885 886 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 887 return(0); 888 889 /* In quick mode (for mandocdb), abort after the NAME section. */ 890 891 if (mdoc->quick && MDOC_Sh == tok && 892 SEC_NAME != mdoc->last->sec) 893 return(2); 894 895 return(1); 896 } 897 898 enum mdelim 899 mdoc_isdelim(const char *p) 900 { 901 902 if ('\0' == p[0]) 903 return(DELIM_NONE); 904 905 if ('\0' == p[1]) 906 switch (p[0]) { 907 case '(': 908 /* FALLTHROUGH */ 909 case '[': 910 return(DELIM_OPEN); 911 case '|': 912 return(DELIM_MIDDLE); 913 case '.': 914 /* FALLTHROUGH */ 915 case ',': 916 /* FALLTHROUGH */ 917 case ';': 918 /* FALLTHROUGH */ 919 case ':': 920 /* FALLTHROUGH */ 921 case '?': 922 /* FALLTHROUGH */ 923 case '!': 924 /* FALLTHROUGH */ 925 case ')': 926 /* FALLTHROUGH */ 927 case ']': 928 return(DELIM_CLOSE); 929 default: 930 return(DELIM_NONE); 931 } 932 933 if ('\\' != p[0]) 934 return(DELIM_NONE); 935 936 if (0 == strcmp(p + 1, ".")) 937 return(DELIM_CLOSE); 938 if (0 == strcmp(p + 1, "fR|\\fP")) 939 return(DELIM_MIDDLE); 940 941 return(DELIM_NONE); 942 } 943 944 void 945 mdoc_deroff(char **dest, const struct mdoc_node *n) 946 { 947 char *cp; 948 size_t sz; 949 950 if (MDOC_TEXT != n->type) { 951 for (n = n->child; n; n = n->next) 952 mdoc_deroff(dest, n); 953 return; 954 } 955 956 /* Skip leading whitespace. */ 957 958 for (cp = n->string; '\0' != *cp; cp++) 959 if (0 == isspace((unsigned char)*cp)) 960 break; 961 962 /* Skip trailing whitespace. */ 963 964 for (sz = strlen(cp); sz; sz--) 965 if (0 == isspace((unsigned char)cp[sz-1])) 966 break; 967 968 /* Skip empty strings. */ 969 970 if (0 == sz) 971 return; 972 973 if (NULL == *dest) { 974 *dest = mandoc_strndup(cp, sz); 975 return; 976 } 977 978 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 979 free(*dest); 980 *dest = cp; 981 } 982