1 /* $Id: mdoc.c,v 1.203 2012/11/17 00:26:33 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "mdoc.h" 32 #include "mandoc.h" 33 #include "libmdoc.h" 34 #include "libmandoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX] = { 37 "Ap", "Dd", "Dt", "Os", 38 "Sh", "Ss", "Pp", "D1", 39 "Dl", "Bd", "Ed", "Bl", 40 "El", "It", "Ad", "An", 41 "Ar", "Cd", "Cm", "Dv", 42 "Er", "Ev", "Ex", "Fa", 43 "Fd", "Fl", "Fn", "Ft", 44 "Ic", "In", "Li", "Nd", 45 "Nm", "Op", "Ot", "Pa", 46 "Rv", "St", "Va", "Vt", 47 /* LINTED */ 48 "Xr", "%A", "%B", "%D", 49 /* LINTED */ 50 "%I", "%J", "%N", "%O", 51 /* LINTED */ 52 "%P", "%R", "%T", "%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Lp", 67 "Lk", "Mt", "Brq", "Bro", 68 /* LINTED */ 69 "Brc", "%C", "Es", "En", 70 /* LINTED */ 71 "Dx", "%Q", "br", "sp", 72 /* LINTED */ 73 "%U", "Ta" 74 }; 75 76 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77 "split", "nosplit", "ragged", 78 "unfilled", "literal", "file", 79 "offset", "bullet", "dash", 80 "hyphen", "item", "enum", 81 "tag", "diag", "hang", 82 "ohang", "inset", "column", 83 "width", "compact", "std", 84 "filled", "words", "emphasis", 85 "symbolic", "nested", "centered" 86 }; 87 88 const char * const *mdoc_macronames = __mdoc_macronames; 89 const char * const *mdoc_argnames = __mdoc_argnames; 90 91 static void mdoc_node_free(struct mdoc_node *); 92 static void mdoc_node_unlink(struct mdoc *, 93 struct mdoc_node *); 94 static void mdoc_free1(struct mdoc *); 95 static void mdoc_alloc1(struct mdoc *); 96 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 97 enum mdoct, enum mdoc_type); 98 static int node_append(struct mdoc *, 99 struct mdoc_node *); 100 #if 0 101 static int mdoc_preptext(struct mdoc *, int, char *, int); 102 #endif 103 static int mdoc_ptext(struct mdoc *, int, char *, int); 104 static int mdoc_pmacro(struct mdoc *, int, char *, int); 105 106 const struct mdoc_node * 107 mdoc_node(const struct mdoc *mdoc) 108 { 109 110 assert( ! (MDOC_HALT & mdoc->flags)); 111 return(mdoc->first); 112 } 113 114 115 const struct mdoc_meta * 116 mdoc_meta(const struct mdoc *mdoc) 117 { 118 119 assert( ! (MDOC_HALT & mdoc->flags)); 120 return(&mdoc->meta); 121 } 122 123 124 /* 125 * Frees volatile resources (parse tree, meta-data, fields). 126 */ 127 static void 128 mdoc_free1(struct mdoc *mdoc) 129 { 130 131 if (mdoc->first) 132 mdoc_node_delete(mdoc, mdoc->first); 133 if (mdoc->meta.title) 134 free(mdoc->meta.title); 135 if (mdoc->meta.os) 136 free(mdoc->meta.os); 137 if (mdoc->meta.name) 138 free(mdoc->meta.name); 139 if (mdoc->meta.arch) 140 free(mdoc->meta.arch); 141 if (mdoc->meta.vol) 142 free(mdoc->meta.vol); 143 if (mdoc->meta.msec) 144 free(mdoc->meta.msec); 145 if (mdoc->meta.date) 146 free(mdoc->meta.date); 147 } 148 149 150 /* 151 * Allocate all volatile resources (parse tree, meta-data, fields). 152 */ 153 static void 154 mdoc_alloc1(struct mdoc *mdoc) 155 { 156 157 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 158 mdoc->flags = 0; 159 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 160 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 161 mdoc->first = mdoc->last; 162 mdoc->last->type = MDOC_ROOT; 163 mdoc->last->tok = MDOC_MAX; 164 mdoc->next = MDOC_NEXT_CHILD; 165 } 166 167 168 /* 169 * Free up volatile resources (see mdoc_free1()) then re-initialises the 170 * data with mdoc_alloc1(). After invocation, parse data has been reset 171 * and the parser is ready for re-invocation on a new tree; however, 172 * cross-parse non-volatile data is kept intact. 173 */ 174 void 175 mdoc_reset(struct mdoc *mdoc) 176 { 177 178 mdoc_free1(mdoc); 179 mdoc_alloc1(mdoc); 180 } 181 182 183 /* 184 * Completely free up all volatile and non-volatile parse resources. 185 * After invocation, the pointer is no longer usable. 186 */ 187 void 188 mdoc_free(struct mdoc *mdoc) 189 { 190 191 mdoc_free1(mdoc); 192 free(mdoc); 193 } 194 195 196 /* 197 * Allocate volatile and non-volatile parse resources. 198 */ 199 struct mdoc * 200 mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) 201 { 202 struct mdoc *p; 203 204 p = mandoc_calloc(1, sizeof(struct mdoc)); 205 206 p->parse = parse; 207 p->defos = defos; 208 p->roff = roff; 209 210 mdoc_hash_init(); 211 mdoc_alloc1(p); 212 return(p); 213 } 214 215 216 /* 217 * Climb back up the parse tree, validating open scopes. Mostly calls 218 * through to macro_end() in macro.c. 219 */ 220 int 221 mdoc_endparse(struct mdoc *mdoc) 222 { 223 224 assert( ! (MDOC_HALT & mdoc->flags)); 225 if (mdoc_macroend(mdoc)) 226 return(1); 227 mdoc->flags |= MDOC_HALT; 228 return(0); 229 } 230 231 int 232 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 233 { 234 struct mdoc_node *n; 235 236 assert( ! (MDOC_HALT & mdoc->flags)); 237 238 /* No text before an initial macro. */ 239 240 if (SEC_NONE == mdoc->lastnamed) { 241 mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT); 242 return(1); 243 } 244 245 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 246 n->eqn = ep; 247 248 if ( ! node_append(mdoc, n)) 249 return(0); 250 251 mdoc->next = MDOC_NEXT_SIBLING; 252 return(1); 253 } 254 255 int 256 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 257 { 258 struct mdoc_node *n; 259 260 assert( ! (MDOC_HALT & mdoc->flags)); 261 262 /* No text before an initial macro. */ 263 264 if (SEC_NONE == mdoc->lastnamed) { 265 mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT); 266 return(1); 267 } 268 269 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 270 n->span = sp; 271 272 if ( ! node_append(mdoc, n)) 273 return(0); 274 275 mdoc->next = MDOC_NEXT_SIBLING; 276 return(1); 277 } 278 279 280 /* 281 * Main parse routine. Parses a single line -- really just hands off to 282 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 283 */ 284 int 285 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 286 { 287 288 assert( ! (MDOC_HALT & mdoc->flags)); 289 290 mdoc->flags |= MDOC_NEWLINE; 291 292 /* 293 * Let the roff nS register switch SYNOPSIS mode early, 294 * such that the parser knows at all times 295 * whether this mode is on or off. 296 * Note that this mode is also switched by the Sh macro. 297 */ 298 if (roff_regisset(mdoc->roff, REG_nS)) { 299 if (roff_regget(mdoc->roff, REG_nS)) 300 mdoc->flags |= MDOC_SYNOPSIS; 301 else 302 mdoc->flags &= ~MDOC_SYNOPSIS; 303 } 304 305 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 306 mdoc_pmacro(mdoc, ln, buf, offs) : 307 mdoc_ptext(mdoc, ln, buf, offs)); 308 } 309 310 int 311 mdoc_macro(MACRO_PROT_ARGS) 312 { 313 assert(tok < MDOC_MAX); 314 315 /* If we're in the body, deny prologue calls. */ 316 317 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 318 MDOC_PBODY & mdoc->flags) { 319 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY); 320 return(1); 321 } 322 323 /* If we're in the prologue, deny "body" macros. */ 324 325 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 326 ! (MDOC_PBODY & mdoc->flags)) { 327 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG); 328 if (NULL == mdoc->meta.msec) 329 mdoc->meta.msec = mandoc_strdup("1"); 330 if (NULL == mdoc->meta.title) 331 mdoc->meta.title = mandoc_strdup("UNKNOWN"); 332 if (NULL == mdoc->meta.vol) 333 mdoc->meta.vol = mandoc_strdup("LOCAL"); 334 if (NULL == mdoc->meta.os) 335 mdoc->meta.os = mandoc_strdup("LOCAL"); 336 if (NULL == mdoc->meta.date) 337 mdoc->meta.date = mandoc_normdate 338 (mdoc->parse, NULL, line, ppos); 339 mdoc->flags |= MDOC_PBODY; 340 } 341 342 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 343 } 344 345 346 static int 347 node_append(struct mdoc *mdoc, struct mdoc_node *p) 348 { 349 350 assert(mdoc->last); 351 assert(mdoc->first); 352 assert(MDOC_ROOT != p->type); 353 354 switch (mdoc->next) { 355 case (MDOC_NEXT_SIBLING): 356 mdoc->last->next = p; 357 p->prev = mdoc->last; 358 p->parent = mdoc->last->parent; 359 break; 360 case (MDOC_NEXT_CHILD): 361 mdoc->last->child = p; 362 p->parent = mdoc->last; 363 break; 364 default: 365 abort(); 366 /* NOTREACHED */ 367 } 368 369 p->parent->nchild++; 370 371 /* 372 * Copy over the normalised-data pointer of our parent. Not 373 * everybody has one, but copying a null pointer is fine. 374 */ 375 376 switch (p->type) { 377 case (MDOC_BODY): 378 if (ENDBODY_NOT != p->end) 379 break; 380 /* FALLTHROUGH */ 381 case (MDOC_TAIL): 382 /* FALLTHROUGH */ 383 case (MDOC_HEAD): 384 p->norm = p->parent->norm; 385 break; 386 default: 387 break; 388 } 389 390 if ( ! mdoc_valid_pre(mdoc, p)) 391 return(0); 392 393 switch (p->type) { 394 case (MDOC_HEAD): 395 assert(MDOC_BLOCK == p->parent->type); 396 p->parent->head = p; 397 break; 398 case (MDOC_TAIL): 399 assert(MDOC_BLOCK == p->parent->type); 400 p->parent->tail = p; 401 break; 402 case (MDOC_BODY): 403 if (p->end) 404 break; 405 assert(MDOC_BLOCK == p->parent->type); 406 p->parent->body = p; 407 break; 408 default: 409 break; 410 } 411 412 mdoc->last = p; 413 414 switch (p->type) { 415 case (MDOC_TBL): 416 /* FALLTHROUGH */ 417 case (MDOC_TEXT): 418 if ( ! mdoc_valid_post(mdoc)) 419 return(0); 420 break; 421 default: 422 break; 423 } 424 425 return(1); 426 } 427 428 429 static struct mdoc_node * 430 node_alloc(struct mdoc *mdoc, int line, int pos, 431 enum mdoct tok, enum mdoc_type type) 432 { 433 struct mdoc_node *p; 434 435 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 436 p->sec = mdoc->lastsec; 437 p->line = line; 438 p->pos = pos; 439 p->tok = tok; 440 p->type = type; 441 442 /* Flag analysis. */ 443 444 if (MDOC_SYNOPSIS & mdoc->flags) 445 p->flags |= MDOC_SYNPRETTY; 446 else 447 p->flags &= ~MDOC_SYNPRETTY; 448 if (MDOC_NEWLINE & mdoc->flags) 449 p->flags |= MDOC_LINE; 450 mdoc->flags &= ~MDOC_NEWLINE; 451 452 return(p); 453 } 454 455 456 int 457 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 458 { 459 struct mdoc_node *p; 460 461 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 462 if ( ! node_append(mdoc, p)) 463 return(0); 464 mdoc->next = MDOC_NEXT_CHILD; 465 return(1); 466 } 467 468 469 int 470 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 471 { 472 struct mdoc_node *p; 473 474 assert(mdoc->first); 475 assert(mdoc->last); 476 477 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 478 if ( ! node_append(mdoc, p)) 479 return(0); 480 mdoc->next = MDOC_NEXT_CHILD; 481 return(1); 482 } 483 484 485 int 486 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 487 { 488 struct mdoc_node *p; 489 490 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 491 if ( ! node_append(mdoc, p)) 492 return(0); 493 mdoc->next = MDOC_NEXT_CHILD; 494 return(1); 495 } 496 497 498 int 499 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 500 struct mdoc_node *body, enum mdoc_endbody end) 501 { 502 struct mdoc_node *p; 503 504 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 505 p->pending = body; 506 p->norm = body->norm; 507 p->end = end; 508 if ( ! node_append(mdoc, p)) 509 return(0); 510 mdoc->next = MDOC_NEXT_SIBLING; 511 return(1); 512 } 513 514 515 int 516 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 517 enum mdoct tok, struct mdoc_arg *args) 518 { 519 struct mdoc_node *p; 520 521 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 522 p->args = args; 523 if (p->args) 524 (args->refcnt)++; 525 526 switch (tok) { 527 case (MDOC_Bd): 528 /* FALLTHROUGH */ 529 case (MDOC_Bf): 530 /* FALLTHROUGH */ 531 case (MDOC_Bl): 532 /* FALLTHROUGH */ 533 case (MDOC_Rs): 534 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 535 break; 536 default: 537 break; 538 } 539 540 if ( ! node_append(mdoc, p)) 541 return(0); 542 mdoc->next = MDOC_NEXT_CHILD; 543 return(1); 544 } 545 546 547 int 548 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 549 enum mdoct tok, struct mdoc_arg *args) 550 { 551 struct mdoc_node *p; 552 553 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 554 p->args = args; 555 if (p->args) 556 (args->refcnt)++; 557 558 switch (tok) { 559 case (MDOC_An): 560 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 561 break; 562 default: 563 break; 564 } 565 566 if ( ! node_append(mdoc, p)) 567 return(0); 568 mdoc->next = MDOC_NEXT_CHILD; 569 return(1); 570 } 571 572 int 573 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 574 { 575 struct mdoc_node *n; 576 577 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 578 n->string = roff_strdup(mdoc->roff, p); 579 580 if ( ! node_append(mdoc, n)) 581 return(0); 582 583 mdoc->next = MDOC_NEXT_SIBLING; 584 return(1); 585 } 586 587 588 static void 589 mdoc_node_free(struct mdoc_node *p) 590 { 591 592 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 593 free(p->norm); 594 if (p->string) 595 free(p->string); 596 if (p->args) 597 mdoc_argv_free(p->args); 598 free(p); 599 } 600 601 602 static void 603 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 604 { 605 606 /* Adjust siblings. */ 607 608 if (n->prev) 609 n->prev->next = n->next; 610 if (n->next) 611 n->next->prev = n->prev; 612 613 /* Adjust parent. */ 614 615 if (n->parent) { 616 n->parent->nchild--; 617 if (n->parent->child == n) 618 n->parent->child = n->prev ? n->prev : n->next; 619 if (n->parent->last == n) 620 n->parent->last = n->prev ? n->prev : NULL; 621 } 622 623 /* Adjust parse point, if applicable. */ 624 625 if (mdoc && mdoc->last == n) { 626 if (n->prev) { 627 mdoc->last = n->prev; 628 mdoc->next = MDOC_NEXT_SIBLING; 629 } else { 630 mdoc->last = n->parent; 631 mdoc->next = MDOC_NEXT_CHILD; 632 } 633 } 634 635 if (mdoc && mdoc->first == n) 636 mdoc->first = NULL; 637 } 638 639 640 void 641 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 642 { 643 644 while (p->child) { 645 assert(p->nchild); 646 mdoc_node_delete(mdoc, p->child); 647 } 648 assert(0 == p->nchild); 649 650 mdoc_node_unlink(mdoc, p); 651 mdoc_node_free(p); 652 } 653 654 int 655 mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 656 { 657 658 mdoc_node_unlink(mdoc, p); 659 return(node_append(mdoc, p)); 660 } 661 662 #if 0 663 /* 664 * Pre-treat a text line. 665 * Text lines can consist of equations, which must be handled apart from 666 * the regular text. 667 * Thus, use this function to step through a line checking if it has any 668 * equations embedded in it. 669 * This must handle multiple equations AND equations that do not end at 670 * the end-of-line, i.e., will re-enter in the next roff parse. 671 */ 672 static int 673 mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 674 { 675 char *start, *end; 676 char delim; 677 678 while ('\0' != buf[offs]) { 679 /* Mark starting position if eqn is set. */ 680 start = NULL; 681 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 682 if (NULL != (start = strchr(buf + offs, delim))) 683 *start++ = '\0'; 684 685 /* Parse text as normal. */ 686 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 687 return(0); 688 689 /* Continue only if an equation exists. */ 690 if (NULL == start) 691 break; 692 693 /* Read past the end of the equation. */ 694 offs += start - (buf + offs); 695 assert(start == &buf[offs]); 696 if (NULL != (end = strchr(buf + offs, delim))) { 697 *end++ = '\0'; 698 while (' ' == *end) 699 end++; 700 } 701 702 /* Parse the equation itself. */ 703 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 704 705 /* Process a finished equation? */ 706 if (roff_closeeqn(mdoc->roff)) 707 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 708 return(0); 709 offs += (end - (buf + offs)); 710 } 711 712 return(1); 713 } 714 #endif 715 716 /* 717 * Parse free-form text, that is, a line that does not begin with the 718 * control character. 719 */ 720 static int 721 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 722 { 723 char *c, *ws, *end; 724 struct mdoc_node *n; 725 726 /* No text before an initial macro. */ 727 728 if (SEC_NONE == mdoc->lastnamed) { 729 mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT); 730 return(1); 731 } 732 733 assert(mdoc->last); 734 n = mdoc->last; 735 736 /* 737 * Divert directly to list processing if we're encountering a 738 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 739 * (a MDOC_BODY means it's already open, in which case we should 740 * process within its context in the normal way). 741 */ 742 743 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 744 LIST_column == n->norm->Bl.type) { 745 /* `Bl' is open without any children. */ 746 mdoc->flags |= MDOC_FREECOL; 747 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 748 } 749 750 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 751 NULL != n->parent && 752 MDOC_Bl == n->parent->tok && 753 LIST_column == n->parent->norm->Bl.type) { 754 /* `Bl' has block-level `It' children. */ 755 mdoc->flags |= MDOC_FREECOL; 756 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 757 } 758 759 /* 760 * Search for the beginning of unescaped trailing whitespace (ws) 761 * and for the first character not to be output (end). 762 */ 763 764 /* FIXME: replace with strcspn(). */ 765 ws = NULL; 766 for (c = end = buf + offs; *c; c++) { 767 switch (*c) { 768 case ' ': 769 if (NULL == ws) 770 ws = c; 771 continue; 772 case '\t': 773 /* 774 * Always warn about trailing tabs, 775 * even outside literal context, 776 * where they should be put on the next line. 777 */ 778 if (NULL == ws) 779 ws = c; 780 /* 781 * Strip trailing tabs in literal context only; 782 * outside, they affect the next line. 783 */ 784 if (MDOC_LITERAL & mdoc->flags) 785 continue; 786 break; 787 case '\\': 788 /* Skip the escaped character, too, if any. */ 789 if (c[1]) 790 c++; 791 /* FALLTHROUGH */ 792 default: 793 ws = NULL; 794 break; 795 } 796 end = c + 1; 797 } 798 *end = '\0'; 799 800 if (ws) 801 mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 802 803 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 804 mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 805 806 /* 807 * Insert a `sp' in the case of a blank line. Technically, 808 * blank lines aren't allowed, but enough manuals assume this 809 * behaviour that we want to work around it. 810 */ 811 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 812 return(0); 813 814 mdoc->next = MDOC_NEXT_SIBLING; 815 816 return(mdoc_valid_post(mdoc)); 817 } 818 819 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 820 return(0); 821 822 if (MDOC_LITERAL & mdoc->flags) 823 return(1); 824 825 /* 826 * End-of-sentence check. If the last character is an unescaped 827 * EOS character, then flag the node as being the end of a 828 * sentence. The front-end will know how to interpret this. 829 */ 830 831 assert(buf < end); 832 833 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 834 mdoc->last->flags |= MDOC_EOS; 835 836 return(1); 837 } 838 839 840 /* 841 * Parse a macro line, that is, a line beginning with the control 842 * character. 843 */ 844 static int 845 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 846 { 847 enum mdoct tok; 848 int i, sv; 849 char mac[5]; 850 struct mdoc_node *n; 851 852 /* Empty post-control lines are ignored. */ 853 854 if ('"' == buf[offs]) { 855 mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT); 856 return(1); 857 } else if ('\0' == buf[offs]) 858 return(1); 859 860 sv = offs; 861 862 /* 863 * Copy the first word into a nil-terminated buffer. 864 * Stop copying when a tab, space, or eoln is encountered. 865 */ 866 867 i = 0; 868 while (i < 4 && '\0' != buf[offs] && 869 ' ' != buf[offs] && '\t' != buf[offs]) 870 mac[i++] = buf[offs++]; 871 872 mac[i] = '\0'; 873 874 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 875 876 if (MDOC_MAX == tok) { 877 mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 878 ln, sv, "%s", buf + sv - 1); 879 return(1); 880 } 881 882 /* Disregard the first trailing tab, if applicable. */ 883 884 if ('\t' == buf[offs]) 885 offs++; 886 887 /* Jump to the next non-whitespace word. */ 888 889 while (buf[offs] && ' ' == buf[offs]) 890 offs++; 891 892 /* 893 * Trailing whitespace. Note that tabs are allowed to be passed 894 * into the parser as "text", so we only warn about spaces here. 895 */ 896 897 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 898 mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE); 899 900 /* 901 * If an initial macro or a list invocation, divert directly 902 * into macro processing. 903 */ 904 905 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 906 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 907 goto err; 908 return(1); 909 } 910 911 n = mdoc->last; 912 assert(mdoc->last); 913 914 /* 915 * If the first macro of a `Bl -column', open an `It' block 916 * context around the parsed macro. 917 */ 918 919 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 920 LIST_column == n->norm->Bl.type) { 921 mdoc->flags |= MDOC_FREECOL; 922 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 923 goto err; 924 return(1); 925 } 926 927 /* 928 * If we're following a block-level `It' within a `Bl -column' 929 * context (perhaps opened in the above block or in ptext()), 930 * then open an `It' block context around the parsed macro. 931 */ 932 933 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 934 NULL != n->parent && 935 MDOC_Bl == n->parent->tok && 936 LIST_column == n->parent->norm->Bl.type) { 937 mdoc->flags |= MDOC_FREECOL; 938 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 939 goto err; 940 return(1); 941 } 942 943 /* Normal processing of a macro. */ 944 945 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 946 goto err; 947 948 return(1); 949 950 err: /* Error out. */ 951 952 mdoc->flags |= MDOC_HALT; 953 return(0); 954 } 955 956 enum mdelim 957 mdoc_isdelim(const char *p) 958 { 959 960 if ('\0' == p[0]) 961 return(DELIM_NONE); 962 963 if ('\0' == p[1]) 964 switch (p[0]) { 965 case('('): 966 /* FALLTHROUGH */ 967 case('['): 968 return(DELIM_OPEN); 969 case('|'): 970 return(DELIM_MIDDLE); 971 case('.'): 972 /* FALLTHROUGH */ 973 case(','): 974 /* FALLTHROUGH */ 975 case(';'): 976 /* FALLTHROUGH */ 977 case(':'): 978 /* FALLTHROUGH */ 979 case('?'): 980 /* FALLTHROUGH */ 981 case('!'): 982 /* FALLTHROUGH */ 983 case(')'): 984 /* FALLTHROUGH */ 985 case(']'): 986 return(DELIM_CLOSE); 987 default: 988 return(DELIM_NONE); 989 } 990 991 if ('\\' != p[0]) 992 return(DELIM_NONE); 993 994 if (0 == strcmp(p + 1, ".")) 995 return(DELIM_CLOSE); 996 if (0 == strcmp(p + 1, "fR|\\fP")) 997 return(DELIM_MIDDLE); 998 999 return(DELIM_NONE); 1000 } 1001