1 /* $Id: mdoc.c,v 1.27 2009/09/21 21:11:37 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <assert.h> 18 #include <ctype.h> 19 #include <stdarg.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 24 #include "libmdoc.h" 25 26 const char *const __mdoc_merrnames[MERRMAX] = { 27 "trailing whitespace", /* ETAILWS */ 28 "unexpected quoted parameter", /* EQUOTPARM */ 29 "unterminated quoted parameter", /* EQUOTTERM */ 30 "system: malloc error", /* EMALLOC */ 31 "argument parameter suggested", /* EARGVAL */ 32 "macro disallowed in prologue", /* EBODYPROL */ 33 "macro disallowed in body", /* EPROLBODY */ 34 "text disallowed in prologue", /* ETEXTPROL */ 35 "blank line disallowed", /* ENOBLANK */ 36 "text parameter too long", /* ETOOLONG */ 37 "invalid escape sequence", /* EESCAPE */ 38 "invalid character", /* EPRINT */ 39 "document has no body", /* ENODAT */ 40 "document has no prologue", /* ENOPROLOGUE */ 41 "expected line arguments", /* ELINE */ 42 "invalid AT&T argument", /* EATT */ 43 "default name not yet set", /* ENAME */ 44 "missing list type", /* ELISTTYPE */ 45 "missing display type", /* EDISPTYPE */ 46 "too many display types", /* EMULTIDISP */ 47 "too many list types", /* EMULTILIST */ 48 "NAME section must be first", /* ESECNAME */ 49 "badly-formed NAME section", /* ENAMESECINC */ 50 "argument repeated", /* EARGREP */ 51 "expected boolean parameter", /* EBOOL */ 52 "inconsistent column syntax", /* ECOLMIS */ 53 "nested display invalid", /* ENESTDISP */ 54 "width argument missing", /* EMISSWIDTH */ 55 "invalid section for this manual section", /* EWRONGMSEC */ 56 "section out of conventional order", /* ESECOOO */ 57 "section repeated", /* ESECREP */ 58 "invalid standard argument", /* EBADSTAND */ 59 "multi-line arguments discouraged", /* ENOMULTILINE */ 60 "multi-line arguments suggested", /* EMULTILINE */ 61 "line arguments discouraged", /* ENOLINE */ 62 "prologue macro out of conventional order", /* EPROLOOO */ 63 "prologue macro repeated", /* EPROLREP */ 64 "invalid manual section", /* EBADMSEC */ 65 "invalid section", /* EBADSEC */ 66 "invalid font mode", /* EFONT */ 67 "invalid date syntax", /* EBADDATE */ 68 "invalid number format", /* ENUMFMT */ 69 "superfluous width argument", /* ENOWIDTH */ 70 "system: utsname error", /* EUTSNAME */ 71 "obsolete macro", /* EOBS */ 72 "end-of-line scope violation", /* EIMPBRK */ 73 "empty macro ignored", /* EIGNE */ 74 "unclosed explicit scope", /* EOPEN */ 75 "unterminated quoted phrase", /* EQUOTPHR */ 76 "closure macro without prior context", /* ENOCTX */ 77 "no description found for library" /* ELIB */ 78 }; 79 80 const char *const __mdoc_macronames[MDOC_MAX] = { 81 "Ap", "Dd", "Dt", "Os", 82 "Sh", "Ss", "Pp", "D1", 83 "Dl", "Bd", "Ed", "Bl", 84 "El", "It", "Ad", "An", 85 "Ar", "Cd", "Cm", "Dv", 86 "Er", "Ev", "Ex", "Fa", 87 "Fd", "Fl", "Fn", "Ft", 88 "Ic", "In", "Li", "Nd", 89 "Nm", "Op", "Ot", "Pa", 90 "Rv", "St", "Va", "Vt", 91 /* LINTED */ 92 "Xr", "\%A", "\%B", "\%D", 93 /* LINTED */ 94 "\%I", "\%J", "\%N", "\%O", 95 /* LINTED */ 96 "\%P", "\%R", "\%T", "\%V", 97 "Ac", "Ao", "Aq", "At", 98 "Bc", "Bf", "Bo", "Bq", 99 "Bsx", "Bx", "Db", "Dc", 100 "Do", "Dq", "Ec", "Ef", 101 "Em", "Eo", "Fx", "Ms", 102 "No", "Ns", "Nx", "Ox", 103 "Pc", "Pf", "Po", "Pq", 104 "Qc", "Ql", "Qo", "Qq", 105 "Re", "Rs", "Sc", "So", 106 "Sq", "Sm", "Sx", "Sy", 107 "Tn", "Ux", "Xc", "Xo", 108 "Fo", "Fc", "Oo", "Oc", 109 "Bk", "Ek", "Bt", "Hf", 110 "Fr", "Ud", "Lb", "Lp", 111 "Lk", "Mt", "Brq", "Bro", 112 /* LINTED */ 113 "Brc", "\%C", "Es", "En", 114 /* LINTED */ 115 "Dx", "\%Q", "br", "sp" 116 }; 117 118 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 119 "split", "nosplit", "ragged", 120 "unfilled", "literal", "file", 121 "offset", "bullet", "dash", 122 "hyphen", "item", "enum", 123 "tag", "diag", "hang", 124 "ohang", "inset", "column", 125 "width", "compact", "std", 126 "filled", "words", "emphasis", 127 "symbolic", "nested" 128 }; 129 130 const char * const *mdoc_macronames = __mdoc_macronames; 131 const char * const *mdoc_argnames = __mdoc_argnames; 132 133 static void mdoc_free1(struct mdoc *); 134 static int mdoc_alloc1(struct mdoc *); 135 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 136 int, enum mdoc_type); 137 static int node_append(struct mdoc *, 138 struct mdoc_node *); 139 static int parsetext(struct mdoc *, int, char *); 140 static int parsemacro(struct mdoc *, int, char *); 141 static int macrowarn(struct mdoc *, int, const char *); 142 static int pstring(struct mdoc *, int, int, 143 const char *, size_t); 144 145 146 const struct mdoc_node * 147 mdoc_node(const struct mdoc *m) 148 { 149 150 return(MDOC_HALT & m->flags ? NULL : m->first); 151 } 152 153 154 const struct mdoc_meta * 155 mdoc_meta(const struct mdoc *m) 156 { 157 158 return(MDOC_HALT & m->flags ? NULL : &m->meta); 159 } 160 161 162 /* 163 * Frees volatile resources (parse tree, meta-data, fields). 164 */ 165 static void 166 mdoc_free1(struct mdoc *mdoc) 167 { 168 169 if (mdoc->first) 170 mdoc_node_freelist(mdoc->first); 171 if (mdoc->meta.title) 172 free(mdoc->meta.title); 173 if (mdoc->meta.os) 174 free(mdoc->meta.os); 175 if (mdoc->meta.name) 176 free(mdoc->meta.name); 177 if (mdoc->meta.arch) 178 free(mdoc->meta.arch); 179 if (mdoc->meta.vol) 180 free(mdoc->meta.vol); 181 } 182 183 184 /* 185 * Allocate all volatile resources (parse tree, meta-data, fields). 186 */ 187 static int 188 mdoc_alloc1(struct mdoc *mdoc) 189 { 190 191 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 192 mdoc->flags = 0; 193 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 194 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 195 if (NULL == mdoc->last) 196 return(0); 197 198 mdoc->first = mdoc->last; 199 mdoc->last->type = MDOC_ROOT; 200 mdoc->next = MDOC_NEXT_CHILD; 201 return(1); 202 } 203 204 205 /* 206 * Free up volatile resources (see mdoc_free1()) then re-initialises the 207 * data with mdoc_alloc1(). After invocation, parse data has been reset 208 * and the parser is ready for re-invocation on a new tree; however, 209 * cross-parse non-volatile data is kept intact. 210 */ 211 int 212 mdoc_reset(struct mdoc *mdoc) 213 { 214 215 mdoc_free1(mdoc); 216 return(mdoc_alloc1(mdoc)); 217 } 218 219 220 /* 221 * Completely free up all volatile and non-volatile parse resources. 222 * After invocation, the pointer is no longer usable. 223 */ 224 void 225 mdoc_free(struct mdoc *mdoc) 226 { 227 228 mdoc_free1(mdoc); 229 free(mdoc); 230 } 231 232 233 /* 234 * Allocate volatile and non-volatile parse resources. 235 */ 236 struct mdoc * 237 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 238 { 239 struct mdoc *p; 240 241 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 242 return(NULL); 243 if (cb) 244 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 245 246 mdoc_hash_init(); 247 248 p->data = data; 249 p->pflags = pflags; 250 251 if (mdoc_alloc1(p)) 252 return(p); 253 254 free(p); 255 return(NULL); 256 } 257 258 259 /* 260 * Climb back up the parse tree, validating open scopes. Mostly calls 261 * through to macro_end() in macro.c. 262 */ 263 int 264 mdoc_endparse(struct mdoc *m) 265 { 266 267 if (MDOC_HALT & m->flags) 268 return(0); 269 else if (mdoc_macroend(m)) 270 return(1); 271 m->flags |= MDOC_HALT; 272 return(0); 273 } 274 275 276 /* 277 * Main parse routine. Parses a single line -- really just hands off to 278 * the macro (parsemacro()) or text parser (parsetext()). 279 */ 280 int 281 mdoc_parseln(struct mdoc *m, int ln, char *buf) 282 { 283 284 if (MDOC_HALT & m->flags) 285 return(0); 286 287 return('.' == *buf ? parsemacro(m, ln, buf) : 288 parsetext(m, ln, buf)); 289 } 290 291 292 int 293 mdoc_verr(struct mdoc *mdoc, int ln, int pos, 294 const char *fmt, ...) 295 { 296 char buf[256]; 297 va_list ap; 298 299 if (NULL == mdoc->cb.mdoc_err) 300 return(0); 301 302 va_start(ap, fmt); 303 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 304 va_end(ap); 305 306 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 307 } 308 309 310 int 311 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 312 { 313 char buf[256]; 314 va_list ap; 315 316 if (NULL == mdoc->cb.mdoc_warn) 317 return(0); 318 319 va_start(ap, fmt); 320 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 321 va_end(ap); 322 323 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 324 } 325 326 327 int 328 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 329 { 330 const char *p; 331 332 p = __mdoc_merrnames[(int)type]; 333 assert(p); 334 335 if (iserr) 336 return(mdoc_verr(m, line, pos, p)); 337 338 return(mdoc_vwarn(m, line, pos, p)); 339 } 340 341 342 int 343 mdoc_macro(struct mdoc *m, int tok, 344 int ln, int pp, int *pos, char *buf) 345 { 346 /* 347 * If we're in the prologue, deny "body" macros. Similarly, if 348 * we're in the body, deny prologue calls. 349 */ 350 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 351 MDOC_PBODY & m->flags) 352 return(mdoc_perr(m, ln, pp, EPROLBODY)); 353 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 354 ! (MDOC_PBODY & m->flags)) 355 return(mdoc_perr(m, ln, pp, EBODYPROL)); 356 357 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 358 } 359 360 361 static int 362 node_append(struct mdoc *mdoc, struct mdoc_node *p) 363 { 364 365 assert(mdoc->last); 366 assert(mdoc->first); 367 assert(MDOC_ROOT != p->type); 368 369 switch (mdoc->next) { 370 case (MDOC_NEXT_SIBLING): 371 mdoc->last->next = p; 372 p->prev = mdoc->last; 373 p->parent = mdoc->last->parent; 374 break; 375 case (MDOC_NEXT_CHILD): 376 mdoc->last->child = p; 377 p->parent = mdoc->last; 378 break; 379 default: 380 abort(); 381 /* NOTREACHED */ 382 } 383 384 p->parent->nchild++; 385 386 if ( ! mdoc_valid_pre(mdoc, p)) 387 return(0); 388 if ( ! mdoc_action_pre(mdoc, p)) 389 return(0); 390 391 switch (p->type) { 392 case (MDOC_HEAD): 393 assert(MDOC_BLOCK == p->parent->type); 394 p->parent->head = p; 395 break; 396 case (MDOC_TAIL): 397 assert(MDOC_BLOCK == p->parent->type); 398 p->parent->tail = p; 399 break; 400 case (MDOC_BODY): 401 assert(MDOC_BLOCK == p->parent->type); 402 p->parent->body = p; 403 break; 404 default: 405 break; 406 } 407 408 mdoc->last = p; 409 410 switch (p->type) { 411 case (MDOC_TEXT): 412 if ( ! mdoc_valid_post(mdoc)) 413 return(0); 414 if ( ! mdoc_action_post(mdoc)) 415 return(0); 416 break; 417 default: 418 break; 419 } 420 421 return(1); 422 } 423 424 425 static struct mdoc_node * 426 node_alloc(struct mdoc *m, int line, 427 int pos, int tok, enum mdoc_type type) 428 { 429 struct mdoc_node *p; 430 431 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 432 (void)mdoc_nerr(m, m->last, EMALLOC); 433 return(NULL); 434 } 435 436 p->sec = m->lastsec; 437 p->line = line; 438 p->pos = pos; 439 p->tok = tok; 440 if (MDOC_TEXT != (p->type = type)) 441 assert(p->tok >= 0); 442 443 return(p); 444 } 445 446 447 int 448 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 449 { 450 struct mdoc_node *p; 451 452 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 453 if (NULL == p) 454 return(0); 455 if ( ! node_append(m, p)) 456 return(0); 457 m->next = MDOC_NEXT_CHILD; 458 return(1); 459 } 460 461 462 int 463 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 464 { 465 struct mdoc_node *p; 466 467 assert(m->first); 468 assert(m->last); 469 470 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 471 if (NULL == p) 472 return(0); 473 if ( ! node_append(m, p)) 474 return(0); 475 m->next = MDOC_NEXT_CHILD; 476 return(1); 477 } 478 479 480 int 481 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 482 { 483 struct mdoc_node *p; 484 485 p = node_alloc(m, line, pos, tok, MDOC_BODY); 486 if (NULL == p) 487 return(0); 488 if ( ! node_append(m, p)) 489 return(0); 490 m->next = MDOC_NEXT_CHILD; 491 return(1); 492 } 493 494 495 int 496 mdoc_block_alloc(struct mdoc *m, int line, int pos, 497 int tok, struct mdoc_arg *args) 498 { 499 struct mdoc_node *p; 500 501 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 502 if (NULL == p) 503 return(0); 504 p->args = args; 505 if (p->args) 506 (args->refcnt)++; 507 if ( ! node_append(m, p)) 508 return(0); 509 m->next = MDOC_NEXT_CHILD; 510 return(1); 511 } 512 513 514 int 515 mdoc_elem_alloc(struct mdoc *m, int line, int pos, 516 int tok, struct mdoc_arg *args) 517 { 518 struct mdoc_node *p; 519 520 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 521 if (NULL == p) 522 return(0); 523 p->args = args; 524 if (p->args) 525 (args->refcnt)++; 526 if ( ! node_append(m, p)) 527 return(0); 528 m->next = MDOC_NEXT_CHILD; 529 return(1); 530 } 531 532 533 static int 534 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 535 { 536 struct mdoc_node *n; 537 size_t sv; 538 539 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 540 if (NULL == n) 541 return(mdoc_nerr(m, m->last, EMALLOC)); 542 543 n->string = malloc(len + 1); 544 if (NULL == n->string) { 545 free(n); 546 return(mdoc_nerr(m, m->last, EMALLOC)); 547 } 548 549 sv = strlcpy(n->string, p, len + 1); 550 551 /* Prohibit truncation. */ 552 assert(sv < len + 1); 553 554 if ( ! node_append(m, n)) 555 return(0); 556 m->next = MDOC_NEXT_SIBLING; 557 return(1); 558 } 559 560 561 int 562 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 563 { 564 565 return(pstring(m, line, pos, p, strlen(p))); 566 } 567 568 569 void 570 mdoc_node_free(struct mdoc_node *p) 571 { 572 573 if (p->parent) 574 p->parent->nchild--; 575 if (p->string) 576 free(p->string); 577 if (p->args) 578 mdoc_argv_free(p->args); 579 free(p); 580 } 581 582 583 void 584 mdoc_node_freelist(struct mdoc_node *p) 585 { 586 587 if (p->child) 588 mdoc_node_freelist(p->child); 589 if (p->next) 590 mdoc_node_freelist(p->next); 591 592 assert(0 == p->nchild); 593 mdoc_node_free(p); 594 } 595 596 597 /* 598 * Parse free-form text, that is, a line that does not begin with the 599 * control character. 600 */ 601 static int 602 parsetext(struct mdoc *m, int line, char *buf) 603 { 604 int i, j; 605 606 if (SEC_NONE == m->lastnamed) 607 return(mdoc_perr(m, line, 0, ETEXTPROL)); 608 609 /* 610 * If in literal mode, then pass the buffer directly to the 611 * back-end, as it should be preserved as a single term. 612 */ 613 614 if (MDOC_LITERAL & m->flags) 615 return(mdoc_word_alloc(m, line, 0, buf)); 616 617 /* Disallow blank/white-space lines in non-literal mode. */ 618 619 for (i = 0; ' ' == buf[i]; i++) 620 /* Skip leading whitespace. */ ; 621 if (0 == buf[i]) 622 return(mdoc_perr(m, line, 0, ENOBLANK)); 623 624 /* 625 * Break apart a free-form line into tokens. Spaces are 626 * stripped out of the input. 627 */ 628 629 for (j = i; buf[i]; i++) { 630 if (' ' != buf[i]) 631 continue; 632 633 /* Escaped whitespace. */ 634 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 635 continue; 636 637 buf[i++] = 0; 638 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 639 return(0); 640 641 for ( ; ' ' == buf[i]; i++) 642 /* Skip trailing whitespace. */ ; 643 644 j = i; 645 if (0 == buf[i]) 646 break; 647 } 648 649 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 650 return(0); 651 652 m->next = MDOC_NEXT_SIBLING; 653 return(1); 654 } 655 656 657 658 659 static int 660 macrowarn(struct mdoc *m, int ln, const char *buf) 661 { 662 if ( ! (MDOC_IGN_MACRO & m->pflags)) 663 return(mdoc_verr(m, ln, 0, 664 "unknown macro: %s%s", 665 buf, strlen(buf) > 3 ? "..." : "")); 666 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 667 buf, strlen(buf) > 3 ? "..." : "")); 668 } 669 670 671 /* 672 * Parse a macro line, that is, a line beginning with the control 673 * character. 674 */ 675 int 676 parsemacro(struct mdoc *m, int ln, char *buf) 677 { 678 int i, j, c; 679 char mac[5]; 680 681 /* Empty lines are ignored. */ 682 683 if (0 == buf[1]) 684 return(1); 685 686 i = 1; 687 688 /* Accept whitespace after the initial control char. */ 689 690 if (' ' == buf[i]) { 691 i++; 692 while (buf[i] && ' ' == buf[i]) 693 i++; 694 if (0 == buf[i]) 695 return(1); 696 } 697 698 /* Copy the first word into a nil-terminated buffer. */ 699 700 for (j = 0; j < 4; j++, i++) { 701 if (0 == (mac[j] = buf[i])) 702 break; 703 else if (' ' == buf[i]) 704 break; 705 706 /* Check for invalid characters. */ 707 708 if (isgraph((u_char)buf[i])) 709 continue; 710 return(mdoc_perr(m, ln, i, EPRINT)); 711 } 712 713 mac[j] = 0; 714 715 if (j == 4 || j < 2) { 716 if ( ! macrowarn(m, ln, mac)) 717 goto err; 718 return(1); 719 } 720 721 if (MDOC_MAX == (c = mdoc_hash_find(mac))) { 722 if ( ! macrowarn(m, ln, mac)) 723 goto err; 724 return(1); 725 } 726 727 /* The macro is sane. Jump to the next word. */ 728 729 while (buf[i] && ' ' == buf[i]) 730 i++; 731 732 /* 733 * Begin recursive parse sequence. Since we're at the start of 734 * the line, we don't need to do callable/parseable checks. 735 */ 736 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 737 goto err; 738 739 return(1); 740 741 err: /* Error out. */ 742 743 m->flags |= MDOC_HALT; 744 return(0); 745 } 746 747 748