1 /* $Id: man.c,v 1.137 2014/08/01 21:24:17 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #ifdef HAVE_CONFIG_H 20 #include "config.h" 21 #endif 22 23 #include <sys/types.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <stdarg.h> 28 #include <stdlib.h> 29 #include <stdio.h> 30 #include <string.h> 31 32 #include "man.h" 33 #include "mandoc.h" 34 #include "mandoc_aux.h" 35 #include "libman.h" 36 #include "libmandoc.h" 37 38 const char *const __man_macronames[MAN_MAX] = { 39 "br", "TH", "SH", "SS", 40 "TP", "LP", "PP", "P", 41 "IP", "HP", "SM", "SB", 42 "BI", "IB", "BR", "RB", 43 "R", "B", "I", "IR", 44 "RI", "na", "sp", "nf", 45 "fi", "RE", "RS", "DT", 46 "UC", "PD", "AT", "in", 47 "ft", "OP", "EX", "EE", 48 "UR", "UE", "ll" 49 }; 50 51 const char * const *man_macronames = __man_macronames; 52 53 static struct man_node *man_node_alloc(struct man *, int, int, 54 enum man_type, enum mant); 55 static int man_node_append(struct man *, 56 struct man_node *); 57 static void man_node_free(struct man_node *); 58 static void man_node_unlink(struct man *, 59 struct man_node *); 60 static int man_ptext(struct man *, int, char *, int); 61 static int man_pmacro(struct man *, int, char *, int); 62 static void man_free1(struct man *); 63 static void man_alloc1(struct man *); 64 static int man_descope(struct man *, int, int); 65 66 67 const struct man_node * 68 man_node(const struct man *man) 69 { 70 71 return(man->first); 72 } 73 74 const struct man_meta * 75 man_meta(const struct man *man) 76 { 77 78 return(&man->meta); 79 } 80 81 void 82 man_reset(struct man *man) 83 { 84 85 man_free1(man); 86 man_alloc1(man); 87 } 88 89 void 90 man_free(struct man *man) 91 { 92 93 man_free1(man); 94 free(man); 95 } 96 97 struct man * 98 man_alloc(struct roff *roff, struct mparse *parse, int quick) 99 { 100 struct man *p; 101 102 p = mandoc_calloc(1, sizeof(struct man)); 103 104 man_hash_init(); 105 p->parse = parse; 106 p->quick = quick; 107 p->roff = roff; 108 109 man_alloc1(p); 110 return(p); 111 } 112 113 int 114 man_endparse(struct man *man) 115 { 116 117 return(man_macroend(man)); 118 } 119 120 int 121 man_parseln(struct man *man, int ln, char *buf, int offs) 122 { 123 124 man->flags |= MAN_NEWLINE; 125 126 return (roff_getcontrol(man->roff, buf, &offs) ? 127 man_pmacro(man, ln, buf, offs) : 128 man_ptext(man, ln, buf, offs)); 129 } 130 131 static void 132 man_free1(struct man *man) 133 { 134 135 if (man->first) 136 man_node_delete(man, man->first); 137 if (man->meta.title) 138 free(man->meta.title); 139 if (man->meta.source) 140 free(man->meta.source); 141 if (man->meta.date) 142 free(man->meta.date); 143 if (man->meta.vol) 144 free(man->meta.vol); 145 if (man->meta.msec) 146 free(man->meta.msec); 147 } 148 149 static void 150 man_alloc1(struct man *man) 151 { 152 153 memset(&man->meta, 0, sizeof(struct man_meta)); 154 man->flags = 0; 155 man->last = mandoc_calloc(1, sizeof(struct man_node)); 156 man->first = man->last; 157 man->last->type = MAN_ROOT; 158 man->last->tok = MAN_MAX; 159 man->next = MAN_NEXT_CHILD; 160 } 161 162 163 static int 164 man_node_append(struct man *man, struct man_node *p) 165 { 166 167 assert(man->last); 168 assert(man->first); 169 assert(MAN_ROOT != p->type); 170 171 switch (man->next) { 172 case MAN_NEXT_SIBLING: 173 man->last->next = p; 174 p->prev = man->last; 175 p->parent = man->last->parent; 176 break; 177 case MAN_NEXT_CHILD: 178 man->last->child = p; 179 p->parent = man->last; 180 break; 181 default: 182 abort(); 183 /* NOTREACHED */ 184 } 185 186 assert(p->parent); 187 p->parent->nchild++; 188 189 switch (p->type) { 190 case MAN_BLOCK: 191 if (p->tok == MAN_SH || p->tok == MAN_SS) 192 man->flags &= ~MAN_LITERAL; 193 break; 194 case MAN_HEAD: 195 assert(MAN_BLOCK == p->parent->type); 196 p->parent->head = p; 197 break; 198 case MAN_TAIL: 199 assert(MAN_BLOCK == p->parent->type); 200 p->parent->tail = p; 201 break; 202 case MAN_BODY: 203 assert(MAN_BLOCK == p->parent->type); 204 p->parent->body = p; 205 break; 206 default: 207 break; 208 } 209 210 man->last = p; 211 212 switch (p->type) { 213 case MAN_TBL: 214 /* FALLTHROUGH */ 215 case MAN_TEXT: 216 if ( ! man_valid_post(man)) 217 return(0); 218 break; 219 default: 220 break; 221 } 222 223 return(1); 224 } 225 226 static struct man_node * 227 man_node_alloc(struct man *man, int line, int pos, 228 enum man_type type, enum mant tok) 229 { 230 struct man_node *p; 231 232 p = mandoc_calloc(1, sizeof(struct man_node)); 233 p->line = line; 234 p->pos = pos; 235 p->type = type; 236 p->tok = tok; 237 238 if (MAN_NEWLINE & man->flags) 239 p->flags |= MAN_LINE; 240 man->flags &= ~MAN_NEWLINE; 241 return(p); 242 } 243 244 int 245 man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 246 { 247 struct man_node *p; 248 249 p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 250 if ( ! man_node_append(man, p)) 251 return(0); 252 man->next = MAN_NEXT_CHILD; 253 return(1); 254 } 255 256 int 257 man_tail_alloc(struct man *man, int line, int pos, enum mant tok) 258 { 259 struct man_node *p; 260 261 p = man_node_alloc(man, line, pos, MAN_TAIL, tok); 262 if ( ! man_node_append(man, p)) 263 return(0); 264 man->next = MAN_NEXT_CHILD; 265 return(1); 266 } 267 268 int 269 man_head_alloc(struct man *man, int line, int pos, enum mant tok) 270 { 271 struct man_node *p; 272 273 p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 274 if ( ! man_node_append(man, p)) 275 return(0); 276 man->next = MAN_NEXT_CHILD; 277 return(1); 278 } 279 280 int 281 man_body_alloc(struct man *man, int line, int pos, enum mant tok) 282 { 283 struct man_node *p; 284 285 p = man_node_alloc(man, line, pos, MAN_BODY, tok); 286 if ( ! man_node_append(man, p)) 287 return(0); 288 man->next = MAN_NEXT_CHILD; 289 return(1); 290 } 291 292 int 293 man_block_alloc(struct man *man, int line, int pos, enum mant tok) 294 { 295 struct man_node *p; 296 297 p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 298 if ( ! man_node_append(man, p)) 299 return(0); 300 man->next = MAN_NEXT_CHILD; 301 return(1); 302 } 303 304 int 305 man_word_alloc(struct man *man, int line, int pos, const char *word) 306 { 307 struct man_node *n; 308 309 n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 310 n->string = roff_strdup(man->roff, word); 311 312 if ( ! man_node_append(man, n)) 313 return(0); 314 315 man->next = MAN_NEXT_SIBLING; 316 return(1); 317 } 318 319 /* 320 * Free all of the resources held by a node. This does NOT unlink a 321 * node from its context; for that, see man_node_unlink(). 322 */ 323 static void 324 man_node_free(struct man_node *p) 325 { 326 327 if (p->string) 328 free(p->string); 329 free(p); 330 } 331 332 void 333 man_node_delete(struct man *man, struct man_node *p) 334 { 335 336 while (p->child) 337 man_node_delete(man, p->child); 338 339 man_node_unlink(man, p); 340 man_node_free(p); 341 } 342 343 int 344 man_addeqn(struct man *man, const struct eqn *ep) 345 { 346 struct man_node *n; 347 348 n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 349 n->eqn = ep; 350 351 if ( ! man_node_append(man, n)) 352 return(0); 353 354 man->next = MAN_NEXT_SIBLING; 355 return(man_descope(man, ep->ln, ep->pos)); 356 } 357 358 int 359 man_addspan(struct man *man, const struct tbl_span *sp) 360 { 361 struct man_node *n; 362 363 n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 364 n->span = sp; 365 366 if ( ! man_node_append(man, n)) 367 return(0); 368 369 man->next = MAN_NEXT_SIBLING; 370 return(man_descope(man, sp->line, 0)); 371 } 372 373 static int 374 man_descope(struct man *man, int line, int offs) 375 { 376 /* 377 * Co-ordinate what happens with having a next-line scope open: 378 * first close out the element scope (if applicable), then close 379 * out the block scope (also if applicable). 380 */ 381 382 if (MAN_ELINE & man->flags) { 383 man->flags &= ~MAN_ELINE; 384 if ( ! man_unscope(man, man->last->parent)) 385 return(0); 386 } 387 388 if ( ! (MAN_BLINE & man->flags)) 389 return(1); 390 man->flags &= ~MAN_BLINE; 391 392 if ( ! man_unscope(man, man->last->parent)) 393 return(0); 394 return(man_body_alloc(man, line, offs, man->last->tok)); 395 } 396 397 static int 398 man_ptext(struct man *man, int line, char *buf, int offs) 399 { 400 int i; 401 402 /* Literal free-form text whitespace is preserved. */ 403 404 if (MAN_LITERAL & man->flags) { 405 if ( ! man_word_alloc(man, line, offs, buf + offs)) 406 return(0); 407 return(man_descope(man, line, offs)); 408 } 409 410 for (i = offs; ' ' == buf[i]; i++) 411 /* Skip leading whitespace. */ ; 412 413 /* 414 * Blank lines are ignored right after headings 415 * but add a single vertical space elsewhere. 416 */ 417 418 if ('\0' == buf[i]) { 419 /* Allocate a blank entry. */ 420 if (MAN_SH != man->last->tok && 421 MAN_SS != man->last->tok) { 422 if ( ! man_elem_alloc(man, line, offs, MAN_sp)) 423 return(0); 424 man->next = MAN_NEXT_SIBLING; 425 } 426 return(1); 427 } 428 429 /* 430 * Warn if the last un-escaped character is whitespace. Then 431 * strip away the remaining spaces (tabs stay!). 432 */ 433 434 i = (int)strlen(buf); 435 assert(i); 436 437 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 438 if (i > 1 && '\\' != buf[i - 2]) 439 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 440 line, i - 1, NULL); 441 442 for (--i; i && ' ' == buf[i]; i--) 443 /* Spin back to non-space. */ ; 444 445 /* Jump ahead of escaped whitespace. */ 446 i += '\\' == buf[i] ? 2 : 1; 447 448 buf[i] = '\0'; 449 } 450 451 if ( ! man_word_alloc(man, line, offs, buf + offs)) 452 return(0); 453 454 /* 455 * End-of-sentence check. If the last character is an unescaped 456 * EOS character, then flag the node as being the end of a 457 * sentence. The front-end will know how to interpret this. 458 */ 459 460 assert(i); 461 if (mandoc_eos(buf, (size_t)i)) 462 man->last->flags |= MAN_EOS; 463 464 return(man_descope(man, line, offs)); 465 } 466 467 static int 468 man_pmacro(struct man *man, int ln, char *buf, int offs) 469 { 470 char mac[5]; 471 struct man_node *n; 472 enum mant tok; 473 int i, ppos; 474 int bline; 475 476 if ('"' == buf[offs]) { 477 mandoc_msg(MANDOCERR_COMMENT_BAD, man->parse, 478 ln, offs, NULL); 479 return(1); 480 } else if ('\0' == buf[offs]) 481 return(1); 482 483 ppos = offs; 484 485 /* 486 * Copy the first word into a nil-terminated buffer. 487 * Stop copying when a tab, space, or eoln is encountered. 488 */ 489 490 i = 0; 491 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] && 492 '\t' != buf[offs]) 493 mac[i++] = buf[offs++]; 494 495 mac[i] = '\0'; 496 497 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 498 499 if (MAN_MAX == tok) { 500 mandoc_msg(MANDOCERR_MACRO, man->parse, 501 ln, ppos, buf + ppos - 1); 502 return(1); 503 } 504 505 /* The macro is sane. Jump to the next word. */ 506 507 while (buf[offs] && ' ' == buf[offs]) 508 offs++; 509 510 /* 511 * Trailing whitespace. Note that tabs are allowed to be passed 512 * into the parser as "text", so we only warn about spaces here. 513 */ 514 515 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 516 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 517 ln, offs - 1, NULL); 518 519 /* 520 * Remove prior ELINE macro, as it's being clobbered by a new 521 * macro. Note that NSCOPED macros do not close out ELINE 522 * macros---they don't print text---so we let those slip by. 523 */ 524 525 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 526 man->flags & MAN_ELINE) { 527 n = man->last; 528 assert(MAN_TEXT != n->type); 529 530 /* Remove repeated NSCOPED macros causing ELINE. */ 531 532 if (MAN_NSCOPED & man_macros[n->tok].flags) 533 n = n->parent; 534 535 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line, 536 n->pos, "%s breaks %s", man_macronames[tok], 537 man_macronames[n->tok]); 538 539 man_node_delete(man, n); 540 man->flags &= ~MAN_ELINE; 541 } 542 543 /* 544 * Remove prior BLINE macro that is being clobbered. 545 */ 546 if ((man->flags & MAN_BLINE) && 547 (MAN_BSCOPE & man_macros[tok].flags)) { 548 n = man->last; 549 550 /* Might be a text node like 8 in 551 * .TP 8 552 * .SH foo 553 */ 554 if (MAN_TEXT == n->type) 555 n = n->parent; 556 557 /* Remove element that didn't end BLINE, if any. */ 558 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) 559 n = n->parent; 560 561 assert(MAN_HEAD == n->type); 562 n = n->parent; 563 assert(MAN_BLOCK == n->type); 564 assert(MAN_SCOPED & man_macros[n->tok].flags); 565 566 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line, 567 n->pos, "%s breaks %s", man_macronames[tok], 568 man_macronames[n->tok]); 569 570 man_node_delete(man, n); 571 man->flags &= ~MAN_BLINE; 572 } 573 574 /* Remember whether we are in next-line scope for a block head. */ 575 576 bline = man->flags & MAN_BLINE; 577 578 /* Call to handler... */ 579 580 assert(man_macros[tok].fp); 581 if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf)) 582 return(0); 583 584 /* In quick mode (for mandocdb), abort after the NAME section. */ 585 586 if (man->quick && MAN_SH == tok) { 587 n = man->last; 588 if (MAN_BODY == n->type && 589 strcmp(n->prev->child->string, "NAME")) 590 return(2); 591 } 592 593 /* 594 * If we are in a next-line scope for a block head, 595 * close it out now and switch to the body, 596 * unless the next-line scope is allowed to continue. 597 */ 598 599 if ( ! bline || man->flags & MAN_ELINE || 600 man_macros[tok].flags & MAN_NSCOPED) 601 return(1); 602 603 assert(MAN_BLINE & man->flags); 604 man->flags &= ~MAN_BLINE; 605 606 if ( ! man_unscope(man, man->last->parent)) 607 return(0); 608 return(man_body_alloc(man, ln, ppos, man->last->tok)); 609 } 610 611 /* 612 * Unlink a node from its context. If "man" is provided, the last parse 613 * point will also be adjusted accordingly. 614 */ 615 static void 616 man_node_unlink(struct man *man, struct man_node *n) 617 { 618 619 /* Adjust siblings. */ 620 621 if (n->prev) 622 n->prev->next = n->next; 623 if (n->next) 624 n->next->prev = n->prev; 625 626 /* Adjust parent. */ 627 628 if (n->parent) { 629 n->parent->nchild--; 630 if (n->parent->child == n) 631 n->parent->child = n->prev ? n->prev : n->next; 632 } 633 634 /* Adjust parse point, if applicable. */ 635 636 if (man && man->last == n) { 637 /*XXX: this can occur when bailing from validation. */ 638 /*assert(NULL == n->next);*/ 639 if (n->prev) { 640 man->last = n->prev; 641 man->next = MAN_NEXT_SIBLING; 642 } else { 643 man->last = n->parent; 644 man->next = MAN_NEXT_CHILD; 645 } 646 } 647 648 if (man && man->first == n) 649 man->first = NULL; 650 } 651 652 const struct mparse * 653 man_mparse(const struct man *man) 654 { 655 656 assert(man && man->parse); 657 return(man->parse); 658 } 659 660 void 661 man_deroff(char **dest, const struct man_node *n) 662 { 663 char *cp; 664 size_t sz; 665 666 if (MAN_TEXT != n->type) { 667 for (n = n->child; n; n = n->next) 668 man_deroff(dest, n); 669 return; 670 } 671 672 /* Skip leading whitespace and escape sequences. */ 673 674 cp = n->string; 675 while ('\0' != *cp) { 676 if ('\\' == *cp) { 677 cp++; 678 mandoc_escape((const char **)&cp, NULL, NULL); 679 } else if (isspace((unsigned char)*cp)) 680 cp++; 681 else 682 break; 683 } 684 685 /* Skip trailing whitespace. */ 686 687 for (sz = strlen(cp); sz; sz--) 688 if (0 == isspace((unsigned char)cp[sz-1])) 689 break; 690 691 /* Skip empty strings. */ 692 693 if (0 == sz) 694 return; 695 696 if (NULL == *dest) { 697 *dest = mandoc_strndup(cp, sz); 698 return; 699 } 700 701 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 702 free(*dest); 703 *dest = cp; 704 } 705