1 /* $OpenBSD: man_html.c,v 1.140 2023/10/24 20:30:49 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2013-15,2017-20,2022-23 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * HTML formatter for man(7) used by mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "man.h" 32 #include "out.h" 33 #include "html.h" 34 #include "main.h" 35 36 #define MAN_ARGS const struct roff_meta *man, \ 37 struct roff_node *n, \ 38 struct html *h 39 40 struct man_html_act { 41 int (*pre)(MAN_ARGS); 42 int (*post)(MAN_ARGS); 43 }; 44 45 static void print_man_head(const struct roff_meta *, 46 struct html *); 47 static void print_man_nodelist(MAN_ARGS); 48 static void print_man_node(MAN_ARGS); 49 static char list_continues(const struct roff_node *, 50 const struct roff_node *); 51 static int man_B_pre(MAN_ARGS); 52 static int man_IP_pre(MAN_ARGS); 53 static int man_I_pre(MAN_ARGS); 54 static int man_MR_pre(MAN_ARGS); 55 static int man_OP_pre(MAN_ARGS); 56 static int man_PP_pre(MAN_ARGS); 57 static int man_RS_pre(MAN_ARGS); 58 static int man_SH_pre(MAN_ARGS); 59 static int man_SM_pre(MAN_ARGS); 60 static int man_SY_pre(MAN_ARGS); 61 static int man_UR_pre(MAN_ARGS); 62 static int man_alt_pre(MAN_ARGS); 63 static int man_ign_pre(MAN_ARGS); 64 static int man_in_pre(MAN_ARGS); 65 static void man_root_post(const struct roff_meta *, 66 struct html *); 67 static void man_root_pre(const struct roff_meta *, 68 struct html *); 69 70 static const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = { 71 { NULL, NULL }, /* TH */ 72 { man_SH_pre, NULL }, /* SH */ 73 { man_SH_pre, NULL }, /* SS */ 74 { man_IP_pre, NULL }, /* TP */ 75 { man_IP_pre, NULL }, /* TQ */ 76 { man_PP_pre, NULL }, /* LP */ 77 { man_PP_pre, NULL }, /* PP */ 78 { man_PP_pre, NULL }, /* P */ 79 { man_IP_pre, NULL }, /* IP */ 80 { man_PP_pre, NULL }, /* HP */ 81 { man_SM_pre, NULL }, /* SM */ 82 { man_SM_pre, NULL }, /* SB */ 83 { man_alt_pre, NULL }, /* BI */ 84 { man_alt_pre, NULL }, /* IB */ 85 { man_alt_pre, NULL }, /* BR */ 86 { man_alt_pre, NULL }, /* RB */ 87 { NULL, NULL }, /* R */ 88 { man_B_pre, NULL }, /* B */ 89 { man_I_pre, NULL }, /* I */ 90 { man_alt_pre, NULL }, /* IR */ 91 { man_alt_pre, NULL }, /* RI */ 92 { NULL, NULL }, /* RE */ 93 { man_RS_pre, NULL }, /* RS */ 94 { man_ign_pre, NULL }, /* DT */ 95 { man_ign_pre, NULL }, /* UC */ 96 { man_ign_pre, NULL }, /* PD */ 97 { man_ign_pre, NULL }, /* AT */ 98 { man_in_pre, NULL }, /* in */ 99 { man_SY_pre, NULL }, /* SY */ 100 { NULL, NULL }, /* YS */ 101 { man_OP_pre, NULL }, /* OP */ 102 { NULL, NULL }, /* EX */ 103 { NULL, NULL }, /* EE */ 104 { man_UR_pre, NULL }, /* UR */ 105 { NULL, NULL }, /* UE */ 106 { man_UR_pre, NULL }, /* MT */ 107 { NULL, NULL }, /* ME */ 108 { man_MR_pre, NULL }, /* MR */ 109 }; 110 111 112 void 113 html_man(void *arg, const struct roff_meta *man) 114 { 115 struct html *h; 116 struct roff_node *n; 117 struct tag *t; 118 119 h = (struct html *)arg; 120 n = man->first->child; 121 122 if ((h->oflags & HTML_FRAGMENT) == 0) { 123 print_gen_decls(h); 124 print_otag(h, TAG_HTML, ""); 125 t = print_otag(h, TAG_HEAD, ""); 126 print_man_head(man, h); 127 print_tagq(h, t); 128 if (n != NULL && n->type == ROFFT_COMMENT) 129 print_gen_comment(h, n); 130 print_otag(h, TAG_BODY, ""); 131 } 132 133 man_root_pre(man, h); 134 t = print_otag(h, TAG_MAIN, "c", "manual-text"); 135 print_man_nodelist(man, n, h); 136 print_tagq(h, t); 137 man_root_post(man, h); 138 print_tagq(h, NULL); 139 } 140 141 static void 142 print_man_head(const struct roff_meta *man, struct html *h) 143 { 144 char *cp; 145 146 print_gen_head(h); 147 mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec); 148 print_otag(h, TAG_TITLE, ""); 149 print_text(h, cp); 150 free(cp); 151 } 152 153 static void 154 print_man_nodelist(MAN_ARGS) 155 { 156 while (n != NULL) { 157 print_man_node(man, n, h); 158 n = n->next; 159 } 160 } 161 162 static void 163 print_man_node(MAN_ARGS) 164 { 165 struct tag *t; 166 int child; 167 168 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) 169 return; 170 171 if ((n->flags & NODE_NOFILL) == 0) 172 html_fillmode(h, ROFF_fi); 173 else if (html_fillmode(h, ROFF_nf) == ROFF_nf && 174 n->tok != ROFF_fi && n->flags & NODE_LINE && 175 (n->prev == NULL || n->prev->tok != MAN_YS)) 176 print_endline(h); 177 178 child = 1; 179 switch (n->type) { 180 case ROFFT_TEXT: 181 if (*n->string == '\0') { 182 print_endline(h); 183 return; 184 } 185 if (*n->string == ' ' && n->flags & NODE_LINE && 186 (h->flags & HTML_NONEWLINE) == 0) 187 print_otag(h, TAG_BR, ""); 188 else if (n->flags & NODE_DELIMC) 189 h->flags |= HTML_NOSPACE; 190 t = h->tag; 191 t->refcnt++; 192 print_text(h, n->string); 193 break; 194 case ROFFT_EQN: 195 t = h->tag; 196 t->refcnt++; 197 print_eqn(h, n->eqn); 198 break; 199 case ROFFT_TBL: 200 /* 201 * This will take care of initialising all of the table 202 * state data for the first table, then tearing it down 203 * for the last one. 204 */ 205 print_tbl(h, n->span); 206 return; 207 default: 208 /* 209 * Close out scope of font prior to opening a macro 210 * scope. 211 */ 212 if (h->metac != ESCAPE_FONTROMAN) { 213 h->metal = h->metac; 214 h->metac = ESCAPE_FONTROMAN; 215 } 216 217 /* 218 * Close out the current table, if it's open, and unset 219 * the "meta" table state. This will be reopened on the 220 * next table element. 221 */ 222 if (h->tblt != NULL) 223 print_tblclose(h); 224 t = h->tag; 225 t->refcnt++; 226 if (n->tok < ROFF_MAX) { 227 roff_html_pre(h, n); 228 t->refcnt--; 229 print_stagq(h, t); 230 return; 231 } 232 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 233 if (man_html_acts[n->tok - MAN_TH].pre != NULL) 234 child = (*man_html_acts[n->tok - MAN_TH].pre)(man, 235 n, h); 236 break; 237 } 238 239 if (child && n->child != NULL) 240 print_man_nodelist(man, n->child, h); 241 242 /* This will automatically close out any font scope. */ 243 t->refcnt--; 244 if (n->type == ROFFT_BLOCK && 245 (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) { 246 t = h->tag; 247 while (t->tag != TAG_DL && t->tag != TAG_UL) 248 t = t->next; 249 /* 250 * Close the list if no further item of the same type 251 * follows; otherwise, close the item only. 252 */ 253 if (list_continues(n, roff_node_next(n)) == '\0') { 254 print_tagq(h, t); 255 t = NULL; 256 } 257 } 258 if (t != NULL) 259 print_stagq(h, t); 260 } 261 262 static void 263 man_root_pre(const struct roff_meta *man, struct html *h) 264 { 265 struct tag *t; 266 char *title; 267 268 assert(man->title); 269 assert(man->msec); 270 mandoc_asprintf(&title, "%s(%s)", man->title, man->msec); 271 272 t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader", 273 "aria-label", "Manual header line"); 274 275 print_otag(h, TAG_SPAN, "c", "head-ltitle"); 276 print_text(h, title); 277 print_stagq(h, t); 278 279 print_otag(h, TAG_SPAN, "c", "head-vol"); 280 if (man->vol != NULL) 281 print_text(h, man->vol); 282 print_stagq(h, t); 283 284 print_otag(h, TAG_SPAN, "c", "head-rtitle"); 285 print_text(h, title); 286 print_tagq(h, t); 287 free(title); 288 } 289 290 static void 291 man_root_post(const struct roff_meta *man, struct html *h) 292 { 293 struct tag *t; 294 295 t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter", 296 "aria-label", "Manual footer line"); 297 298 print_otag(h, TAG_SPAN, "c", "foot-left"); 299 print_stagq(h, t); 300 301 print_otag(h, TAG_SPAN, "c", "foot-date"); 302 print_text(h, man->date); 303 print_stagq(h, t); 304 305 print_otag(h, TAG_SPAN, "c", "foot-os"); 306 if (man->os != NULL) 307 print_text(h, man->os); 308 print_tagq(h, t); 309 } 310 311 static int 312 man_SH_pre(MAN_ARGS) 313 { 314 const char *class; 315 enum htmltag tag; 316 317 if (n->tok == MAN_SH) { 318 tag = TAG_H2; 319 class = "Sh"; 320 } else { 321 tag = TAG_H3; 322 class = "Ss"; 323 } 324 switch (n->type) { 325 case ROFFT_BLOCK: 326 html_close_paragraph(h); 327 print_otag(h, TAG_SECTION, "c", class); 328 break; 329 case ROFFT_HEAD: 330 print_otag_id(h, tag, class, n); 331 break; 332 case ROFFT_BODY: 333 break; 334 default: 335 abort(); 336 } 337 return 1; 338 } 339 340 static int 341 man_alt_pre(MAN_ARGS) 342 { 343 const struct roff_node *nn; 344 struct tag *t; 345 int i; 346 enum htmltag fp; 347 348 for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) { 349 switch (n->tok) { 350 case MAN_BI: 351 fp = i % 2 ? TAG_I : TAG_B; 352 break; 353 case MAN_IB: 354 fp = i % 2 ? TAG_B : TAG_I; 355 break; 356 case MAN_RI: 357 fp = i % 2 ? TAG_I : TAG_MAX; 358 break; 359 case MAN_IR: 360 fp = i % 2 ? TAG_MAX : TAG_I; 361 break; 362 case MAN_BR: 363 fp = i % 2 ? TAG_MAX : TAG_B; 364 break; 365 case MAN_RB: 366 fp = i % 2 ? TAG_B : TAG_MAX; 367 break; 368 default: 369 abort(); 370 } 371 372 if (i) 373 h->flags |= HTML_NOSPACE; 374 375 if (fp != TAG_MAX) 376 t = print_otag(h, fp, ""); 377 378 print_text(h, nn->string); 379 380 if (fp != TAG_MAX) 381 print_tagq(h, t); 382 } 383 return 0; 384 } 385 386 static int 387 man_SM_pre(MAN_ARGS) 388 { 389 print_otag(h, TAG_SMALL, ""); 390 if (n->tok == MAN_SB) 391 print_otag(h, TAG_B, ""); 392 return 1; 393 } 394 395 static int 396 man_PP_pre(MAN_ARGS) 397 { 398 switch (n->type) { 399 case ROFFT_BLOCK: 400 html_close_paragraph(h); 401 break; 402 case ROFFT_HEAD: 403 return 0; 404 case ROFFT_BODY: 405 if (n->child != NULL && 406 (n->child->flags & NODE_NOFILL) == 0) 407 print_otag(h, TAG_P, "c", 408 n->tok == MAN_HP ? "Pp HP" : "Pp"); 409 break; 410 default: 411 abort(); 412 } 413 return 1; 414 } 415 416 static char 417 list_continues(const struct roff_node *n1, const struct roff_node *n2) 418 { 419 const char *s1, *s2; 420 char c1, c2; 421 422 if (n1 == NULL || n1->type != ROFFT_BLOCK || 423 n2 == NULL || n2->type != ROFFT_BLOCK) 424 return '\0'; 425 if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) && 426 (n2->tok == MAN_TP || n2->tok == MAN_TQ)) 427 return ' '; 428 if (n1->tok != MAN_IP || n2->tok != MAN_IP) 429 return '\0'; 430 n1 = n1->head->child; 431 n2 = n2->head->child; 432 s1 = n1 == NULL ? "" : n1->string; 433 s2 = n2 == NULL ? "" : n2->string; 434 c1 = strcmp(s1, "*") == 0 ? '*' : 435 strcmp(s1, "\\-") == 0 ? '-' : 436 strcmp(s1, "\\(bu") == 0 ? 'b' : 437 strcmp(s1, "\\[bu]") == 0 ? 'b' : ' '; 438 c2 = strcmp(s2, "*") == 0 ? '*' : 439 strcmp(s2, "\\-") == 0 ? '-' : 440 strcmp(s2, "\\(bu") == 0 ? 'b' : 441 strcmp(s2, "\\[bu]") == 0 ? 'b' : ' '; 442 return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1; 443 } 444 445 static int 446 man_IP_pre(MAN_ARGS) 447 { 448 struct roff_node *nn; 449 const char *list_class; 450 enum htmltag list_elem, body_elem; 451 char list_type; 452 453 nn = n->type == ROFFT_BLOCK ? n : n->parent; 454 list_type = list_continues(roff_node_prev(nn), nn); 455 if (list_type == '\0') { 456 /* Start a new list. */ 457 list_type = list_continues(nn, roff_node_next(nn)); 458 if (list_type == '\0') 459 list_type = ' '; 460 switch (list_type) { 461 case ' ': 462 list_class = "Bl-tag"; 463 list_elem = TAG_DL; 464 break; 465 case '*': 466 list_class = "Bl-bullet"; 467 list_elem = TAG_UL; 468 break; 469 case '-': 470 list_class = "Bl-dash"; 471 list_elem = TAG_UL; 472 break; 473 default: 474 abort(); 475 } 476 } else { 477 /* Continue a list that was started earlier. */ 478 list_class = NULL; 479 list_elem = TAG_MAX; 480 } 481 body_elem = list_type == ' ' ? TAG_DD : TAG_LI; 482 483 switch (n->type) { 484 case ROFFT_BLOCK: 485 html_close_paragraph(h); 486 if (list_elem != TAG_MAX) 487 print_otag(h, list_elem, "c", list_class); 488 return 1; 489 case ROFFT_HEAD: 490 if (body_elem == TAG_LI) 491 return 0; 492 print_otag_id(h, TAG_DT, NULL, n); 493 break; 494 case ROFFT_BODY: 495 print_otag(h, body_elem, ""); 496 return 1; 497 default: 498 abort(); 499 } 500 switch(n->tok) { 501 case MAN_IP: /* Only print the first header element. */ 502 if (n->child != NULL) 503 print_man_node(man, n->child, h); 504 break; 505 case MAN_TP: /* Only print next-line header elements. */ 506 case MAN_TQ: 507 nn = n->child; 508 while (nn != NULL && (NODE_LINE & nn->flags) == 0) 509 nn = nn->next; 510 while (nn != NULL) { 511 print_man_node(man, nn, h); 512 nn = nn->next; 513 } 514 break; 515 default: 516 abort(); 517 } 518 return 0; 519 } 520 521 static int 522 man_MR_pre(MAN_ARGS) 523 { 524 struct tag *t; 525 const char *name, *section, *suffix; 526 char *label; 527 528 html_setfont(h, ESCAPE_FONTROMAN); 529 name = section = suffix = label = NULL; 530 if (n->child != NULL) { 531 name = n->child->string; 532 if (n->child->next != NULL) { 533 section = n->child->next->string; 534 mandoc_asprintf(&label, 535 "%s, section %s", name, section); 536 if (n->child->next->next != NULL) 537 suffix = n->child->next->next->string; 538 } 539 } 540 541 if (name != NULL && section != NULL && h->base_man1 != NULL) 542 t = print_otag(h, TAG_A, "chM?", "Xr", 543 name, section, "aria-label", label); 544 else 545 t = print_otag(h, TAG_A, "c?", "Xr", "aria-label", label); 546 547 free(label); 548 if (name != NULL) { 549 print_text(h, name); 550 h->flags |= HTML_NOSPACE; 551 } 552 print_text(h, "("); 553 h->flags |= HTML_NOSPACE; 554 if (section != NULL) { 555 print_text(h, section); 556 h->flags |= HTML_NOSPACE; 557 } 558 print_text(h, ")"); 559 print_tagq(h, t); 560 if (suffix != NULL) { 561 h->flags |= HTML_NOSPACE; 562 print_text(h, suffix); 563 } 564 return 0; 565 } 566 567 static int 568 man_OP_pre(MAN_ARGS) 569 { 570 struct tag *tt; 571 572 print_text(h, "["); 573 h->flags |= HTML_NOSPACE; 574 tt = print_otag(h, TAG_SPAN, "c", "Op"); 575 576 if ((n = n->child) != NULL) { 577 print_otag(h, TAG_B, ""); 578 print_text(h, n->string); 579 } 580 581 print_stagq(h, tt); 582 583 if (n != NULL && n->next != NULL) { 584 print_otag(h, TAG_I, ""); 585 print_text(h, n->next->string); 586 } 587 588 print_stagq(h, tt); 589 h->flags |= HTML_NOSPACE; 590 print_text(h, "]"); 591 return 0; 592 } 593 594 static int 595 man_B_pre(MAN_ARGS) 596 { 597 print_otag(h, TAG_B, ""); 598 return 1; 599 } 600 601 static int 602 man_I_pre(MAN_ARGS) 603 { 604 print_otag(h, TAG_I, ""); 605 return 1; 606 } 607 608 static int 609 man_in_pre(MAN_ARGS) 610 { 611 print_otag(h, TAG_BR, ""); 612 return 0; 613 } 614 615 static int 616 man_ign_pre(MAN_ARGS) 617 { 618 return 0; 619 } 620 621 static int 622 man_RS_pre(MAN_ARGS) 623 { 624 switch (n->type) { 625 case ROFFT_BLOCK: 626 html_close_paragraph(h); 627 break; 628 case ROFFT_HEAD: 629 return 0; 630 case ROFFT_BODY: 631 print_otag(h, TAG_DIV, "c", "Bd-indent"); 632 break; 633 default: 634 abort(); 635 } 636 return 1; 637 } 638 639 static int 640 man_SY_pre(MAN_ARGS) 641 { 642 switch (n->type) { 643 case ROFFT_BLOCK: 644 html_close_paragraph(h); 645 print_otag(h, TAG_TABLE, "c", "Nm"); 646 print_otag(h, TAG_TR, ""); 647 break; 648 case ROFFT_HEAD: 649 print_otag(h, TAG_TD, ""); 650 print_otag(h, TAG_CODE, "c", "Nm"); 651 break; 652 case ROFFT_BODY: 653 print_otag(h, TAG_TD, ""); 654 break; 655 default: 656 abort(); 657 } 658 return 1; 659 } 660 661 static int 662 man_UR_pre(MAN_ARGS) 663 { 664 char *cp; 665 666 n = n->child; 667 assert(n->type == ROFFT_HEAD); 668 if (n->child != NULL) { 669 assert(n->child->type == ROFFT_TEXT); 670 if (n->tok == MAN_MT) { 671 mandoc_asprintf(&cp, "mailto:%s", n->child->string); 672 print_otag(h, TAG_A, "ch", "Mt", cp); 673 free(cp); 674 } else 675 print_otag(h, TAG_A, "ch", "Lk", n->child->string); 676 } 677 678 assert(n->next->type == ROFFT_BODY); 679 if (n->next->child != NULL) 680 n = n->next; 681 682 print_man_nodelist(man, n->child, h); 683 return 0; 684 } 685