1 /* $OpenBSD: html.c,v 1.146 2021/09/09 14:45:18 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Common functions for mandoc(1) HTML formatters. 19 * For use by individual formatters and by the main program. 20 */ 21 #include <sys/types.h> 22 #include <sys/stat.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdarg.h> 27 #include <stddef.h> 28 #include <stdio.h> 29 #include <stdint.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc_ohash.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "out.h" 39 #include "html.h" 40 #include "manconf.h" 41 #include "main.h" 42 43 struct htmldata { 44 const char *name; 45 int flags; 46 #define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ 47 #define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ 48 #define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ 49 #define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ 50 #define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ 51 #define HTML_NLEND (1 << 5) /* Output line break before closing. */ 52 #define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ 53 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 54 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 55 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 56 #define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ 57 #define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ 58 }; 59 60 static const struct htmldata htmltags[TAG_MAX] = { 61 {"html", HTML_NLALL}, 62 {"head", HTML_NLALL | HTML_INDENT}, 63 {"meta", HTML_NOSTACK | HTML_NLALL}, 64 {"link", HTML_NOSTACK | HTML_NLALL}, 65 {"style", HTML_NLALL | HTML_INDENT}, 66 {"title", HTML_NLAROUND}, 67 {"body", HTML_NLALL}, 68 {"div", HTML_NLAROUND}, 69 {"section", HTML_NLALL}, 70 {"table", HTML_NLALL | HTML_INDENT}, 71 {"tr", HTML_NLALL | HTML_INDENT}, 72 {"td", HTML_NLAROUND}, 73 {"li", HTML_NLAROUND | HTML_INDENT}, 74 {"ul", HTML_NLALL | HTML_INDENT}, 75 {"ol", HTML_NLALL | HTML_INDENT}, 76 {"dl", HTML_NLALL | HTML_INDENT}, 77 {"dt", HTML_NLAROUND}, 78 {"dd", HTML_NLAROUND | HTML_INDENT}, 79 {"h1", HTML_TOPHRASE | HTML_NLAROUND}, 80 {"h2", HTML_TOPHRASE | HTML_NLAROUND}, 81 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, 82 {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT}, 83 {"a", HTML_INPHRASE | HTML_TOPHRASE}, 84 {"b", HTML_INPHRASE | HTML_TOPHRASE}, 85 {"cite", HTML_INPHRASE | HTML_TOPHRASE}, 86 {"code", HTML_INPHRASE | HTML_TOPHRASE}, 87 {"i", HTML_INPHRASE | HTML_TOPHRASE}, 88 {"small", HTML_INPHRASE | HTML_TOPHRASE}, 89 {"span", HTML_INPHRASE | HTML_TOPHRASE}, 90 {"var", HTML_INPHRASE | HTML_TOPHRASE}, 91 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, 92 {"hr", HTML_INPHRASE | HTML_NOSTACK}, 93 {"mark", HTML_INPHRASE }, 94 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, 95 {"mrow", 0}, 96 {"mi", 0}, 97 {"mn", 0}, 98 {"mo", 0}, 99 {"msup", 0}, 100 {"msub", 0}, 101 {"msubsup", 0}, 102 {"mfrac", 0}, 103 {"msqrt", 0}, 104 {"mfenced", 0}, 105 {"mtable", 0}, 106 {"mtr", 0}, 107 {"mtd", 0}, 108 {"munderover", 0}, 109 {"munder", 0}, 110 {"mover", 0}, 111 }; 112 113 /* Avoid duplicate HTML id= attributes. */ 114 115 struct id_entry { 116 int ord; /* Ordinal number of the latest occurrence. */ 117 char id[]; /* The id= attribute without any ordinal suffix. */ 118 }; 119 static struct ohash id_unique; 120 121 static void html_reset_internal(struct html *); 122 static void print_byte(struct html *, char); 123 static void print_endword(struct html *); 124 static void print_indent(struct html *); 125 static void print_word(struct html *, const char *); 126 127 static void print_ctag(struct html *, struct tag *); 128 static int print_escape(struct html *, char); 129 static int print_encode(struct html *, const char *, const char *, int); 130 static void print_href(struct html *, const char *, const char *, int); 131 static void print_metaf(struct html *); 132 133 134 void * 135 html_alloc(const struct manoutput *outopts) 136 { 137 struct html *h; 138 139 h = mandoc_calloc(1, sizeof(struct html)); 140 141 h->tag = NULL; 142 h->metac = h->metal = ESCAPE_FONTROMAN; 143 h->style = outopts->style; 144 if ((h->base_man1 = outopts->man) == NULL) 145 h->base_man2 = NULL; 146 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 147 *h->base_man2++ = '\0'; 148 h->base_includes = outopts->includes; 149 if (outopts->fragment) 150 h->oflags |= HTML_FRAGMENT; 151 if (outopts->toc) 152 h->oflags |= HTML_TOC; 153 154 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 155 156 return h; 157 } 158 159 static void 160 html_reset_internal(struct html *h) 161 { 162 struct tag *tag; 163 struct id_entry *entry; 164 unsigned int slot; 165 166 while ((tag = h->tag) != NULL) { 167 h->tag = tag->next; 168 free(tag); 169 } 170 entry = ohash_first(&id_unique, &slot); 171 while (entry != NULL) { 172 free(entry); 173 entry = ohash_next(&id_unique, &slot); 174 } 175 ohash_delete(&id_unique); 176 } 177 178 void 179 html_reset(void *p) 180 { 181 html_reset_internal(p); 182 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 183 } 184 185 void 186 html_free(void *p) 187 { 188 html_reset_internal(p); 189 free(p); 190 } 191 192 void 193 print_gen_head(struct html *h) 194 { 195 struct tag *t; 196 197 print_otag(h, TAG_META, "?", "charset", "utf-8"); 198 print_otag(h, TAG_META, "??", "name", "viewport", 199 "content", "width=device-width, initial-scale=1.0"); 200 if (h->style != NULL) { 201 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 202 h->style, "type", "text/css", "media", "all"); 203 return; 204 } 205 206 /* 207 * Print a minimal embedded style sheet. 208 */ 209 210 t = print_otag(h, TAG_STYLE, ""); 211 print_text(h, "table.head, table.foot { width: 100%; }"); 212 print_endline(h); 213 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 214 print_endline(h); 215 print_text(h, "td.head-vol { text-align: center; }"); 216 print_endline(h); 217 print_text(h, ".Nd, .Bf, .Op { display: inline; }"); 218 print_endline(h); 219 print_text(h, ".Pa, .Ad { font-style: italic; }"); 220 print_endline(h); 221 print_text(h, ".Ms { font-weight: bold; }"); 222 print_endline(h); 223 print_text(h, ".Bl-diag "); 224 print_byte(h, '>'); 225 print_text(h, " dt { font-weight: bold; }"); 226 print_endline(h); 227 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " 228 "{ font-weight: bold; font-family: inherit; }"); 229 print_tagq(h, t); 230 } 231 232 int 233 html_setfont(struct html *h, enum mandoc_esc font) 234 { 235 switch (font) { 236 case ESCAPE_FONTPREV: 237 font = h->metal; 238 break; 239 case ESCAPE_FONTITALIC: 240 case ESCAPE_FONTBOLD: 241 case ESCAPE_FONTBI: 242 case ESCAPE_FONTROMAN: 243 case ESCAPE_FONTCR: 244 case ESCAPE_FONTCB: 245 case ESCAPE_FONTCI: 246 break; 247 case ESCAPE_FONT: 248 font = ESCAPE_FONTROMAN; 249 break; 250 default: 251 return 0; 252 } 253 h->metal = h->metac; 254 h->metac = font; 255 return 1; 256 } 257 258 static void 259 print_metaf(struct html *h) 260 { 261 if (h->metaf) { 262 print_tagq(h, h->metaf); 263 h->metaf = NULL; 264 } 265 switch (h->metac) { 266 case ESCAPE_FONTITALIC: 267 h->metaf = print_otag(h, TAG_I, ""); 268 break; 269 case ESCAPE_FONTBOLD: 270 h->metaf = print_otag(h, TAG_B, ""); 271 break; 272 case ESCAPE_FONTBI: 273 h->metaf = print_otag(h, TAG_B, ""); 274 print_otag(h, TAG_I, ""); 275 break; 276 case ESCAPE_FONTCR: 277 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 278 break; 279 case ESCAPE_FONTCB: 280 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 281 print_otag(h, TAG_B, ""); 282 break; 283 case ESCAPE_FONTCI: 284 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 285 print_otag(h, TAG_I, ""); 286 break; 287 default: 288 break; 289 } 290 } 291 292 void 293 html_close_paragraph(struct html *h) 294 { 295 struct tag *this, *next; 296 int flags; 297 298 this = h->tag; 299 for (;;) { 300 next = this->next; 301 flags = htmltags[this->tag].flags; 302 if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) 303 print_ctag(h, this); 304 if ((flags & HTML_INPHRASE) == 0) 305 break; 306 this = next; 307 } 308 } 309 310 /* 311 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 312 * TOKEN_NONE does not switch. The old mode is returned. 313 */ 314 enum roff_tok 315 html_fillmode(struct html *h, enum roff_tok want) 316 { 317 struct tag *t; 318 enum roff_tok had; 319 320 for (t = h->tag; t != NULL; t = t->next) 321 if (t->tag == TAG_PRE) 322 break; 323 324 had = t == NULL ? ROFF_fi : ROFF_nf; 325 326 if (want != had) { 327 switch (want) { 328 case ROFF_fi: 329 print_tagq(h, t); 330 break; 331 case ROFF_nf: 332 html_close_paragraph(h); 333 print_otag(h, TAG_PRE, ""); 334 break; 335 case TOKEN_NONE: 336 break; 337 default: 338 abort(); 339 } 340 } 341 return had; 342 } 343 344 /* 345 * Allocate a string to be used for the "id=" attribute of an HTML 346 * element and/or as a segment identifier for a URI in an <a> element. 347 * The function may fail and return NULL if the node lacks text data 348 * to create the attribute from. 349 * The caller is responsible for free(3)ing the returned string. 350 * 351 * If the "unique" argument is non-zero, the "id_unique" ohash table 352 * is used for de-duplication. If the "unique" argument is 1, 353 * it is the first time the function is called for this tag and 354 * location, so if an ordinal suffix is needed, it is incremented. 355 * If the "unique" argument is 2, it is the second time the function 356 * is called for this tag and location, so the ordinal suffix 357 * remains unchanged. 358 */ 359 char * 360 html_make_id(const struct roff_node *n, int unique) 361 { 362 const struct roff_node *nch; 363 struct id_entry *entry; 364 char *buf, *cp; 365 size_t len; 366 unsigned int slot; 367 368 if (n->tag != NULL) 369 buf = mandoc_strdup(n->tag); 370 else { 371 switch (n->tok) { 372 case MDOC_Sh: 373 case MDOC_Ss: 374 case MDOC_Sx: 375 case MAN_SH: 376 case MAN_SS: 377 for (nch = n->child; nch != NULL; nch = nch->next) 378 if (nch->type != ROFFT_TEXT) 379 return NULL; 380 buf = NULL; 381 deroff(&buf, n); 382 if (buf == NULL) 383 return NULL; 384 break; 385 default: 386 if (n->child == NULL || n->child->type != ROFFT_TEXT) 387 return NULL; 388 buf = mandoc_strdup(n->child->string); 389 break; 390 } 391 } 392 393 /* 394 * In ID attributes, only use ASCII characters that are 395 * permitted in URL-fragment strings according to the 396 * explicit list at: 397 * https://url.spec.whatwg.org/#url-fragment-string 398 * In addition, reserve '~' for ordinal suffixes. 399 */ 400 401 for (cp = buf; *cp != '\0'; cp++) 402 if (isalnum((unsigned char)*cp) == 0 && 403 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL) 404 *cp = '_'; 405 406 if (unique == 0) 407 return buf; 408 409 /* Avoid duplicate HTML id= attributes. */ 410 411 slot = ohash_qlookup(&id_unique, buf); 412 if ((entry = ohash_find(&id_unique, slot)) == NULL) { 413 len = strlen(buf) + 1; 414 entry = mandoc_malloc(sizeof(*entry) + len); 415 entry->ord = 1; 416 memcpy(entry->id, buf, len); 417 ohash_insert(&id_unique, slot, entry); 418 } else if (unique == 1) 419 entry->ord++; 420 421 if (entry->ord > 1) { 422 cp = buf; 423 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord); 424 free(cp); 425 } 426 return buf; 427 } 428 429 static int 430 print_escape(struct html *h, char c) 431 { 432 433 switch (c) { 434 case '<': 435 print_word(h, "<"); 436 break; 437 case '>': 438 print_word(h, ">"); 439 break; 440 case '&': 441 print_word(h, "&"); 442 break; 443 case '"': 444 print_word(h, """); 445 break; 446 case ASCII_NBRSP: 447 print_word(h, " "); 448 break; 449 case ASCII_HYPH: 450 print_byte(h, '-'); 451 break; 452 case ASCII_BREAK: 453 break; 454 default: 455 return 0; 456 } 457 return 1; 458 } 459 460 static int 461 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 462 { 463 char numbuf[16]; 464 const char *seq; 465 size_t sz; 466 int c, len, breakline, nospace; 467 enum mandoc_esc esc; 468 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 469 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 470 471 if (pend == NULL) 472 pend = strchr(p, '\0'); 473 474 breakline = 0; 475 nospace = 0; 476 477 while (p < pend) { 478 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 479 h->flags &= ~HTML_SKIPCHAR; 480 p++; 481 continue; 482 } 483 484 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 485 print_byte(h, *p); 486 487 if (breakline && 488 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 489 print_otag(h, TAG_BR, ""); 490 breakline = 0; 491 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 492 p++; 493 continue; 494 } 495 496 if (p >= pend) 497 break; 498 499 if (*p == ' ') { 500 print_endword(h); 501 p++; 502 continue; 503 } 504 505 if (print_escape(h, *p++)) 506 continue; 507 508 esc = mandoc_escape(&p, &seq, &len); 509 switch (esc) { 510 case ESCAPE_FONT: 511 case ESCAPE_FONTPREV: 512 case ESCAPE_FONTBOLD: 513 case ESCAPE_FONTITALIC: 514 case ESCAPE_FONTBI: 515 case ESCAPE_FONTROMAN: 516 case ESCAPE_FONTCR: 517 case ESCAPE_FONTCB: 518 case ESCAPE_FONTCI: 519 if (0 == norecurse) { 520 h->flags |= HTML_NOSPACE; 521 if (html_setfont(h, esc)) 522 print_metaf(h); 523 h->flags &= ~HTML_NOSPACE; 524 } 525 continue; 526 case ESCAPE_SKIPCHAR: 527 h->flags |= HTML_SKIPCHAR; 528 continue; 529 case ESCAPE_ERROR: 530 continue; 531 default: 532 break; 533 } 534 535 if (h->flags & HTML_SKIPCHAR) { 536 h->flags &= ~HTML_SKIPCHAR; 537 continue; 538 } 539 540 switch (esc) { 541 case ESCAPE_UNICODE: 542 /* Skip past "u" header. */ 543 c = mchars_num2uc(seq + 1, len - 1); 544 break; 545 case ESCAPE_NUMBERED: 546 c = mchars_num2char(seq, len); 547 if (c < 0) 548 continue; 549 break; 550 case ESCAPE_SPECIAL: 551 c = mchars_spec2cp(seq, len); 552 if (c <= 0) 553 continue; 554 break; 555 case ESCAPE_UNDEF: 556 c = *seq; 557 break; 558 case ESCAPE_DEVICE: 559 print_word(h, "html"); 560 continue; 561 case ESCAPE_BREAK: 562 breakline = 1; 563 continue; 564 case ESCAPE_NOSPACE: 565 if ('\0' == *p) 566 nospace = 1; 567 continue; 568 case ESCAPE_OVERSTRIKE: 569 if (len == 0) 570 continue; 571 c = seq[len - 1]; 572 break; 573 default: 574 continue; 575 } 576 if ((c < 0x20 && c != 0x09) || 577 (c > 0x7E && c < 0xA0)) 578 c = 0xFFFD; 579 if (c > 0x7E) { 580 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 581 print_word(h, numbuf); 582 } else if (print_escape(h, c) == 0) 583 print_byte(h, c); 584 } 585 586 return nospace; 587 } 588 589 static void 590 print_href(struct html *h, const char *name, const char *sec, int man) 591 { 592 struct stat sb; 593 const char *p, *pp; 594 char *filename; 595 596 if (man) { 597 pp = h->base_man1; 598 if (h->base_man2 != NULL) { 599 mandoc_asprintf(&filename, "%s.%s", name, sec); 600 if (stat(filename, &sb) == -1) 601 pp = h->base_man2; 602 free(filename); 603 } 604 } else 605 pp = h->base_includes; 606 607 while ((p = strchr(pp, '%')) != NULL) { 608 print_encode(h, pp, p, 1); 609 if (man && p[1] == 'S') { 610 if (sec == NULL) 611 print_byte(h, '1'); 612 else 613 print_encode(h, sec, NULL, 1); 614 } else if ((man && p[1] == 'N') || 615 (man == 0 && p[1] == 'I')) 616 print_encode(h, name, NULL, 1); 617 else 618 print_encode(h, p, p + 2, 1); 619 pp = p + 2; 620 } 621 if (*pp != '\0') 622 print_encode(h, pp, NULL, 1); 623 } 624 625 struct tag * 626 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 627 { 628 va_list ap; 629 struct tag *t; 630 const char *attr; 631 char *arg1, *arg2; 632 int style_written, tflags; 633 634 tflags = htmltags[tag].flags; 635 636 /* Flow content is not allowed in phrasing context. */ 637 638 if ((tflags & HTML_INPHRASE) == 0) { 639 for (t = h->tag; t != NULL; t = t->next) { 640 if (t->closed) 641 continue; 642 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); 643 break; 644 } 645 646 /* 647 * Always wrap phrasing elements in a paragraph 648 * unless already contained in some flow container; 649 * never put them directly into a section. 650 */ 651 652 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) 653 print_otag(h, TAG_P, "c", "Pp"); 654 655 /* Push this tag onto the stack of open scopes. */ 656 657 if ((tflags & HTML_NOSTACK) == 0) { 658 t = mandoc_malloc(sizeof(struct tag)); 659 t->tag = tag; 660 t->next = h->tag; 661 t->refcnt = 0; 662 t->closed = 0; 663 h->tag = t; 664 } else 665 t = NULL; 666 667 if (tflags & HTML_NLBEFORE) 668 print_endline(h); 669 if (h->col == 0) 670 print_indent(h); 671 else if ((h->flags & HTML_NOSPACE) == 0) { 672 if (h->flags & HTML_KEEP) 673 print_word(h, " "); 674 else { 675 if (h->flags & HTML_PREKEEP) 676 h->flags |= HTML_KEEP; 677 print_endword(h); 678 } 679 } 680 681 if ( ! (h->flags & HTML_NONOSPACE)) 682 h->flags &= ~HTML_NOSPACE; 683 else 684 h->flags |= HTML_NOSPACE; 685 686 /* Print out the tag name and attributes. */ 687 688 print_byte(h, '<'); 689 print_word(h, htmltags[tag].name); 690 691 va_start(ap, fmt); 692 693 while (*fmt != '\0' && *fmt != 's') { 694 695 /* Parse attributes and arguments. */ 696 697 arg1 = va_arg(ap, char *); 698 arg2 = NULL; 699 switch (*fmt++) { 700 case 'c': 701 attr = "class"; 702 break; 703 case 'h': 704 attr = "href"; 705 break; 706 case 'i': 707 attr = "id"; 708 break; 709 case '?': 710 attr = arg1; 711 arg1 = va_arg(ap, char *); 712 break; 713 default: 714 abort(); 715 } 716 if (*fmt == 'M') 717 arg2 = va_arg(ap, char *); 718 if (arg1 == NULL) 719 continue; 720 721 /* Print the attributes. */ 722 723 print_byte(h, ' '); 724 print_word(h, attr); 725 print_byte(h, '='); 726 print_byte(h, '"'); 727 switch (*fmt) { 728 case 'I': 729 print_href(h, arg1, NULL, 0); 730 fmt++; 731 break; 732 case 'M': 733 print_href(h, arg1, arg2, 1); 734 fmt++; 735 break; 736 case 'R': 737 print_byte(h, '#'); 738 print_encode(h, arg1, NULL, 1); 739 fmt++; 740 break; 741 default: 742 print_encode(h, arg1, NULL, 1); 743 break; 744 } 745 print_byte(h, '"'); 746 } 747 748 style_written = 0; 749 while (*fmt++ == 's') { 750 arg1 = va_arg(ap, char *); 751 arg2 = va_arg(ap, char *); 752 if (arg2 == NULL) 753 continue; 754 print_byte(h, ' '); 755 if (style_written == 0) { 756 print_word(h, "style=\""); 757 style_written = 1; 758 } 759 print_word(h, arg1); 760 print_byte(h, ':'); 761 print_byte(h, ' '); 762 print_word(h, arg2); 763 print_byte(h, ';'); 764 } 765 if (style_written) 766 print_byte(h, '"'); 767 768 va_end(ap); 769 770 /* Accommodate for "well-formed" singleton escaping. */ 771 772 if (htmltags[tag].flags & HTML_NOSTACK) 773 print_byte(h, '/'); 774 775 print_byte(h, '>'); 776 777 if (tflags & HTML_NLBEGIN) 778 print_endline(h); 779 else 780 h->flags |= HTML_NOSPACE; 781 782 if (tflags & HTML_INDENT) 783 h->indent++; 784 if (tflags & HTML_NOINDENT) 785 h->noindent++; 786 787 return t; 788 } 789 790 /* 791 * Print an element with an optional "id=" attribute. 792 * If the element has phrasing content and an "id=" attribute, 793 * also add a permalink: outside if it can be in phrasing context, 794 * inside otherwise. 795 */ 796 struct tag * 797 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, 798 struct roff_node *n) 799 { 800 struct roff_node *nch; 801 struct tag *ret, *t; 802 char *id, *href; 803 804 ret = NULL; 805 id = href = NULL; 806 if (n->flags & NODE_ID) 807 id = html_make_id(n, 1); 808 if (n->flags & NODE_HREF) 809 href = id == NULL ? html_make_id(n, 2) : id; 810 if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE) 811 ret = print_otag(h, TAG_A, "chR", "permalink", href); 812 t = print_otag(h, elemtype, "ci", cattr, id); 813 if (ret == NULL) { 814 ret = t; 815 if (href != NULL && (nch = n->child) != NULL) { 816 /* man(7) is safe, it tags phrasing content only. */ 817 if (n->tok > MDOC_MAX || 818 htmltags[elemtype].flags & HTML_TOPHRASE) 819 nch = NULL; 820 else /* For mdoc(7), beware of nested blocks. */ 821 while (nch != NULL && nch->type == ROFFT_TEXT) 822 nch = nch->next; 823 if (nch == NULL) 824 print_otag(h, TAG_A, "chR", "permalink", href); 825 } 826 } 827 free(id); 828 if (id == NULL) 829 free(href); 830 return ret; 831 } 832 833 static void 834 print_ctag(struct html *h, struct tag *tag) 835 { 836 int tflags; 837 838 if (tag->closed == 0) { 839 tag->closed = 1; 840 if (tag == h->metaf) 841 h->metaf = NULL; 842 if (tag == h->tblt) 843 h->tblt = NULL; 844 845 tflags = htmltags[tag->tag].flags; 846 if (tflags & HTML_INDENT) 847 h->indent--; 848 if (tflags & HTML_NOINDENT) 849 h->noindent--; 850 if (tflags & HTML_NLEND) 851 print_endline(h); 852 print_indent(h); 853 print_byte(h, '<'); 854 print_byte(h, '/'); 855 print_word(h, htmltags[tag->tag].name); 856 print_byte(h, '>'); 857 if (tflags & HTML_NLAFTER) 858 print_endline(h); 859 } 860 if (tag->refcnt == 0) { 861 h->tag = tag->next; 862 free(tag); 863 } 864 } 865 866 void 867 print_gen_decls(struct html *h) 868 { 869 print_word(h, "<!DOCTYPE html>"); 870 print_endline(h); 871 } 872 873 void 874 print_gen_comment(struct html *h, struct roff_node *n) 875 { 876 int wantblank; 877 878 print_word(h, "<!-- This is an automatically generated file." 879 " Do not edit."); 880 h->indent = 1; 881 wantblank = 0; 882 while (n != NULL && n->type == ROFFT_COMMENT) { 883 if (strstr(n->string, "-->") == NULL && 884 (wantblank || *n->string != '\0')) { 885 print_endline(h); 886 print_indent(h); 887 print_word(h, n->string); 888 wantblank = *n->string != '\0'; 889 } 890 n = n->next; 891 } 892 if (wantblank) 893 print_endline(h); 894 print_word(h, " -->"); 895 print_endline(h); 896 h->indent = 0; 897 } 898 899 void 900 print_text(struct html *h, const char *word) 901 { 902 print_tagged_text(h, word, NULL); 903 } 904 905 void 906 print_tagged_text(struct html *h, const char *word, struct roff_node *n) 907 { 908 struct tag *t; 909 char *href; 910 911 /* 912 * Always wrap text in a paragraph unless already contained in 913 * some flow container; never put it directly into a section. 914 */ 915 916 if (h->tag->tag == TAG_SECTION) 917 print_otag(h, TAG_P, "c", "Pp"); 918 919 /* Output whitespace before this text? */ 920 921 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 922 if ( ! (HTML_KEEP & h->flags)) { 923 if (HTML_PREKEEP & h->flags) 924 h->flags |= HTML_KEEP; 925 print_endword(h); 926 } else 927 print_word(h, " "); 928 } 929 930 /* 931 * Optionally switch fonts, optionally write a permalink, then 932 * print the text, optionally surrounded by HTML whitespace. 933 */ 934 935 assert(h->metaf == NULL); 936 print_metaf(h); 937 print_indent(h); 938 939 if (n != NULL && (href = html_make_id(n, 2)) != NULL) { 940 t = print_otag(h, TAG_A, "chR", "permalink", href); 941 free(href); 942 } else 943 t = NULL; 944 945 if ( ! print_encode(h, word, NULL, 0)) { 946 if ( ! (h->flags & HTML_NONOSPACE)) 947 h->flags &= ~HTML_NOSPACE; 948 h->flags &= ~HTML_NONEWLINE; 949 } else 950 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 951 952 if (h->metaf != NULL) { 953 print_tagq(h, h->metaf); 954 h->metaf = NULL; 955 } else if (t != NULL) 956 print_tagq(h, t); 957 958 h->flags &= ~HTML_IGNDELIM; 959 } 960 961 void 962 print_tagq(struct html *h, const struct tag *until) 963 { 964 struct tag *this, *next; 965 966 for (this = h->tag; this != NULL; this = next) { 967 next = this == until ? NULL : this->next; 968 print_ctag(h, this); 969 } 970 } 971 972 /* 973 * Close out all open elements up to but excluding suntil. 974 * Note that a paragraph just inside stays open together with it 975 * because paragraphs include subsequent phrasing content. 976 */ 977 void 978 print_stagq(struct html *h, const struct tag *suntil) 979 { 980 struct tag *this, *next; 981 982 for (this = h->tag; this != NULL; this = next) { 983 next = this->next; 984 if (this == suntil || (next == suntil && 985 (this->tag == TAG_P || this->tag == TAG_PRE))) 986 break; 987 print_ctag(h, this); 988 } 989 } 990 991 992 /*********************************************************************** 993 * Low level output functions. 994 * They implement line breaking using a short static buffer. 995 ***********************************************************************/ 996 997 /* 998 * Buffer one HTML output byte. 999 * If the buffer is full, flush and deactivate it and start a new line. 1000 * If the buffer is inactive, print directly. 1001 */ 1002 static void 1003 print_byte(struct html *h, char c) 1004 { 1005 if ((h->flags & HTML_BUFFER) == 0) { 1006 putchar(c); 1007 h->col++; 1008 return; 1009 } 1010 1011 if (h->col + h->bufcol < sizeof(h->buf)) { 1012 h->buf[h->bufcol++] = c; 1013 return; 1014 } 1015 1016 putchar('\n'); 1017 h->col = 0; 1018 print_indent(h); 1019 putchar(' '); 1020 putchar(' '); 1021 fwrite(h->buf, h->bufcol, 1, stdout); 1022 putchar(c); 1023 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 1024 h->bufcol = 0; 1025 h->flags &= ~HTML_BUFFER; 1026 } 1027 1028 /* 1029 * If something was printed on the current output line, end it. 1030 * Not to be called right after print_indent(). 1031 */ 1032 void 1033 print_endline(struct html *h) 1034 { 1035 if (h->col == 0) 1036 return; 1037 1038 if (h->bufcol) { 1039 putchar(' '); 1040 fwrite(h->buf, h->bufcol, 1, stdout); 1041 h->bufcol = 0; 1042 } 1043 putchar('\n'); 1044 h->col = 0; 1045 h->flags |= HTML_NOSPACE; 1046 h->flags &= ~HTML_BUFFER; 1047 } 1048 1049 /* 1050 * Flush the HTML output buffer. 1051 * If it is inactive, activate it. 1052 */ 1053 static void 1054 print_endword(struct html *h) 1055 { 1056 if (h->noindent) { 1057 print_byte(h, ' '); 1058 return; 1059 } 1060 1061 if ((h->flags & HTML_BUFFER) == 0) { 1062 h->col++; 1063 h->flags |= HTML_BUFFER; 1064 } else if (h->bufcol) { 1065 putchar(' '); 1066 fwrite(h->buf, h->bufcol, 1, stdout); 1067 h->col += h->bufcol + 1; 1068 } 1069 h->bufcol = 0; 1070 } 1071 1072 /* 1073 * If at the beginning of a new output line, 1074 * perform indentation and mark the line as containing output. 1075 * Make sure to really produce some output right afterwards, 1076 * but do not use print_otag() for producing it. 1077 */ 1078 static void 1079 print_indent(struct html *h) 1080 { 1081 size_t i; 1082 1083 if (h->col || h->noindent) 1084 return; 1085 1086 h->col = h->indent * 2; 1087 for (i = 0; i < h->col; i++) 1088 putchar(' '); 1089 } 1090 1091 /* 1092 * Print or buffer some characters 1093 * depending on the current HTML output buffer state. 1094 */ 1095 static void 1096 print_word(struct html *h, const char *cp) 1097 { 1098 while (*cp != '\0') 1099 print_byte(h, *cp++); 1100 } 1101