1 /* $OpenBSD: html.c,v 1.132 2019/09/05 13:34:55 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stddef.h> 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "out.h" 36 #include "html.h" 37 #include "manconf.h" 38 #include "main.h" 39 40 struct htmldata { 41 const char *name; 42 int flags; 43 #define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ 44 #define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ 45 #define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ 46 #define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ 47 #define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ 48 #define HTML_NLEND (1 << 5) /* Output line break before closing. */ 49 #define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ 50 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 51 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 52 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 53 #define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ 54 #define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ 55 }; 56 57 static const struct htmldata htmltags[TAG_MAX] = { 58 {"html", HTML_NLALL}, 59 {"head", HTML_NLALL | HTML_INDENT}, 60 {"meta", HTML_NOSTACK | HTML_NLALL}, 61 {"link", HTML_NOSTACK | HTML_NLALL}, 62 {"style", HTML_NLALL | HTML_INDENT}, 63 {"title", HTML_NLAROUND}, 64 {"body", HTML_NLALL}, 65 {"div", HTML_NLAROUND}, 66 {"section", HTML_NLALL}, 67 {"table", HTML_NLALL | HTML_INDENT}, 68 {"tr", HTML_NLALL | HTML_INDENT}, 69 {"td", HTML_NLAROUND}, 70 {"li", HTML_NLAROUND | HTML_INDENT}, 71 {"ul", HTML_NLALL | HTML_INDENT}, 72 {"ol", HTML_NLALL | HTML_INDENT}, 73 {"dl", HTML_NLALL | HTML_INDENT}, 74 {"dt", HTML_NLAROUND}, 75 {"dd", HTML_NLAROUND | HTML_INDENT}, 76 {"h1", HTML_TOPHRASE | HTML_NLAROUND}, 77 {"h2", HTML_TOPHRASE | HTML_NLAROUND}, 78 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, 79 {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT}, 80 {"a", HTML_INPHRASE | HTML_TOPHRASE}, 81 {"b", HTML_INPHRASE | HTML_TOPHRASE}, 82 {"cite", HTML_INPHRASE | HTML_TOPHRASE}, 83 {"code", HTML_INPHRASE | HTML_TOPHRASE}, 84 {"i", HTML_INPHRASE | HTML_TOPHRASE}, 85 {"small", HTML_INPHRASE | HTML_TOPHRASE}, 86 {"span", HTML_INPHRASE | HTML_TOPHRASE}, 87 {"var", HTML_INPHRASE | HTML_TOPHRASE}, 88 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, 89 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, 90 {"mrow", 0}, 91 {"mi", 0}, 92 {"mn", 0}, 93 {"mo", 0}, 94 {"msup", 0}, 95 {"msub", 0}, 96 {"msubsup", 0}, 97 {"mfrac", 0}, 98 {"msqrt", 0}, 99 {"mfenced", 0}, 100 {"mtable", 0}, 101 {"mtr", 0}, 102 {"mtd", 0}, 103 {"munderover", 0}, 104 {"munder", 0}, 105 {"mover", 0}, 106 }; 107 108 /* Avoid duplicate HTML id= attributes. */ 109 static struct ohash id_unique; 110 111 static void html_reset_internal(struct html *); 112 static void print_byte(struct html *, char); 113 static void print_endword(struct html *); 114 static void print_indent(struct html *); 115 static void print_word(struct html *, const char *); 116 117 static void print_ctag(struct html *, struct tag *); 118 static int print_escape(struct html *, char); 119 static int print_encode(struct html *, const char *, const char *, int); 120 static void print_href(struct html *, const char *, const char *, int); 121 static void print_metaf(struct html *); 122 123 124 void * 125 html_alloc(const struct manoutput *outopts) 126 { 127 struct html *h; 128 129 h = mandoc_calloc(1, sizeof(struct html)); 130 131 h->tag = NULL; 132 h->style = outopts->style; 133 if ((h->base_man1 = outopts->man) == NULL) 134 h->base_man2 = NULL; 135 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 136 *h->base_man2++ = '\0'; 137 h->base_includes = outopts->includes; 138 if (outopts->fragment) 139 h->oflags |= HTML_FRAGMENT; 140 if (outopts->toc) 141 h->oflags |= HTML_TOC; 142 143 mandoc_ohash_init(&id_unique, 4, 0); 144 145 return h; 146 } 147 148 static void 149 html_reset_internal(struct html *h) 150 { 151 struct tag *tag; 152 char *cp; 153 unsigned int slot; 154 155 while ((tag = h->tag) != NULL) { 156 h->tag = tag->next; 157 free(tag); 158 } 159 cp = ohash_first(&id_unique, &slot); 160 while (cp != NULL) { 161 free(cp); 162 cp = ohash_next(&id_unique, &slot); 163 } 164 ohash_delete(&id_unique); 165 } 166 167 void 168 html_reset(void *p) 169 { 170 html_reset_internal(p); 171 mandoc_ohash_init(&id_unique, 4, 0); 172 } 173 174 void 175 html_free(void *p) 176 { 177 html_reset_internal(p); 178 free(p); 179 } 180 181 void 182 print_gen_head(struct html *h) 183 { 184 struct tag *t; 185 186 print_otag(h, TAG_META, "?", "charset", "utf-8"); 187 if (h->style != NULL) { 188 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 189 h->style, "type", "text/css", "media", "all"); 190 return; 191 } 192 193 /* 194 * Print a minimal embedded style sheet. 195 */ 196 197 t = print_otag(h, TAG_STYLE, ""); 198 print_text(h, "table.head, table.foot { width: 100%; }"); 199 print_endline(h); 200 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 201 print_endline(h); 202 print_text(h, "td.head-vol { text-align: center; }"); 203 print_endline(h); 204 print_text(h, ".Nd, .Bf, .Op { display: inline; }"); 205 print_endline(h); 206 print_text(h, ".Pa, .Ad { font-style: italic; }"); 207 print_endline(h); 208 print_text(h, ".Ms { font-weight: bold; }"); 209 print_endline(h); 210 print_text(h, ".Bl-diag "); 211 print_byte(h, '>'); 212 print_text(h, " dt { font-weight: bold; }"); 213 print_endline(h); 214 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " 215 "{ font-weight: bold; font-family: inherit; }"); 216 print_tagq(h, t); 217 } 218 219 int 220 html_setfont(struct html *h, enum mandoc_esc font) 221 { 222 switch (font) { 223 case ESCAPE_FONTPREV: 224 font = h->metal; 225 break; 226 case ESCAPE_FONTITALIC: 227 case ESCAPE_FONTBOLD: 228 case ESCAPE_FONTBI: 229 case ESCAPE_FONTCW: 230 case ESCAPE_FONTROMAN: 231 break; 232 case ESCAPE_FONT: 233 font = ESCAPE_FONTROMAN; 234 break; 235 default: 236 return 0; 237 } 238 h->metal = h->metac; 239 h->metac = font; 240 return 1; 241 } 242 243 static void 244 print_metaf(struct html *h) 245 { 246 if (h->metaf) { 247 print_tagq(h, h->metaf); 248 h->metaf = NULL; 249 } 250 switch (h->metac) { 251 case ESCAPE_FONTITALIC: 252 h->metaf = print_otag(h, TAG_I, ""); 253 break; 254 case ESCAPE_FONTBOLD: 255 h->metaf = print_otag(h, TAG_B, ""); 256 break; 257 case ESCAPE_FONTBI: 258 h->metaf = print_otag(h, TAG_B, ""); 259 print_otag(h, TAG_I, ""); 260 break; 261 case ESCAPE_FONTCW: 262 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 263 break; 264 default: 265 break; 266 } 267 } 268 269 void 270 html_close_paragraph(struct html *h) 271 { 272 struct tag *this, *next; 273 int flags; 274 275 this = h->tag; 276 for (;;) { 277 next = this->next; 278 flags = htmltags[this->tag].flags; 279 if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) 280 print_ctag(h, this); 281 if ((flags & HTML_INPHRASE) == 0) 282 break; 283 this = next; 284 } 285 } 286 287 /* 288 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 289 * TOKEN_NONE does not switch. The old mode is returned. 290 */ 291 enum roff_tok 292 html_fillmode(struct html *h, enum roff_tok want) 293 { 294 struct tag *t; 295 enum roff_tok had; 296 297 for (t = h->tag; t != NULL; t = t->next) 298 if (t->tag == TAG_PRE) 299 break; 300 301 had = t == NULL ? ROFF_fi : ROFF_nf; 302 303 if (want != had) { 304 switch (want) { 305 case ROFF_fi: 306 print_tagq(h, t); 307 break; 308 case ROFF_nf: 309 html_close_paragraph(h); 310 print_otag(h, TAG_PRE, ""); 311 break; 312 case TOKEN_NONE: 313 break; 314 default: 315 abort(); 316 } 317 } 318 return had; 319 } 320 321 char * 322 html_make_id(const struct roff_node *n, int unique) 323 { 324 const struct roff_node *nch; 325 char *buf, *bufs, *cp; 326 unsigned int slot; 327 int suffix; 328 329 for (nch = n->child; nch != NULL; nch = nch->next) 330 if (nch->type != ROFFT_TEXT) 331 return NULL; 332 333 buf = NULL; 334 deroff(&buf, n); 335 if (buf == NULL) 336 return NULL; 337 338 /* 339 * In ID attributes, only use ASCII characters that are 340 * permitted in URL-fragment strings according to the 341 * explicit list at: 342 * https://url.spec.whatwg.org/#url-fragment-string 343 */ 344 345 for (cp = buf; *cp != '\0'; cp++) 346 if (isalnum((unsigned char)*cp) == 0 && 347 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL) 348 *cp = '_'; 349 350 if (unique == 0) 351 return buf; 352 353 /* Avoid duplicate HTML id= attributes. */ 354 355 bufs = NULL; 356 suffix = 1; 357 slot = ohash_qlookup(&id_unique, buf); 358 cp = ohash_find(&id_unique, slot); 359 if (cp != NULL) { 360 while (cp != NULL) { 361 free(bufs); 362 if (++suffix > 127) { 363 free(buf); 364 return NULL; 365 } 366 mandoc_asprintf(&bufs, "%s_%d", buf, suffix); 367 slot = ohash_qlookup(&id_unique, bufs); 368 cp = ohash_find(&id_unique, slot); 369 } 370 free(buf); 371 buf = bufs; 372 } 373 ohash_insert(&id_unique, slot, buf); 374 return buf; 375 } 376 377 static int 378 print_escape(struct html *h, char c) 379 { 380 381 switch (c) { 382 case '<': 383 print_word(h, "<"); 384 break; 385 case '>': 386 print_word(h, ">"); 387 break; 388 case '&': 389 print_word(h, "&"); 390 break; 391 case '"': 392 print_word(h, """); 393 break; 394 case ASCII_NBRSP: 395 print_word(h, " "); 396 break; 397 case ASCII_HYPH: 398 print_byte(h, '-'); 399 break; 400 case ASCII_BREAK: 401 break; 402 default: 403 return 0; 404 } 405 return 1; 406 } 407 408 static int 409 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 410 { 411 char numbuf[16]; 412 const char *seq; 413 size_t sz; 414 int c, len, breakline, nospace; 415 enum mandoc_esc esc; 416 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 417 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 418 419 if (pend == NULL) 420 pend = strchr(p, '\0'); 421 422 breakline = 0; 423 nospace = 0; 424 425 while (p < pend) { 426 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 427 h->flags &= ~HTML_SKIPCHAR; 428 p++; 429 continue; 430 } 431 432 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 433 print_byte(h, *p); 434 435 if (breakline && 436 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 437 print_otag(h, TAG_BR, ""); 438 breakline = 0; 439 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 440 p++; 441 continue; 442 } 443 444 if (p >= pend) 445 break; 446 447 if (*p == ' ') { 448 print_endword(h); 449 p++; 450 continue; 451 } 452 453 if (print_escape(h, *p++)) 454 continue; 455 456 esc = mandoc_escape(&p, &seq, &len); 457 switch (esc) { 458 case ESCAPE_FONT: 459 case ESCAPE_FONTPREV: 460 case ESCAPE_FONTBOLD: 461 case ESCAPE_FONTITALIC: 462 case ESCAPE_FONTBI: 463 case ESCAPE_FONTCW: 464 case ESCAPE_FONTROMAN: 465 if (0 == norecurse) { 466 h->flags |= HTML_NOSPACE; 467 if (html_setfont(h, esc)) 468 print_metaf(h); 469 h->flags &= ~HTML_NOSPACE; 470 } 471 continue; 472 case ESCAPE_SKIPCHAR: 473 h->flags |= HTML_SKIPCHAR; 474 continue; 475 case ESCAPE_ERROR: 476 continue; 477 default: 478 break; 479 } 480 481 if (h->flags & HTML_SKIPCHAR) { 482 h->flags &= ~HTML_SKIPCHAR; 483 continue; 484 } 485 486 switch (esc) { 487 case ESCAPE_UNICODE: 488 /* Skip past "u" header. */ 489 c = mchars_num2uc(seq + 1, len - 1); 490 break; 491 case ESCAPE_NUMBERED: 492 c = mchars_num2char(seq, len); 493 if (c < 0) 494 continue; 495 break; 496 case ESCAPE_SPECIAL: 497 c = mchars_spec2cp(seq, len); 498 if (c <= 0) 499 continue; 500 break; 501 case ESCAPE_UNDEF: 502 c = *seq; 503 break; 504 case ESCAPE_DEVICE: 505 print_word(h, "html"); 506 continue; 507 case ESCAPE_BREAK: 508 breakline = 1; 509 continue; 510 case ESCAPE_NOSPACE: 511 if ('\0' == *p) 512 nospace = 1; 513 continue; 514 case ESCAPE_OVERSTRIKE: 515 if (len == 0) 516 continue; 517 c = seq[len - 1]; 518 break; 519 default: 520 continue; 521 } 522 if ((c < 0x20 && c != 0x09) || 523 (c > 0x7E && c < 0xA0)) 524 c = 0xFFFD; 525 if (c > 0x7E) { 526 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 527 print_word(h, numbuf); 528 } else if (print_escape(h, c) == 0) 529 print_byte(h, c); 530 } 531 532 return nospace; 533 } 534 535 static void 536 print_href(struct html *h, const char *name, const char *sec, int man) 537 { 538 struct stat sb; 539 const char *p, *pp; 540 char *filename; 541 542 if (man) { 543 pp = h->base_man1; 544 if (h->base_man2 != NULL) { 545 mandoc_asprintf(&filename, "%s.%s", name, sec); 546 if (stat(filename, &sb) == -1) 547 pp = h->base_man2; 548 free(filename); 549 } 550 } else 551 pp = h->base_includes; 552 553 while ((p = strchr(pp, '%')) != NULL) { 554 print_encode(h, pp, p, 1); 555 if (man && p[1] == 'S') { 556 if (sec == NULL) 557 print_byte(h, '1'); 558 else 559 print_encode(h, sec, NULL, 1); 560 } else if ((man && p[1] == 'N') || 561 (man == 0 && p[1] == 'I')) 562 print_encode(h, name, NULL, 1); 563 else 564 print_encode(h, p, p + 2, 1); 565 pp = p + 2; 566 } 567 if (*pp != '\0') 568 print_encode(h, pp, NULL, 1); 569 } 570 571 struct tag * 572 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 573 { 574 va_list ap; 575 struct tag *t; 576 const char *attr; 577 char *arg1, *arg2; 578 int style_written, tflags; 579 580 tflags = htmltags[tag].flags; 581 582 /* Flow content is not allowed in phrasing context. */ 583 584 if ((tflags & HTML_INPHRASE) == 0) { 585 for (t = h->tag; t != NULL; t = t->next) { 586 if (t->closed) 587 continue; 588 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); 589 break; 590 } 591 592 /* 593 * Always wrap phrasing elements in a paragraph 594 * unless already contained in some flow container; 595 * never put them directly into a section. 596 */ 597 598 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) 599 print_otag(h, TAG_P, "c", "Pp"); 600 601 /* Push this tag onto the stack of open scopes. */ 602 603 if ((tflags & HTML_NOSTACK) == 0) { 604 t = mandoc_malloc(sizeof(struct tag)); 605 t->tag = tag; 606 t->next = h->tag; 607 t->refcnt = 0; 608 t->closed = 0; 609 h->tag = t; 610 } else 611 t = NULL; 612 613 if (tflags & HTML_NLBEFORE) 614 print_endline(h); 615 if (h->col == 0) 616 print_indent(h); 617 else if ((h->flags & HTML_NOSPACE) == 0) { 618 if (h->flags & HTML_KEEP) 619 print_word(h, " "); 620 else { 621 if (h->flags & HTML_PREKEEP) 622 h->flags |= HTML_KEEP; 623 print_endword(h); 624 } 625 } 626 627 if ( ! (h->flags & HTML_NONOSPACE)) 628 h->flags &= ~HTML_NOSPACE; 629 else 630 h->flags |= HTML_NOSPACE; 631 632 /* Print out the tag name and attributes. */ 633 634 print_byte(h, '<'); 635 print_word(h, htmltags[tag].name); 636 637 va_start(ap, fmt); 638 639 while (*fmt != '\0' && *fmt != 's') { 640 641 /* Parse attributes and arguments. */ 642 643 arg1 = va_arg(ap, char *); 644 arg2 = NULL; 645 switch (*fmt++) { 646 case 'c': 647 attr = "class"; 648 break; 649 case 'h': 650 attr = "href"; 651 break; 652 case 'i': 653 attr = "id"; 654 break; 655 case '?': 656 attr = arg1; 657 arg1 = va_arg(ap, char *); 658 break; 659 default: 660 abort(); 661 } 662 if (*fmt == 'M') 663 arg2 = va_arg(ap, char *); 664 if (arg1 == NULL) 665 continue; 666 667 /* Print the attributes. */ 668 669 print_byte(h, ' '); 670 print_word(h, attr); 671 print_byte(h, '='); 672 print_byte(h, '"'); 673 switch (*fmt) { 674 case 'I': 675 print_href(h, arg1, NULL, 0); 676 fmt++; 677 break; 678 case 'M': 679 print_href(h, arg1, arg2, 1); 680 fmt++; 681 break; 682 case 'R': 683 print_byte(h, '#'); 684 print_encode(h, arg1, NULL, 1); 685 fmt++; 686 break; 687 default: 688 print_encode(h, arg1, NULL, 1); 689 break; 690 } 691 print_byte(h, '"'); 692 } 693 694 style_written = 0; 695 while (*fmt++ == 's') { 696 arg1 = va_arg(ap, char *); 697 arg2 = va_arg(ap, char *); 698 if (arg2 == NULL) 699 continue; 700 print_byte(h, ' '); 701 if (style_written == 0) { 702 print_word(h, "style=\""); 703 style_written = 1; 704 } 705 print_word(h, arg1); 706 print_byte(h, ':'); 707 print_byte(h, ' '); 708 print_word(h, arg2); 709 print_byte(h, ';'); 710 } 711 if (style_written) 712 print_byte(h, '"'); 713 714 va_end(ap); 715 716 /* Accommodate for "well-formed" singleton escaping. */ 717 718 if (htmltags[tag].flags & HTML_NOSTACK) 719 print_byte(h, '/'); 720 721 print_byte(h, '>'); 722 723 if (tflags & HTML_NLBEGIN) 724 print_endline(h); 725 else 726 h->flags |= HTML_NOSPACE; 727 728 if (tflags & HTML_INDENT) 729 h->indent++; 730 if (tflags & HTML_NOINDENT) 731 h->noindent++; 732 733 return t; 734 } 735 736 static void 737 print_ctag(struct html *h, struct tag *tag) 738 { 739 int tflags; 740 741 if (tag->closed == 0) { 742 tag->closed = 1; 743 if (tag == h->metaf) 744 h->metaf = NULL; 745 if (tag == h->tblt) 746 h->tblt = NULL; 747 748 tflags = htmltags[tag->tag].flags; 749 if (tflags & HTML_INDENT) 750 h->indent--; 751 if (tflags & HTML_NOINDENT) 752 h->noindent--; 753 if (tflags & HTML_NLEND) 754 print_endline(h); 755 print_indent(h); 756 print_byte(h, '<'); 757 print_byte(h, '/'); 758 print_word(h, htmltags[tag->tag].name); 759 print_byte(h, '>'); 760 if (tflags & HTML_NLAFTER) 761 print_endline(h); 762 } 763 if (tag->refcnt == 0) { 764 h->tag = tag->next; 765 free(tag); 766 } 767 } 768 769 void 770 print_gen_decls(struct html *h) 771 { 772 print_word(h, "<!DOCTYPE html>"); 773 print_endline(h); 774 } 775 776 void 777 print_gen_comment(struct html *h, struct roff_node *n) 778 { 779 int wantblank; 780 781 print_word(h, "<!-- This is an automatically generated file." 782 " Do not edit."); 783 h->indent = 1; 784 wantblank = 0; 785 while (n != NULL && n->type == ROFFT_COMMENT) { 786 if (strstr(n->string, "-->") == NULL && 787 (wantblank || *n->string != '\0')) { 788 print_endline(h); 789 print_indent(h); 790 print_word(h, n->string); 791 wantblank = *n->string != '\0'; 792 } 793 n = n->next; 794 } 795 if (wantblank) 796 print_endline(h); 797 print_word(h, " -->"); 798 print_endline(h); 799 h->indent = 0; 800 } 801 802 void 803 print_text(struct html *h, const char *word) 804 { 805 /* 806 * Always wrap text in a paragraph unless already contained in 807 * some flow container; never put it directly into a section. 808 */ 809 810 if (h->tag->tag == TAG_SECTION) 811 print_otag(h, TAG_P, "c", "Pp"); 812 813 /* Output whitespace before this text? */ 814 815 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 816 if ( ! (HTML_KEEP & h->flags)) { 817 if (HTML_PREKEEP & h->flags) 818 h->flags |= HTML_KEEP; 819 print_endword(h); 820 } else 821 print_word(h, " "); 822 } 823 824 /* 825 * Print the text, optionally surrounded by HTML whitespace, 826 * optionally manually switching fonts before and after. 827 */ 828 829 assert(h->metaf == NULL); 830 print_metaf(h); 831 print_indent(h); 832 if ( ! print_encode(h, word, NULL, 0)) { 833 if ( ! (h->flags & HTML_NONOSPACE)) 834 h->flags &= ~HTML_NOSPACE; 835 h->flags &= ~HTML_NONEWLINE; 836 } else 837 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 838 839 if (h->metaf != NULL) { 840 print_tagq(h, h->metaf); 841 h->metaf = NULL; 842 } 843 844 h->flags &= ~HTML_IGNDELIM; 845 } 846 847 void 848 print_tagq(struct html *h, const struct tag *until) 849 { 850 struct tag *this, *next; 851 852 for (this = h->tag; this != NULL; this = next) { 853 next = this == until ? NULL : this->next; 854 print_ctag(h, this); 855 } 856 } 857 858 /* 859 * Close out all open elements up to but excluding suntil. 860 * Note that a paragraph just inside stays open together with it 861 * because paragraphs include subsequent phrasing content. 862 */ 863 void 864 print_stagq(struct html *h, const struct tag *suntil) 865 { 866 struct tag *this, *next; 867 868 for (this = h->tag; this != NULL; this = next) { 869 next = this->next; 870 if (this == suntil || (next == suntil && 871 (this->tag == TAG_P || this->tag == TAG_PRE))) 872 break; 873 print_ctag(h, this); 874 } 875 } 876 877 878 /*********************************************************************** 879 * Low level output functions. 880 * They implement line breaking using a short static buffer. 881 ***********************************************************************/ 882 883 /* 884 * Buffer one HTML output byte. 885 * If the buffer is full, flush and deactivate it and start a new line. 886 * If the buffer is inactive, print directly. 887 */ 888 static void 889 print_byte(struct html *h, char c) 890 { 891 if ((h->flags & HTML_BUFFER) == 0) { 892 putchar(c); 893 h->col++; 894 return; 895 } 896 897 if (h->col + h->bufcol < sizeof(h->buf)) { 898 h->buf[h->bufcol++] = c; 899 return; 900 } 901 902 putchar('\n'); 903 h->col = 0; 904 print_indent(h); 905 putchar(' '); 906 putchar(' '); 907 fwrite(h->buf, h->bufcol, 1, stdout); 908 putchar(c); 909 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 910 h->bufcol = 0; 911 h->flags &= ~HTML_BUFFER; 912 } 913 914 /* 915 * If something was printed on the current output line, end it. 916 * Not to be called right after print_indent(). 917 */ 918 void 919 print_endline(struct html *h) 920 { 921 if (h->col == 0) 922 return; 923 924 if (h->bufcol) { 925 putchar(' '); 926 fwrite(h->buf, h->bufcol, 1, stdout); 927 h->bufcol = 0; 928 } 929 putchar('\n'); 930 h->col = 0; 931 h->flags |= HTML_NOSPACE; 932 h->flags &= ~HTML_BUFFER; 933 } 934 935 /* 936 * Flush the HTML output buffer. 937 * If it is inactive, activate it. 938 */ 939 static void 940 print_endword(struct html *h) 941 { 942 if (h->noindent) { 943 print_byte(h, ' '); 944 return; 945 } 946 947 if ((h->flags & HTML_BUFFER) == 0) { 948 h->col++; 949 h->flags |= HTML_BUFFER; 950 } else if (h->bufcol) { 951 putchar(' '); 952 fwrite(h->buf, h->bufcol, 1, stdout); 953 h->col += h->bufcol + 1; 954 } 955 h->bufcol = 0; 956 } 957 958 /* 959 * If at the beginning of a new output line, 960 * perform indentation and mark the line as containing output. 961 * Make sure to really produce some output right afterwards, 962 * but do not use print_otag() for producing it. 963 */ 964 static void 965 print_indent(struct html *h) 966 { 967 size_t i; 968 969 if (h->col || h->noindent) 970 return; 971 972 h->col = h->indent * 2; 973 for (i = 0; i < h->col; i++) 974 putchar(' '); 975 } 976 977 /* 978 * Print or buffer some characters 979 * depending on the current HTML output buffer state. 980 */ 981 static void 982 print_word(struct html *h, const char *cp) 983 { 984 while (*cp != '\0') 985 print_byte(h, *cp++); 986 } 987