1 /* $OpenBSD: html.c,v 1.56 2015/03/27 21:17:16 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdint.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <unistd.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "out.h" 32 #include "html.h" 33 #include "manconf.h" 34 #include "main.h" 35 36 struct htmldata { 37 const char *name; 38 int flags; 39 #define HTML_CLRLINE (1 << 0) 40 #define HTML_NOSTACK (1 << 1) 41 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 42 }; 43 44 static const struct htmldata htmltags[TAG_MAX] = { 45 {"html", HTML_CLRLINE}, /* TAG_HTML */ 46 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 47 {"body", HTML_CLRLINE}, /* TAG_BODY */ 48 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 49 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 50 {"div", HTML_CLRLINE}, /* TAG_DIV */ 51 {"h1", 0}, /* TAG_H1 */ 52 {"h2", 0}, /* TAG_H2 */ 53 {"span", 0}, /* TAG_SPAN */ 54 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ 55 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 56 {"a", 0}, /* TAG_A */ 57 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 58 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ 59 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 60 {"tr", HTML_CLRLINE}, /* TAG_TR */ 61 {"td", HTML_CLRLINE}, /* TAG_TD */ 62 {"li", HTML_CLRLINE}, /* TAG_LI */ 63 {"ul", HTML_CLRLINE}, /* TAG_UL */ 64 {"ol", HTML_CLRLINE}, /* TAG_OL */ 65 {"dl", HTML_CLRLINE}, /* TAG_DL */ 66 {"dt", HTML_CLRLINE}, /* TAG_DT */ 67 {"dd", HTML_CLRLINE}, /* TAG_DD */ 68 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ 69 {"pre", HTML_CLRLINE }, /* TAG_PRE */ 70 {"b", 0 }, /* TAG_B */ 71 {"i", 0 }, /* TAG_I */ 72 {"code", 0 }, /* TAG_CODE */ 73 {"small", 0 }, /* TAG_SMALL */ 74 {"style", HTML_CLRLINE}, /* TAG_STYLE */ 75 {"math", HTML_CLRLINE}, /* TAG_MATH */ 76 {"mrow", 0}, /* TAG_MROW */ 77 {"mi", 0}, /* TAG_MI */ 78 {"mo", 0}, /* TAG_MO */ 79 {"msup", 0}, /* TAG_MSUP */ 80 {"msub", 0}, /* TAG_MSUB */ 81 {"msubsup", 0}, /* TAG_MSUBSUP */ 82 {"mfrac", 0}, /* TAG_MFRAC */ 83 {"msqrt", 0}, /* TAG_MSQRT */ 84 {"mfenced", 0}, /* TAG_MFENCED */ 85 {"mtable", 0}, /* TAG_MTABLE */ 86 {"mtr", 0}, /* TAG_MTR */ 87 {"mtd", 0}, /* TAG_MTD */ 88 {"munderover", 0}, /* TAG_MUNDEROVER */ 89 {"munder", 0}, /* TAG_MUNDER*/ 90 {"mover", 0}, /* TAG_MOVER*/ 91 }; 92 93 static const char *const htmlattrs[ATTR_MAX] = { 94 "name", /* ATTR_NAME */ 95 "rel", /* ATTR_REL */ 96 "href", /* ATTR_HREF */ 97 "type", /* ATTR_TYPE */ 98 "media", /* ATTR_MEDIA */ 99 "class", /* ATTR_CLASS */ 100 "style", /* ATTR_STYLE */ 101 "id", /* ATTR_ID */ 102 "colspan", /* ATTR_COLSPAN */ 103 "charset", /* ATTR_CHARSET */ 104 "open", /* ATTR_OPEN */ 105 "close", /* ATTR_CLOSE */ 106 "mathvariant", /* ATTR_MATHVARIANT */ 107 }; 108 109 static const char *const roffscales[SCALE_MAX] = { 110 "cm", /* SCALE_CM */ 111 "in", /* SCALE_IN */ 112 "pc", /* SCALE_PC */ 113 "pt", /* SCALE_PT */ 114 "em", /* SCALE_EM */ 115 "em", /* SCALE_MM */ 116 "ex", /* SCALE_EN */ 117 "ex", /* SCALE_BU */ 118 "em", /* SCALE_VS */ 119 "ex", /* SCALE_FS */ 120 }; 121 122 static void bufncat(struct html *, const char *, size_t); 123 static void print_ctag(struct html *, struct tag *); 124 static int print_escape(char); 125 static int print_encode(struct html *, const char *, int); 126 static void print_metaf(struct html *, enum mandoc_esc); 127 static void print_attr(struct html *, const char *, const char *); 128 129 130 void * 131 html_alloc(const struct mchars *mchars, const struct manoutput *outopts) 132 { 133 struct html *h; 134 135 h = mandoc_calloc(1, sizeof(struct html)); 136 137 h->tags.head = NULL; 138 h->symtab = mchars; 139 140 h->style = outopts->style; 141 h->base_man = outopts->man; 142 h->base_includes = outopts->includes; 143 if (outopts->fragment) 144 h->oflags |= HTML_FRAGMENT; 145 146 return(h); 147 } 148 149 void 150 html_free(void *p) 151 { 152 struct tag *tag; 153 struct html *h; 154 155 h = (struct html *)p; 156 157 while ((tag = h->tags.head) != NULL) { 158 h->tags.head = tag->next; 159 free(tag); 160 } 161 162 free(h); 163 } 164 165 void 166 print_gen_head(struct html *h) 167 { 168 struct htmlpair tag[4]; 169 struct tag *t; 170 171 tag[0].key = ATTR_CHARSET; 172 tag[0].val = "utf-8"; 173 print_otag(h, TAG_META, 1, tag); 174 175 /* 176 * Print a default style-sheet. 177 */ 178 t = print_otag(h, TAG_STYLE, 0, NULL); 179 print_text(h, "table.head, table.foot { width: 100%; }\n" 180 "td.head-rtitle, td.foot-os { text-align: right; }\n" 181 "td.head-vol { text-align: center; }\n" 182 "table.foot td { width: 50%; }\n" 183 "table.head td { width: 33%; }\n" 184 "div.spacer { margin: 1em 0; }\n"); 185 print_tagq(h, t); 186 187 if (h->style) { 188 tag[0].key = ATTR_REL; 189 tag[0].val = "stylesheet"; 190 tag[1].key = ATTR_HREF; 191 tag[1].val = h->style; 192 tag[2].key = ATTR_TYPE; 193 tag[2].val = "text/css"; 194 tag[3].key = ATTR_MEDIA; 195 tag[3].val = "all"; 196 print_otag(h, TAG_LINK, 4, tag); 197 } 198 } 199 200 static void 201 print_metaf(struct html *h, enum mandoc_esc deco) 202 { 203 enum htmlfont font; 204 205 switch (deco) { 206 case ESCAPE_FONTPREV: 207 font = h->metal; 208 break; 209 case ESCAPE_FONTITALIC: 210 font = HTMLFONT_ITALIC; 211 break; 212 case ESCAPE_FONTBOLD: 213 font = HTMLFONT_BOLD; 214 break; 215 case ESCAPE_FONTBI: 216 font = HTMLFONT_BI; 217 break; 218 case ESCAPE_FONT: 219 /* FALLTHROUGH */ 220 case ESCAPE_FONTROMAN: 221 font = HTMLFONT_NONE; 222 break; 223 default: 224 abort(); 225 /* NOTREACHED */ 226 } 227 228 if (h->metaf) { 229 print_tagq(h, h->metaf); 230 h->metaf = NULL; 231 } 232 233 h->metal = h->metac; 234 h->metac = font; 235 236 switch (font) { 237 case HTMLFONT_ITALIC: 238 h->metaf = print_otag(h, TAG_I, 0, NULL); 239 break; 240 case HTMLFONT_BOLD: 241 h->metaf = print_otag(h, TAG_B, 0, NULL); 242 break; 243 case HTMLFONT_BI: 244 h->metaf = print_otag(h, TAG_B, 0, NULL); 245 print_otag(h, TAG_I, 0, NULL); 246 break; 247 default: 248 break; 249 } 250 } 251 252 int 253 html_strlen(const char *cp) 254 { 255 size_t rsz; 256 int skip, sz; 257 258 /* 259 * Account for escaped sequences within string length 260 * calculations. This follows the logic in term_strlen() as we 261 * must calculate the width of produced strings. 262 * Assume that characters are always width of "1". This is 263 * hacky, but it gets the job done for approximation of widths. 264 */ 265 266 sz = 0; 267 skip = 0; 268 while (1) { 269 rsz = strcspn(cp, "\\"); 270 if (rsz) { 271 cp += rsz; 272 if (skip) { 273 skip = 0; 274 rsz--; 275 } 276 sz += rsz; 277 } 278 if ('\0' == *cp) 279 break; 280 cp++; 281 switch (mandoc_escape(&cp, NULL, NULL)) { 282 case ESCAPE_ERROR: 283 return(sz); 284 case ESCAPE_UNICODE: 285 /* FALLTHROUGH */ 286 case ESCAPE_NUMBERED: 287 /* FALLTHROUGH */ 288 case ESCAPE_SPECIAL: 289 /* FALLTHROUGH */ 290 case ESCAPE_OVERSTRIKE: 291 if (skip) 292 skip = 0; 293 else 294 sz++; 295 break; 296 case ESCAPE_SKIPCHAR: 297 skip = 1; 298 break; 299 default: 300 break; 301 } 302 } 303 return(sz); 304 } 305 306 static int 307 print_escape(char c) 308 { 309 310 switch (c) { 311 case '<': 312 printf("<"); 313 break; 314 case '>': 315 printf(">"); 316 break; 317 case '&': 318 printf("&"); 319 break; 320 case '"': 321 printf("""); 322 break; 323 case ASCII_NBRSP: 324 putchar('-'); 325 break; 326 case ASCII_HYPH: 327 putchar('-'); 328 /* FALLTHROUGH */ 329 case ASCII_BREAK: 330 break; 331 default: 332 return(0); 333 } 334 return(1); 335 } 336 337 static int 338 print_encode(struct html *h, const char *p, int norecurse) 339 { 340 size_t sz; 341 int c, len, nospace; 342 const char *seq; 343 enum mandoc_esc esc; 344 static const char rejs[9] = { '\\', '<', '>', '&', '"', 345 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 346 347 nospace = 0; 348 349 while ('\0' != *p) { 350 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 351 h->flags &= ~HTML_SKIPCHAR; 352 p++; 353 continue; 354 } 355 356 sz = strcspn(p, rejs); 357 358 fwrite(p, 1, sz, stdout); 359 p += (int)sz; 360 361 if ('\0' == *p) 362 break; 363 364 if (print_escape(*p++)) 365 continue; 366 367 esc = mandoc_escape(&p, &seq, &len); 368 if (ESCAPE_ERROR == esc) 369 break; 370 371 switch (esc) { 372 case ESCAPE_FONT: 373 /* FALLTHROUGH */ 374 case ESCAPE_FONTPREV: 375 /* FALLTHROUGH */ 376 case ESCAPE_FONTBOLD: 377 /* FALLTHROUGH */ 378 case ESCAPE_FONTITALIC: 379 /* FALLTHROUGH */ 380 case ESCAPE_FONTBI: 381 /* FALLTHROUGH */ 382 case ESCAPE_FONTROMAN: 383 if (0 == norecurse) 384 print_metaf(h, esc); 385 continue; 386 case ESCAPE_SKIPCHAR: 387 h->flags |= HTML_SKIPCHAR; 388 continue; 389 default: 390 break; 391 } 392 393 if (h->flags & HTML_SKIPCHAR) { 394 h->flags &= ~HTML_SKIPCHAR; 395 continue; 396 } 397 398 switch (esc) { 399 case ESCAPE_UNICODE: 400 /* Skip past "u" header. */ 401 c = mchars_num2uc(seq + 1, len - 1); 402 break; 403 case ESCAPE_NUMBERED: 404 c = mchars_num2char(seq, len); 405 if (c < 0) 406 continue; 407 break; 408 case ESCAPE_SPECIAL: 409 c = mchars_spec2cp(h->symtab, seq, len); 410 if (c <= 0) 411 continue; 412 break; 413 case ESCAPE_NOSPACE: 414 if ('\0' == *p) 415 nospace = 1; 416 continue; 417 case ESCAPE_OVERSTRIKE: 418 if (len == 0) 419 continue; 420 c = seq[len - 1]; 421 break; 422 default: 423 continue; 424 } 425 if ((c < 0x20 && c != 0x09) || 426 (c > 0x7E && c < 0xA0)) 427 c = 0xFFFD; 428 if (c > 0x7E) 429 printf("&#%d;", c); 430 else if ( ! print_escape(c)) 431 putchar(c); 432 } 433 434 return(nospace); 435 } 436 437 static void 438 print_attr(struct html *h, const char *key, const char *val) 439 { 440 printf(" %s=\"", key); 441 (void)print_encode(h, val, 1); 442 putchar('\"'); 443 } 444 445 struct tag * 446 print_otag(struct html *h, enum htmltag tag, 447 int sz, const struct htmlpair *p) 448 { 449 int i; 450 struct tag *t; 451 452 /* Push this tags onto the stack of open scopes. */ 453 454 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 455 t = mandoc_malloc(sizeof(struct tag)); 456 t->tag = tag; 457 t->next = h->tags.head; 458 h->tags.head = t; 459 } else 460 t = NULL; 461 462 if ( ! (HTML_NOSPACE & h->flags)) 463 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { 464 /* Manage keeps! */ 465 if ( ! (HTML_KEEP & h->flags)) { 466 if (HTML_PREKEEP & h->flags) 467 h->flags |= HTML_KEEP; 468 putchar(' '); 469 } else 470 printf(" "); 471 } 472 473 if ( ! (h->flags & HTML_NONOSPACE)) 474 h->flags &= ~HTML_NOSPACE; 475 else 476 h->flags |= HTML_NOSPACE; 477 478 /* Print out the tag name and attributes. */ 479 480 printf("<%s", htmltags[tag].name); 481 for (i = 0; i < sz; i++) 482 print_attr(h, htmlattrs[p[i].key], p[i].val); 483 484 /* Accommodate for "well-formed" singleton escaping. */ 485 486 if (HTML_AUTOCLOSE & htmltags[tag].flags) 487 putchar('/'); 488 489 putchar('>'); 490 491 h->flags |= HTML_NOSPACE; 492 493 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags) 494 putchar('\n'); 495 496 return(t); 497 } 498 499 static void 500 print_ctag(struct html *h, struct tag *tag) 501 { 502 503 /* 504 * Remember to close out and nullify the current 505 * meta-font and table, if applicable. 506 */ 507 if (tag == h->metaf) 508 h->metaf = NULL; 509 if (tag == h->tblt) 510 h->tblt = NULL; 511 512 printf("</%s>", htmltags[tag->tag].name); 513 if (HTML_CLRLINE & htmltags[tag->tag].flags) { 514 h->flags |= HTML_NOSPACE; 515 putchar('\n'); 516 } 517 518 h->tags.head = tag->next; 519 free(tag); 520 } 521 522 void 523 print_gen_decls(struct html *h) 524 { 525 526 puts("<!DOCTYPE html>"); 527 } 528 529 void 530 print_text(struct html *h, const char *word) 531 { 532 533 if ( ! (HTML_NOSPACE & h->flags)) { 534 /* Manage keeps! */ 535 if ( ! (HTML_KEEP & h->flags)) { 536 if (HTML_PREKEEP & h->flags) 537 h->flags |= HTML_KEEP; 538 putchar(' '); 539 } else 540 printf(" "); 541 } 542 543 assert(NULL == h->metaf); 544 switch (h->metac) { 545 case HTMLFONT_ITALIC: 546 h->metaf = print_otag(h, TAG_I, 0, NULL); 547 break; 548 case HTMLFONT_BOLD: 549 h->metaf = print_otag(h, TAG_B, 0, NULL); 550 break; 551 case HTMLFONT_BI: 552 h->metaf = print_otag(h, TAG_B, 0, NULL); 553 print_otag(h, TAG_I, 0, NULL); 554 break; 555 default: 556 break; 557 } 558 559 assert(word); 560 if ( ! print_encode(h, word, 0)) { 561 if ( ! (h->flags & HTML_NONOSPACE)) 562 h->flags &= ~HTML_NOSPACE; 563 h->flags &= ~HTML_NONEWLINE; 564 } else 565 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 566 567 if (h->metaf) { 568 print_tagq(h, h->metaf); 569 h->metaf = NULL; 570 } 571 572 h->flags &= ~HTML_IGNDELIM; 573 } 574 575 void 576 print_tagq(struct html *h, const struct tag *until) 577 { 578 struct tag *tag; 579 580 while ((tag = h->tags.head) != NULL) { 581 print_ctag(h, tag); 582 if (until && tag == until) 583 return; 584 } 585 } 586 587 void 588 print_stagq(struct html *h, const struct tag *suntil) 589 { 590 struct tag *tag; 591 592 while ((tag = h->tags.head) != NULL) { 593 if (suntil && tag == suntil) 594 return; 595 print_ctag(h, tag); 596 } 597 } 598 599 void 600 print_paragraph(struct html *h) 601 { 602 struct tag *t; 603 struct htmlpair tag; 604 605 PAIR_CLASS_INIT(&tag, "spacer"); 606 t = print_otag(h, TAG_DIV, 1, &tag); 607 print_tagq(h, t); 608 } 609 610 611 void 612 bufinit(struct html *h) 613 { 614 615 h->buf[0] = '\0'; 616 h->buflen = 0; 617 } 618 619 void 620 bufcat_style(struct html *h, const char *key, const char *val) 621 { 622 623 bufcat(h, key); 624 bufcat(h, ":"); 625 bufcat(h, val); 626 bufcat(h, ";"); 627 } 628 629 void 630 bufcat(struct html *h, const char *p) 631 { 632 633 /* 634 * XXX This is broken and not easy to fix. 635 * When using the -Oincludes option, buffmt_includes() 636 * may pass in strings overrunning BUFSIZ, causing a crash. 637 */ 638 639 h->buflen = strlcat(h->buf, p, BUFSIZ); 640 assert(h->buflen < BUFSIZ); 641 } 642 643 void 644 bufcat_fmt(struct html *h, const char *fmt, ...) 645 { 646 va_list ap; 647 648 va_start(ap, fmt); 649 (void)vsnprintf(h->buf + (int)h->buflen, 650 BUFSIZ - h->buflen - 1, fmt, ap); 651 va_end(ap); 652 h->buflen = strlen(h->buf); 653 } 654 655 static void 656 bufncat(struct html *h, const char *p, size_t sz) 657 { 658 659 assert(h->buflen + sz + 1 < BUFSIZ); 660 strncat(h->buf, p, sz); 661 h->buflen += sz; 662 } 663 664 void 665 buffmt_includes(struct html *h, const char *name) 666 { 667 const char *p, *pp; 668 669 pp = h->base_includes; 670 671 bufinit(h); 672 while (NULL != (p = strchr(pp, '%'))) { 673 bufncat(h, pp, (size_t)(p - pp)); 674 switch (*(p + 1)) { 675 case'I': 676 bufcat(h, name); 677 break; 678 default: 679 bufncat(h, p, 2); 680 break; 681 } 682 pp = p + 2; 683 } 684 if (pp) 685 bufcat(h, pp); 686 } 687 688 void 689 buffmt_man(struct html *h, const char *name, const char *sec) 690 { 691 const char *p, *pp; 692 693 pp = h->base_man; 694 695 bufinit(h); 696 while (NULL != (p = strchr(pp, '%'))) { 697 bufncat(h, pp, (size_t)(p - pp)); 698 switch (*(p + 1)) { 699 case 'S': 700 bufcat(h, sec ? sec : "1"); 701 break; 702 case 'N': 703 bufcat_fmt(h, "%s", name); 704 break; 705 default: 706 bufncat(h, p, 2); 707 break; 708 } 709 pp = p + 2; 710 } 711 if (pp) 712 bufcat(h, pp); 713 } 714 715 void 716 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 717 { 718 double v; 719 720 v = su->scale; 721 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) 722 v = 1.0; 723 else if (SCALE_BU == su->unit) 724 v /= 24.0; 725 726 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); 727 } 728 729 void 730 bufcat_id(struct html *h, const char *src) 731 { 732 733 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ 734 735 while ('\0' != *src) 736 bufcat_fmt(h, "%.2x", *src++); 737 } 738