1 /* $Id: html.c,v 1.131 2011/03/22 14:05:45 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdint.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 33 #include "mandoc.h" 34 #include "out.h" 35 #include "html.h" 36 #include "main.h" 37 38 struct htmldata { 39 const char *name; 40 int flags; 41 #define HTML_CLRLINE (1 << 0) 42 #define HTML_NOSTACK (1 << 1) 43 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 44 }; 45 46 static const struct htmldata htmltags[TAG_MAX] = { 47 {"html", HTML_CLRLINE}, /* TAG_HTML */ 48 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 49 {"body", HTML_CLRLINE}, /* TAG_BODY */ 50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 51 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 52 {"div", HTML_CLRLINE}, /* TAG_DIV */ 53 {"h1", 0}, /* TAG_H1 */ 54 {"h2", 0}, /* TAG_H2 */ 55 {"span", 0}, /* TAG_SPAN */ 56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ 57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 58 {"a", 0}, /* TAG_A */ 59 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ 61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 62 {"tr", HTML_CLRLINE}, /* TAG_TR */ 63 {"td", HTML_CLRLINE}, /* TAG_TD */ 64 {"li", HTML_CLRLINE}, /* TAG_LI */ 65 {"ul", HTML_CLRLINE}, /* TAG_UL */ 66 {"ol", HTML_CLRLINE}, /* TAG_OL */ 67 {"dl", HTML_CLRLINE}, /* TAG_DL */ 68 {"dt", HTML_CLRLINE}, /* TAG_DT */ 69 {"dd", HTML_CLRLINE}, /* TAG_DD */ 70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ 71 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */ 72 {"pre", HTML_CLRLINE }, /* TAG_PRE */ 73 {"b", 0 }, /* TAG_B */ 74 {"i", 0 }, /* TAG_I */ 75 {"code", 0 }, /* TAG_CODE */ 76 {"small", 0 }, /* TAG_SMALL */ 77 }; 78 79 static const char *const htmlattrs[ATTR_MAX] = { 80 "http-equiv", /* ATTR_HTTPEQUIV */ 81 "content", /* ATTR_CONTENT */ 82 "name", /* ATTR_NAME */ 83 "rel", /* ATTR_REL */ 84 "href", /* ATTR_HREF */ 85 "type", /* ATTR_TYPE */ 86 "media", /* ATTR_MEDIA */ 87 "class", /* ATTR_CLASS */ 88 "style", /* ATTR_STYLE */ 89 "width", /* ATTR_WIDTH */ 90 "id", /* ATTR_ID */ 91 "summary", /* ATTR_SUMMARY */ 92 "align", /* ATTR_ALIGN */ 93 "colspan", /* ATTR_COLSPAN */ 94 }; 95 96 static void print_num(struct html *, const char *, size_t); 97 static void print_spec(struct html *, enum roffdeco, 98 const char *, size_t); 99 static void print_res(struct html *, const char *, size_t); 100 static void print_ctag(struct html *, enum htmltag); 101 static void print_doctype(struct html *); 102 static void print_xmltype(struct html *); 103 static int print_encode(struct html *, const char *, int); 104 static void print_metaf(struct html *, enum roffdeco); 105 static void print_attr(struct html *, 106 const char *, const char *); 107 static void *ml_alloc(char *, enum htmltype); 108 109 110 static void * 111 ml_alloc(char *outopts, enum htmltype type) 112 { 113 struct html *h; 114 const char *toks[4]; 115 char *v; 116 117 toks[0] = "style"; 118 toks[1] = "man"; 119 toks[2] = "includes"; 120 toks[3] = NULL; 121 122 h = mandoc_calloc(1, sizeof(struct html)); 123 124 h->type = type; 125 h->tags.head = NULL; 126 h->symtab = chars_init(CHARS_HTML); 127 128 while (outopts && *outopts) 129 switch (getsubopt(&outopts, UNCONST(toks), &v)) { 130 case (0): 131 h->style = v; 132 break; 133 case (1): 134 h->base_man = v; 135 break; 136 case (2): 137 h->base_includes = v; 138 break; 139 default: 140 break; 141 } 142 143 return(h); 144 } 145 146 void * 147 html_alloc(char *outopts) 148 { 149 150 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT)); 151 } 152 153 154 void * 155 xhtml_alloc(char *outopts) 156 { 157 158 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); 159 } 160 161 162 void 163 html_free(void *p) 164 { 165 struct tag *tag; 166 struct html *h; 167 168 h = (struct html *)p; 169 170 while ((tag = h->tags.head) != NULL) { 171 h->tags.head = tag->next; 172 free(tag); 173 } 174 175 if (h->symtab) 176 chars_free(h->symtab); 177 178 free(h); 179 } 180 181 182 void 183 print_gen_head(struct html *h) 184 { 185 struct htmlpair tag[4]; 186 187 tag[0].key = ATTR_HTTPEQUIV; 188 tag[0].val = "Content-Type"; 189 tag[1].key = ATTR_CONTENT; 190 tag[1].val = "text/html; charset=utf-8"; 191 print_otag(h, TAG_META, 2, tag); 192 193 tag[0].key = ATTR_NAME; 194 tag[0].val = "resource-type"; 195 tag[1].key = ATTR_CONTENT; 196 tag[1].val = "document"; 197 print_otag(h, TAG_META, 2, tag); 198 199 if (h->style) { 200 tag[0].key = ATTR_REL; 201 tag[0].val = "stylesheet"; 202 tag[1].key = ATTR_HREF; 203 tag[1].val = h->style; 204 tag[2].key = ATTR_TYPE; 205 tag[2].val = "text/css"; 206 tag[3].key = ATTR_MEDIA; 207 tag[3].val = "all"; 208 print_otag(h, TAG_LINK, 4, tag); 209 } 210 } 211 212 /* ARGSUSED */ 213 static void 214 print_num(struct html *h, const char *p, size_t len) 215 { 216 const char *rhs; 217 218 rhs = chars_num2char(p, len); 219 if (rhs) 220 putchar((int)*rhs); 221 } 222 223 static void 224 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) 225 { 226 int cp; 227 const char *rhs; 228 size_t sz; 229 230 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { 231 printf("&#%d;", cp); 232 return; 233 } else if (-1 == cp && DECO_SSPECIAL == d) { 234 fwrite(p, 1, len, stdout); 235 return; 236 } else if (-1 == cp) 237 return; 238 239 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz))) 240 fwrite(rhs, 1, sz, stdout); 241 } 242 243 244 static void 245 print_res(struct html *h, const char *p, size_t len) 246 { 247 int cp; 248 const char *rhs; 249 size_t sz; 250 251 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) { 252 printf("&#%d;", cp); 253 return; 254 } else if (-1 == cp) 255 return; 256 257 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz))) 258 fwrite(rhs, 1, sz, stdout); 259 } 260 261 262 static void 263 print_metaf(struct html *h, enum roffdeco deco) 264 { 265 enum htmlfont font; 266 267 switch (deco) { 268 case (DECO_PREVIOUS): 269 font = h->metal; 270 break; 271 case (DECO_ITALIC): 272 font = HTMLFONT_ITALIC; 273 break; 274 case (DECO_BOLD): 275 font = HTMLFONT_BOLD; 276 break; 277 case (DECO_ROMAN): 278 font = HTMLFONT_NONE; 279 break; 280 default: 281 abort(); 282 /* NOTREACHED */ 283 } 284 285 if (h->metaf) { 286 print_tagq(h, h->metaf); 287 h->metaf = NULL; 288 } 289 290 h->metal = h->metac; 291 h->metac = font; 292 293 if (HTMLFONT_NONE != font) 294 h->metaf = HTMLFONT_BOLD == font ? 295 print_otag(h, TAG_B, 0, NULL) : 296 print_otag(h, TAG_I, 0, NULL); 297 } 298 299 300 static int 301 print_encode(struct html *h, const char *p, int norecurse) 302 { 303 size_t sz; 304 int len, nospace; 305 const char *seq; 306 enum roffdeco deco; 307 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; 308 309 nospace = 0; 310 311 for (; *p; p++) { 312 sz = strcspn(p, rejs); 313 314 fwrite(p, 1, sz, stdout); 315 p += /* LINTED */ 316 sz; 317 318 if ('<' == *p) { 319 printf("<"); 320 continue; 321 } else if ('>' == *p) { 322 printf(">"); 323 continue; 324 } else if ('&' == *p) { 325 printf("&"); 326 continue; 327 } else if (ASCII_HYPH == *p) { 328 /* 329 * Note: "soft hyphens" aren't graphically 330 * displayed when not breaking the text; we want 331 * them to be displayed. 332 */ 333 /*printf("­");*/ 334 putchar('-'); 335 continue; 336 } else if ('\0' == *p) 337 break; 338 339 seq = ++p; 340 len = a2roffdeco(&deco, &seq, &sz); 341 342 switch (deco) { 343 case (DECO_NUMBERED): 344 print_num(h, seq, sz); 345 break; 346 case (DECO_RESERVED): 347 print_res(h, seq, sz); 348 break; 349 case (DECO_SSPECIAL): 350 /* FALLTHROUGH */ 351 case (DECO_SPECIAL): 352 print_spec(h, deco, seq, sz); 353 break; 354 case (DECO_PREVIOUS): 355 /* FALLTHROUGH */ 356 case (DECO_BOLD): 357 /* FALLTHROUGH */ 358 case (DECO_ITALIC): 359 /* FALLTHROUGH */ 360 case (DECO_ROMAN): 361 if (norecurse) 362 break; 363 print_metaf(h, deco); 364 break; 365 default: 366 break; 367 } 368 369 p += len - 1; 370 371 if (DECO_NOSPACE == deco && '\0' == *(p + 1)) 372 nospace = 1; 373 } 374 375 return(nospace); 376 } 377 378 379 static void 380 print_attr(struct html *h, const char *key, const char *val) 381 { 382 printf(" %s=\"", key); 383 (void)print_encode(h, val, 1); 384 putchar('\"'); 385 } 386 387 388 struct tag * 389 print_otag(struct html *h, enum htmltag tag, 390 int sz, const struct htmlpair *p) 391 { 392 int i; 393 struct tag *t; 394 395 /* Push this tags onto the stack of open scopes. */ 396 397 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 398 t = mandoc_malloc(sizeof(struct tag)); 399 t->tag = tag; 400 t->next = h->tags.head; 401 h->tags.head = t; 402 } else 403 t = NULL; 404 405 if ( ! (HTML_NOSPACE & h->flags)) 406 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { 407 /* Manage keeps! */ 408 if ( ! (HTML_KEEP & h->flags)) { 409 if (HTML_PREKEEP & h->flags) 410 h->flags |= HTML_KEEP; 411 putchar(' '); 412 } else 413 printf(" "); 414 } 415 416 if ( ! (h->flags & HTML_NONOSPACE)) 417 h->flags &= ~HTML_NOSPACE; 418 else 419 h->flags |= HTML_NOSPACE; 420 421 /* Print out the tag name and attributes. */ 422 423 printf("<%s", htmltags[tag].name); 424 for (i = 0; i < sz; i++) 425 print_attr(h, htmlattrs[p[i].key], p[i].val); 426 427 /* Add non-overridable attributes. */ 428 429 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) { 430 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml"); 431 print_attr(h, "xml:lang", "en"); 432 print_attr(h, "lang", "en"); 433 } 434 435 /* Accomodate for XML "well-formed" singleton escaping. */ 436 437 if (HTML_AUTOCLOSE & htmltags[tag].flags) 438 switch (h->type) { 439 case (HTML_XHTML_1_0_STRICT): 440 putchar('/'); 441 break; 442 default: 443 break; 444 } 445 446 putchar('>'); 447 448 h->flags |= HTML_NOSPACE; 449 450 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags) 451 putchar('\n'); 452 453 return(t); 454 } 455 456 457 static void 458 print_ctag(struct html *h, enum htmltag tag) 459 { 460 461 printf("</%s>", htmltags[tag].name); 462 if (HTML_CLRLINE & htmltags[tag].flags) { 463 h->flags |= HTML_NOSPACE; 464 putchar('\n'); 465 } 466 } 467 468 469 void 470 print_gen_decls(struct html *h) 471 { 472 473 print_xmltype(h); 474 print_doctype(h); 475 } 476 477 478 static void 479 print_xmltype(struct html *h) 480 { 481 482 if (HTML_XHTML_1_0_STRICT == h->type) 483 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 484 } 485 486 487 static void 488 print_doctype(struct html *h) 489 { 490 const char *doctype; 491 const char *dtd; 492 const char *name; 493 494 switch (h->type) { 495 case (HTML_HTML_4_01_STRICT): 496 name = "HTML"; 497 doctype = "-//W3C//DTD HTML 4.01//EN"; 498 dtd = "http://www.w3.org/TR/html4/strict.dtd"; 499 break; 500 default: 501 name = "html"; 502 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; 503 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; 504 break; 505 } 506 507 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 508 name, doctype, dtd); 509 } 510 511 void 512 print_text(struct html *h, const char *word) 513 { 514 515 if ( ! (HTML_NOSPACE & h->flags)) { 516 /* Manage keeps! */ 517 if ( ! (HTML_KEEP & h->flags)) { 518 if (HTML_PREKEEP & h->flags) 519 h->flags |= HTML_KEEP; 520 putchar(' '); 521 } else 522 printf(" "); 523 } 524 525 assert(NULL == h->metaf); 526 if (HTMLFONT_NONE != h->metac) 527 h->metaf = HTMLFONT_BOLD == h->metac ? 528 print_otag(h, TAG_B, 0, NULL) : 529 print_otag(h, TAG_I, 0, NULL); 530 531 assert(word); 532 if ( ! print_encode(h, word, 0)) 533 if ( ! (h->flags & HTML_NONOSPACE)) 534 h->flags &= ~HTML_NOSPACE; 535 536 if (h->metaf) { 537 print_tagq(h, h->metaf); 538 h->metaf = NULL; 539 } 540 541 h->flags &= ~HTML_IGNDELIM; 542 } 543 544 545 void 546 print_tagq(struct html *h, const struct tag *until) 547 { 548 struct tag *tag; 549 550 while ((tag = h->tags.head) != NULL) { 551 /* 552 * Remember to close out and nullify the current 553 * meta-font and table, if applicable. 554 */ 555 if (tag == h->metaf) 556 h->metaf = NULL; 557 if (tag == h->tblt) 558 h->tblt = NULL; 559 print_ctag(h, tag->tag); 560 h->tags.head = tag->next; 561 free(tag); 562 if (until && tag == until) 563 return; 564 } 565 } 566 567 568 void 569 print_stagq(struct html *h, const struct tag *suntil) 570 { 571 struct tag *tag; 572 573 while ((tag = h->tags.head) != NULL) { 574 if (suntil && tag == suntil) 575 return; 576 /* 577 * Remember to close out and nullify the current 578 * meta-font and table, if applicable. 579 */ 580 if (tag == h->metaf) 581 h->metaf = NULL; 582 if (tag == h->tblt) 583 h->tblt = NULL; 584 print_ctag(h, tag->tag); 585 h->tags.head = tag->next; 586 free(tag); 587 } 588 } 589 590 591 void 592 bufinit(struct html *h) 593 { 594 595 h->buf[0] = '\0'; 596 h->buflen = 0; 597 } 598 599 600 void 601 bufcat_style(struct html *h, const char *key, const char *val) 602 { 603 604 bufcat(h, key); 605 bufncat(h, ":", 1); 606 bufcat(h, val); 607 bufncat(h, ";", 1); 608 } 609 610 611 void 612 bufcat(struct html *h, const char *p) 613 { 614 615 bufncat(h, p, strlen(p)); 616 } 617 618 619 void 620 buffmt(struct html *h, const char *fmt, ...) 621 { 622 va_list ap; 623 624 va_start(ap, fmt); 625 (void)vsnprintf(h->buf + (int)h->buflen, 626 BUFSIZ - h->buflen - 1, fmt, ap); 627 va_end(ap); 628 h->buflen = strlen(h->buf); 629 } 630 631 632 void 633 bufncat(struct html *h, const char *p, size_t sz) 634 { 635 636 if (h->buflen + sz > BUFSIZ - 1) 637 sz = BUFSIZ - 1 - h->buflen; 638 639 (void)strncat(h->buf, p, sz); 640 h->buflen += sz; 641 } 642 643 644 void 645 buffmt_includes(struct html *h, const char *name) 646 { 647 const char *p, *pp; 648 649 pp = h->base_includes; 650 651 while (NULL != (p = strchr(pp, '%'))) { 652 bufncat(h, pp, (size_t)(p - pp)); 653 switch (*(p + 1)) { 654 case('I'): 655 bufcat(h, name); 656 break; 657 default: 658 bufncat(h, p, 2); 659 break; 660 } 661 pp = p + 2; 662 } 663 if (pp) 664 bufcat(h, pp); 665 } 666 667 668 void 669 buffmt_man(struct html *h, 670 const char *name, const char *sec) 671 { 672 const char *p, *pp; 673 674 pp = h->base_man; 675 676 /* LINTED */ 677 while (NULL != (p = strchr(pp, '%'))) { 678 bufncat(h, pp, (size_t)(p - pp)); 679 switch (*(p + 1)) { 680 case('S'): 681 bufcat(h, sec ? sec : "1"); 682 break; 683 case('N'): 684 buffmt(h, name); 685 break; 686 default: 687 bufncat(h, p, 2); 688 break; 689 } 690 pp = p + 2; 691 } 692 if (pp) 693 bufcat(h, pp); 694 } 695 696 697 void 698 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 699 { 700 double v; 701 const char *u; 702 703 v = su->scale; 704 705 switch (su->unit) { 706 case (SCALE_CM): 707 u = "cm"; 708 break; 709 case (SCALE_IN): 710 u = "in"; 711 break; 712 case (SCALE_PC): 713 u = "pc"; 714 break; 715 case (SCALE_PT): 716 u = "pt"; 717 break; 718 case (SCALE_EM): 719 u = "em"; 720 break; 721 case (SCALE_MM): 722 if (0 == (v /= 100)) 723 v = 1; 724 u = "em"; 725 break; 726 case (SCALE_EN): 727 u = "ex"; 728 break; 729 case (SCALE_BU): 730 u = "ex"; 731 break; 732 case (SCALE_VS): 733 u = "em"; 734 break; 735 default: 736 u = "ex"; 737 break; 738 } 739 740 /* 741 * XXX: the CSS spec isn't clear as to which types accept 742 * integer or real numbers, so we just make them all decimals. 743 */ 744 buffmt(h, "%s: %.2f%s;", p, v, u); 745 } 746 747 748 void 749 html_idcat(char *dst, const char *src, int sz) 750 { 751 int ssz; 752 753 assert(sz > 2); 754 755 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ 756 757 /* We can't start with a number (bah). */ 758 759 if ('#' == *dst) { 760 dst++; 761 sz--; 762 } 763 if ('\0' == *dst) { 764 *dst++ = 'x'; 765 *dst = '\0'; 766 sz--; 767 } 768 769 for ( ; *dst != '\0' && sz; dst++, sz--) 770 /* Jump to end. */ ; 771 772 for ( ; *src != '\0' && sz > 1; src++) { 773 ssz = snprintf(dst, (size_t)sz, "%.2x", *src); 774 sz -= ssz; 775 dst += ssz; 776 } 777 } 778