1 #ifndef _HTML_H_ 2 #define _HTML_H_ 1 3 #ifdef __cplusplus 4 extern "C" { 5 #endif 6 7 AUTOLIB(html) 8 /* 9 #pragma lib "libhtml.a" 10 #pragma src "/sys/src/libhtml" 11 */ 12 13 /* UTILS */ 14 extern uchar* fromStr(Rune* buf, int n, int chset); 15 extern Rune* toStr(uchar* buf, int n, int chset); 16 17 /* Common LEX and BUILD enums */ 18 19 /* Media types */ 20 enum 21 { 22 ApplMsword, 23 ApplOctets, 24 ApplPdf, 25 ApplPostscript, 26 ApplRtf, 27 ApplFramemaker, 28 ApplMsexcel, 29 ApplMspowerpoint, 30 UnknownType, 31 Audio32kadpcm, 32 AudioBasic, 33 ImageCgm, 34 ImageG3fax, 35 ImageGif, 36 ImageIef, 37 ImageJpeg, 38 ImagePng, 39 ImageTiff, 40 ImageXBit, 41 ImageXBit2, 42 ImageXBitmulti, 43 ImageXXBitmap, 44 ModelVrml, 45 MultiDigest, 46 MultiMixed, 47 TextCss, 48 TextEnriched, 49 TextHtml, 50 TextJavascript, 51 TextPlain, 52 TextRichtext, 53 TextSgml, 54 TextTabSeparatedValues, 55 TextXml, 56 VideoMpeg, 57 VideoQuicktime, 58 NMEDIATYPES 59 }; 60 61 /* HTTP methods */ 62 enum 63 { 64 HGet, 65 HPost 66 }; 67 68 /* Charsets */ 69 enum 70 { 71 UnknownCharset, 72 US_Ascii, 73 ISO_8859_1, 74 UTF_8, 75 Unicode, 76 NCHARSETS 77 }; 78 79 /* Frame Target IDs */ 80 enum { 81 FTtop, 82 FTself, 83 FTparent, 84 FTblank 85 }; 86 87 /* LEX */ 88 typedef struct Token Token; 89 typedef struct Attr Attr; 90 91 /* BUILD */ 92 93 typedef struct Item Item; 94 typedef struct Itext Itext; 95 typedef struct Irule Irule; 96 typedef struct Iimage Iimage; 97 typedef struct Iformfield Iformfield; 98 typedef struct Itable Itable; 99 typedef struct Ifloat Ifloat; 100 typedef struct Ispacer Ispacer; 101 typedef struct Genattr Genattr; 102 typedef struct SEvent SEvent; 103 typedef struct Formfield Formfield; 104 typedef struct Option Option; 105 typedef struct Form Form; 106 typedef struct Table Table; 107 typedef struct Tablecol Tablecol; 108 typedef struct Tablerow Tablerow; 109 typedef struct Tablecell Tablecell; 110 typedef struct Align Align; 111 typedef struct Dimen Dimen; 112 typedef struct Anchor Anchor; 113 typedef struct DestAnchor DestAnchor; 114 typedef struct Map Map; 115 typedef struct Area Area; 116 typedef struct Background Background; 117 typedef struct Kidinfo Kidinfo; 118 typedef struct Docinfo Docinfo; 119 typedef struct Stack Stack; 120 typedef struct Pstate Pstate; 121 typedef struct ItemSource ItemSource; 122 typedef struct Lay Lay; /* defined in Layout module */ 123 124 /* Alignment types */ 125 enum { 126 ALnone = 0, ALleft, ALcenter, ALright, ALjustify, 127 ALchar, ALtop, ALmiddle, ALbottom, ALbaseline 128 }; 129 130 struct Align 131 { 132 uchar halign; /* one of ALnone, ALleft, etc. */ 133 uchar valign; /* one of ALnone, ALtop, etc. */ 134 }; 135 136 /* A Dimen holds a dimension specification, especially for those */ 137 /* cases when a number can be followed by a % or a * to indicate */ 138 /* percentage of total or relative weight. */ 139 /* Dnone means no dimension was specified */ 140 141 /* To fit in a word, use top bits to identify kind, rest for value */ 142 enum { 143 Dnone = 0, 144 Dpixels = (1<<29), 145 Dpercent = (2<<29), 146 Drelative = (3<<29), 147 Dkindmask = (3<<29), 148 Dspecmask = (~Dkindmask) 149 }; 150 151 struct Dimen 152 { 153 int kindspec; /* kind | spec */ 154 }; 155 156 /* Background is either an image or a color. */ 157 /* If both are set, the image has precedence. */ 158 struct Background 159 { 160 Rune* image; /* url */ 161 int color; 162 }; 163 164 165 /* There are about a half dozen Item variants. */ 166 /* The all look like this at the start (using Plan 9 C's */ 167 /* anonymous structure member mechanism), */ 168 /* and then the tag field dictates what extra fields there are. */ 169 struct Item 170 { 171 Item* next; /* successor in list of items */ 172 int width; /* width in pixels (0 for floating items) */ 173 int height; /* height in pixels */ 174 Rectangle r; 175 int ascent; /* ascent (from top to baseline) in pixels */ 176 int anchorid; /* if nonzero, which anchor we're in */ 177 int state; /* flags and values (see below) */ 178 Genattr* genattr; /* generic attributes and events */ 179 int tag; /* variant discriminator: Itexttag, etc. */ 180 }; 181 182 /* Item variant tags */ 183 enum { 184 Itexttag, 185 Iruletag, 186 Iimagetag, 187 Iformfieldtag, 188 Itabletag, 189 Ifloattag, 190 Ispacertag 191 }; 192 193 struct Itext 194 { 195 Item item; /* (with tag ==Itexttag) */ 196 Rune* s; /* the characters */ 197 int fnt; /* style*NumSize+size (see font stuff, below) */ 198 int fg; /* Pixel (color) for text */ 199 uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */ 200 uchar ul; /* ULnone, ULunder, or ULmid */ 201 }; 202 203 struct Irule 204 { 205 Item item; /* (with tag ==Iruletag) */ 206 uchar align; /* alignment spec */ 207 uchar noshade; /* if true, don't shade */ 208 int size; /* size attr (rule height) */ 209 Dimen wspec; /* width spec */ 210 }; 211 212 213 struct Iimage 214 { 215 Item item; /* (with tag ==Iimagetag) */ 216 Rune* imsrc; /* image src url */ 217 int imwidth; /* spec width (actual, if no spec) */ 218 int imheight; /* spec height (actual, if no spec) */ 219 Rune* altrep; /* alternate representation, in absence of image */ 220 Map* map; /* if non-nil, client side map */ 221 int ctlid; /* if animated */ 222 uchar align; /* vertical alignment */ 223 uchar hspace; /* in pixels; buffer space on each side */ 224 uchar vspace; /* in pixels; buffer space on top and bottom */ 225 uchar border; /* in pixels: border width to draw around image */ 226 Iimage* nextimage; /* next in list of document's images */ 227 void *aux; 228 }; 229 230 231 struct Iformfield 232 { 233 Item item; /* (with tag ==Iformfieldtag) */ 234 Formfield* formfield; 235 void *aux; 236 }; 237 238 239 struct Itable 240 { 241 Item item; /* (with tag ==Itabletag) */ 242 Table* table; 243 }; 244 245 246 struct Ifloat 247 { 248 Item _item; /* (with tag ==Ifloattag) */ 249 Item* item; /* table or image item that floats */ 250 int x; /* x coord of top (from right, if ALright) */ 251 int y; /* y coord of top */ 252 uchar side; /* margin it floats to: ALleft or ALright */ 253 uchar infloats; /* true if this has been added to a lay.floats */ 254 Ifloat* nextfloat; /* in list of floats */ 255 }; 256 257 258 struct Ispacer 259 { 260 Item item; /* (with tag ==Ispacertag) */ 261 int spkind; /* ISPnull, etc. */ 262 }; 263 264 /* Item state flags and value fields */ 265 enum { 266 /* IFbrk = 0x80000000, // forced break before this item */ 267 #define IFbrk 0x80000000 /* too big for sun */ 268 IFbrksp = 0x40000000, /* add 1 line space to break (IFbrk set too) */ 269 IFnobrk = 0x20000000, /* break not allowed before this item */ 270 IFcleft = 0x10000000, /* clear left floats (IFbrk set too) */ 271 IFcright = 0x08000000, /* clear right floats (IFbrk set too) */ 272 IFwrap = 0x04000000, /* in a wrapping (non-pre) line */ 273 IFhang = 0x02000000, /* in a hanging (into left indent) item */ 274 IFrjust = 0x01000000, /* right justify current line */ 275 IFcjust = 0x00800000, /* center justify current line */ 276 IFsmap = 0x00400000, /* image is server-side map */ 277 IFindentshift = 8, 278 IFindentmask = (255<<IFindentshift), /* current indent, in tab stops */ 279 IFhangmask = 255 /* current hang into left indent, in 1/10th tabstops */ 280 }; 281 282 /* Bias added to Itext's voff field */ 283 enum { Voffbias = 128 }; 284 285 /* Spacer kinds */ 286 enum { 287 ISPnull, /* 0 height and width */ 288 ISPvline, /* height and ascent of current font */ 289 ISPhspace, /* width of space in current font */ 290 ISPgeneral /* other purposes (e.g., between markers and list) */ 291 }; 292 293 /* Generic attributes and events (not many elements will have any of these set) */ 294 struct Genattr 295 { 296 Rune* id; 297 Rune* class; 298 Rune* style; 299 Rune* title; 300 SEvent* events; 301 }; 302 303 struct SEvent 304 { 305 SEvent* next; /* in list of events */ 306 int type; /* SEonblur, etc. */ 307 Rune* script; 308 }; 309 310 enum { 311 SEonblur, SEonchange, SEonclick, SEondblclick, 312 SEonfocus, SEonkeypress, SEonkeyup, SEonload, 313 SEonmousedown, SEonmousemove, SEonmouseout, 314 SEonmouseover, SEonmouseup, SEonreset, SEonselect, 315 SEonsubmit, SEonunload, 316 Numscriptev 317 }; 318 319 /* Form field types */ 320 enum { 321 Ftext, 322 Fpassword, 323 Fcheckbox, 324 Fradio, 325 Fsubmit, 326 Fhidden, 327 Fimage, 328 Freset, 329 Ffile, 330 Fbutton, 331 Fselect, 332 Ftextarea 333 }; 334 335 /* Information about a field in a form */ 336 struct Formfield 337 { 338 Formfield* next; /* in list of fields for a form */ 339 int ftype; /* Ftext, Fpassword, etc. */ 340 int fieldid; /* serial no. of field within its form */ 341 Form* form; /* containing form */ 342 Rune* name; /* name attr */ 343 Rune* value; /* value attr */ 344 int size; /* size attr */ 345 int maxlength; /* maxlength attr */ 346 int rows; /* rows attr */ 347 int cols; /* cols attr */ 348 uchar flags; /* FFchecked, etc. */ 349 Option* options; /* for Fselect fields */ 350 Item* image; /* image item, for Fimage fields */ 351 int ctlid; /* identifies control for this field in layout */ 352 SEvent* events; /* same as genattr->events of containing item */ 353 }; 354 355 enum { 356 FFchecked = (1<<7), 357 FFmultiple = (1<<6) 358 }; 359 360 /* Option holds info about an option in a "select" form field */ 361 struct Option 362 { 363 Option* next; /* next in list of options for a field */ 364 int selected; /* true if selected initially */ 365 Rune* value; /* value attr */ 366 Rune* display; /* display string */ 367 }; 368 369 /* Form holds info about a form */ 370 struct Form 371 { 372 Form* next; /* in list of forms for document */ 373 int formid; /* serial no. of form within its doc */ 374 Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */ 375 Rune* action; /* action attr */ 376 int target; /* target attr as targetid */ 377 int method; /* HGet or HPost */ 378 int nfields; /* number of fields */ 379 Formfield* fields; /* field's forms, in input order */ 380 }; 381 382 /* Flags used in various table structures */ 383 enum { 384 TFparsing = (1<<7), 385 TFnowrap = (1<<6), 386 TFisth = (1<<5) 387 }; 388 389 390 /* Information about a table */ 391 struct Table 392 { 393 Table* next; /* next in list of document's tables */ 394 int tableid; /* serial no. of table within its doc */ 395 Tablerow* rows; /* array of row specs (list during parsing) */ 396 int nrow; /* total number of rows */ 397 Tablecol* cols; /* array of column specs */ 398 int ncol; /* total number of columns */ 399 Tablecell* cells; /* list of unique cells */ 400 int ncell; /* total number of cells */ 401 Tablecell*** grid; /* 2-D array of cells */ 402 Align align; /* alignment spec for whole table */ 403 Dimen width; /* width spec for whole table */ 404 int border; /* border attr */ 405 int cellspacing; /* cellspacing attr */ 406 int cellpadding; /* cellpadding attr */ 407 Background background; /* table background */ 408 Item* caption; /* linked list of Items, giving caption */ 409 uchar caption_place; /* ALtop or ALbottom */ 410 Lay* caption_lay; /* layout of caption */ 411 int totw; /* total width */ 412 int toth; /* total height */ 413 int caph; /* caption height */ 414 int availw; /* used for previous 3 sizes */ 415 Token* tabletok; /* token that started the table */ 416 uchar flags; /* Lchanged, perhaps */ 417 }; 418 419 420 struct Tablecol 421 { 422 int width; 423 Align align; 424 Point pos; 425 }; 426 427 428 struct Tablerow 429 { 430 Tablerow* next; /* Next in list of rows, during parsing */ 431 Tablecell* cells; /* Cells in row, linked through nextinrow */ 432 int height; 433 int ascent; 434 Align align; 435 Background background; 436 Point pos; 437 uchar flags; /* 0 or TFparsing */ 438 }; 439 440 441 /* A Tablecell is one cell of a table. */ 442 /* It may span multiple rows and multiple columns. */ 443 /* Cells are linked on two lists: the list for all the cells of */ 444 /* a document (the next pointers), and the list of all the */ 445 /* cells that start in a given row (the nextinrow pointers) */ 446 struct Tablecell 447 { 448 Tablecell* next; /* next in list of table's cells */ 449 Tablecell* nextinrow; /* next in list of row's cells */ 450 int cellid; /* serial no. of cell within table */ 451 Item* content; /* contents before layout */ 452 Lay* lay; /* layout of cell */ 453 int rowspan; /* number of rows spanned by this cell */ 454 int colspan; /* number of cols spanned by this cell */ 455 Align align; /* alignment spec */ 456 uchar flags; /* TFparsing, TFnowrap, TFisth */ 457 Dimen wspec; /* suggested width */ 458 int hspec; /* suggested height */ 459 Background background; /* cell background */ 460 int minw; /* minimum possible width */ 461 int maxw; /* maximum width */ 462 int ascent; /* cell's ascent */ 463 int row; /* row of upper left corner */ 464 int col; /* col of upper left corner */ 465 Point pos; /* nw corner of cell contents, in cell */ 466 }; 467 468 /* Anchor is for info about hyperlinks that go somewhere */ 469 struct Anchor 470 { 471 Anchor* next; /* next in list of document's anchors */ 472 int index; /* serial no. of anchor within its doc */ 473 Rune* name; /* name attr */ 474 Rune* href; /* href attr */ 475 int target; /* target attr as targetid */ 476 }; 477 478 479 /* DestAnchor is for info about hyperlinks that are destinations */ 480 struct DestAnchor 481 { 482 DestAnchor* next; /* next in list of document's destanchors */ 483 int index; /* serial no. of anchor within its doc */ 484 Rune* name; /* name attr */ 485 Item* item; /* the destination */ 486 }; 487 488 489 /* Maps (client side) */ 490 struct Map 491 { 492 Map* next; /* next in list of document's maps */ 493 Rune* name; /* map name */ 494 Area* areas; /* list of map areas */ 495 }; 496 497 498 struct Area 499 { 500 Area* next; /* next in list of a map's areas */ 501 int shape; /* SHrect, etc. */ 502 Rune* href; /* associated hypertext link */ 503 int target; /* associated target frame */ 504 Dimen* coords; /* array of coords for shape */ 505 int ncoords; /* size of coords array */ 506 }; 507 508 /* Area shapes */ 509 enum { 510 SHrect, SHcircle, SHpoly 511 }; 512 513 /* Fonts are represented by integers: style*NumSize + size */ 514 515 /* Font styles */ 516 enum { 517 FntR, /* roman */ 518 FntI, /* italic */ 519 FntB, /* bold */ 520 FntT, /* typewriter */ 521 NumStyle 522 }; 523 524 /* Font sizes */ 525 enum { 526 Tiny, 527 Small, 528 Normal, 529 Large, 530 Verylarge, 531 NumSize 532 }; 533 534 enum { 535 NumFnt = (NumStyle*NumSize), 536 DefFnt = (FntR*NumSize+Normal) 537 }; 538 539 /* Lines are needed through some text items, for underlining or strikethrough */ 540 enum { 541 ULnone, ULunder, ULmid 542 }; 543 544 /* Kidinfo flags */ 545 enum { 546 FRnoresize = (1<<0), 547 FRnoscroll = (1<<1), 548 FRhscroll = (1<<2), 549 FRvscroll = (1<<3), 550 FRhscrollauto = (1<<4), 551 FRvscrollauto = (1<<5) 552 }; 553 554 /* Information about child frame or frameset */ 555 struct Kidinfo 556 { 557 Kidinfo* next; /* in list of kidinfos for a frameset */ 558 int isframeset; 559 560 /* fields for "frame" */ 561 Rune* src; /* only nil if a "dummy" frame or this is frameset */ 562 Rune* name; /* always non-empty if this isn't frameset */ 563 int marginw; 564 int marginh; 565 int framebd; 566 int flags; 567 568 /* fields for "frameset" */ 569 Dimen* rows; /* array of row dimensions */ 570 int nrows; /* length of rows */ 571 Dimen* cols; /* array of col dimensions */ 572 int ncols; /* length of cols */ 573 Kidinfo* kidinfos; 574 Kidinfo* nextframeset; /* parsing stack */ 575 }; 576 577 578 /* Document info (global information about HTML page) */ 579 struct Docinfo 580 { 581 /* stuff from HTTP headers, doc head, and body tag */ 582 Rune* src; /* original source of doc */ 583 Rune* base; /* base URL of doc */ 584 Rune* doctitle; /* from <title> element */ 585 Background background; /* background specification */ 586 Iimage* backgrounditem; /* Image Item for doc background image, or nil */ 587 int text; /* doc foreground (text) color */ 588 int link; /* unvisited hyperlink color */ 589 int vlink; /* visited hyperlink color */ 590 int alink; /* highlighting hyperlink color */ 591 int target; /* target frame default */ 592 int chset; /* ISO_8859, etc. */ 593 int mediatype; /* TextHtml, etc. */ 594 int scripttype; /* TextJavascript, etc. */ 595 int hasscripts; /* true if scripts used */ 596 Rune* refresh; /* content of <http-equiv=Refresh ...> */ 597 Kidinfo* kidinfo; /* if a frameset */ 598 int frameid; /* id of document frame */ 599 600 /* info needed to respond to user actions */ 601 Anchor* anchors; /* list of href anchors */ 602 DestAnchor* dests; /* list of destination anchors */ 603 Form* forms; /* list of forms */ 604 Table* tables; /* list of tables */ 605 Map* maps; /* list of maps */ 606 Iimage* images; /* list of image items (through nextimage links) */ 607 }; 608 609 extern int dimenkind(Dimen d); 610 extern int dimenspec(Dimen d); 611 extern void freedocinfo(Docinfo* d); 612 extern void freeitems(Item* ithead); 613 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); 614 extern void printitems(Item* items, char* msg); 615 extern int targetid(Rune* s); 616 extern Rune* targetname(int targid); 617 extern int validitems(Item* i); 618 619 /* #pragma varargck type "I" Item* */ 620 621 /* Control print output */ 622 extern int warn; 623 extern int dbglex; 624 extern int dbgbuild; 625 626 /* To be provided by caller */ 627 /* emalloc and erealloc should not return if can't get memory. */ 628 /* emalloc should zero its memory. */ 629 extern void* emalloc(ulong); 630 extern void* erealloc(void* p, ulong size); 631 #ifdef __cpluspplus 632 } 633 #endif 634 #endif 635