1 #ifndef _HTML_H_
2 #define _HTML_H_ 1
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6 
7 AUTOLIB(html)
8 /*
9  #pragma lib "libhtml.a"
10  #pragma src "/sys/src/libhtml"
11 */
12 
13 /* UTILS */
14 extern uchar*	fromStr(Rune* buf, int n, int chset);
15 extern Rune*	toStr(uchar* buf, int n, int chset);
16 
17 /* Common LEX and BUILD enums */
18 
19 /* Media types */
/*
 * Media type codes, numbered consecutively from 0.
 * NMEDIATYPES is the number of codes and must stay last.
 * The explicit values below only anchor each group; they equal the
 * values the enumerators would receive implicitly.
 */
enum {
	ApplMsword = 0, ApplOctets, ApplPdf, ApplPostscript,
	ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

	UnknownType = 8,

	Audio32kadpcm = 9, AudioBasic,

	ImageCgm = 11, ImageG3fax, ImageGif, ImageIef,
	ImageJpeg, ImagePng, ImageTiff,
	ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap,

	ModelVrml = 22,

	MultiDigest = 23, MultiMixed,

	TextCss = 25, TextEnriched, TextHtml, TextJavascript,
	TextPlain, TextRichtext, TextSgml,
	TextTabSeparatedValues, TextXml,

	VideoMpeg = 34, VideoQuicktime,

	NMEDIATYPES
};
60 
61 /* HTTP methods */
/* Request methods (used, e.g., as a Form's method) */
enum { HGet = 0, HPost = 1 };
67 
68 /* Charsets */
/* Character set codes; NCHARSETS counts them and must stay last */
enum {
	UnknownCharset = 0,
	US_Ascii = 1,
	ISO_8859_1 = 2,
	UTF_8 = 3,
	Unicode = 4,
	NCHARSETS
};
78 
79 /* Frame Target IDs */
/* Predefined frame target ids */
enum { FTtop = 0, FTself, FTparent, FTblank };
86 
87 /* LEX */
/*
 * Forward declarations so the structures below can refer to each
 * other by their short names.
 */

/* LEX */
typedef struct Token		Token;
typedef struct Attr		Attr;

/* BUILD: items and their variants */
typedef struct Item		Item;
typedef struct Itext		Itext;
typedef struct Irule		Irule;
typedef struct Iimage		Iimage;
typedef struct Iformfield	Iformfield;
typedef struct Itable		Itable;
typedef struct Ifloat		Ifloat;
typedef struct Ispacer		Ispacer;

/* BUILD: attributes and script events */
typedef struct Genattr		Genattr;
typedef struct SEvent		SEvent;

/* BUILD: forms */
typedef struct Formfield	Formfield;
typedef struct Option		Option;
typedef struct Form		Form;

/* BUILD: tables */
typedef struct Table		Table;
typedef struct Tablecol		Tablecol;
typedef struct Tablerow		Tablerow;
typedef struct Tablecell	Tablecell;

/* BUILD: small value types */
typedef struct Align		Align;
typedef struct Dimen		Dimen;
typedef struct Background	Background;

/* BUILD: anchors and client-side maps */
typedef struct Anchor		Anchor;
typedef struct DestAnchor	DestAnchor;
typedef struct Map		Map;
typedef struct Area		Area;

/* BUILD: frames, document info and parser state */
typedef struct Kidinfo		Kidinfo;
typedef struct Docinfo		Docinfo;
typedef struct Stack		Stack;
typedef struct Pstate		Pstate;
typedef struct ItemSource	ItemSource;

typedef struct Lay		Lay;	/* defined in Layout module */
123 
124 /* Alignment types */
/* Alignment codes, shared by horizontal and vertical alignment fields */
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
129 
130 struct Align
131 {
132 	uchar	halign;	/* one of ALnone, ALleft, etc. */
133 	uchar	valign;	/* one of ALnone, ALtop, etc. */
134 };
135 
136 /* A Dimen holds a dimension specification, especially for those */
137 /* cases when a number can be followed by a % or a * to indicate */
138 /* percentage of total or relative weight. */
139 /* Dnone means no dimension was specified */
140 
141 /* To fit in a word, use top bits to identify kind, rest for value */
/*
 * Encoding of Dimen.kindspec: the kind lives in bits 29-30 and the
 * remaining bits hold the value (spec).
 */
enum {
	Dnone		= 0,
	Dpixels		= 0x20000000,	/* 1<<29 */
	Dpercent	= 0x40000000,	/* 2<<29 */
	Drelative	= 0x60000000,	/* 3<<29 */
	Dkindmask	= 0x60000000,	/* 3<<29: selects the kind bits */
	Dspecmask	= ~Dkindmask	/* everything that is not a kind bit */
};
150 
/* One dimension specification, packed into a single int */
struct Dimen {
	int kindspec;	/* kind | spec */
};
155 
156 /* Background is either an image or a color. */
157 /* If both are set, the image has precedence. */
158 struct Background
159 {
160 	Rune*	image;	/* url */
161 	int		color;
162 };
163 
164 
/* There are about a half dozen Item variants. */
/* They all look like this at the start (originally via Plan 9 C's */
/* anonymous structure member mechanism; in this header each variant */
/* instead embeds an Item as its first, named member), */
/* and then the tag field dictates what extra fields there are. */
169 struct Item
170 {
171 	Item*	next;		/* successor in list of items */
172 	int		width;	/* width in pixels (0 for floating items) */
173 	int		height;	/* height in pixels */
174 	Rectangle	r;
175 	int		ascent;	/* ascent (from top to baseline) in pixels */
176 	int		anchorid;	/* if nonzero, which anchor we're in */
177 	int		state;	/* flags and values (see below) */
178 	Genattr*	genattr;	/* generic attributes and events */
179 	int		tag;		/* variant discriminator: Itexttag, etc. */
180 };
181 
182 /* Item variant tags */
/* Values for Item.tag, one per Item variant */
enum {
	Itexttag = 0, Iruletag, Iimagetag, Iformfieldtag,
	Itabletag, Ifloattag, Ispacertag
};
192 
193 struct Itext
194 {
195 	Item item;				/* (with tag ==Itexttag) */
196 	Rune*	s;			/* the characters */
197 	int		fnt;			/* style*NumSize+size (see font stuff, below) */
198 	int		fg;			/* Pixel (color) for text */
199 	uchar	voff;			/* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
200 	uchar	ul;			/* ULnone, ULunder, or ULmid */
201 };
202 
203 struct Irule
204 {
205 	Item item;				/* (with tag ==Iruletag) */
206 	uchar	align;		/* alignment spec */
207 	uchar	noshade;		/* if true, don't shade */
208 	int		size;			/* size attr (rule height) */
209 	Dimen	wspec;		/* width spec */
210 };
211 
212 
213 struct Iimage
214 {
215 	Item item;				/* (with tag ==Iimagetag) */
216 	Rune*	imsrc;		/* image src url */
217 	int		imwidth;		/* spec width (actual, if no spec) */
218 	int		imheight;		/* spec height (actual, if no spec) */
219 	Rune*	altrep;		/* alternate representation, in absence of image */
220 	Map*	map;			/* if non-nil, client side map */
221 	int		ctlid;			/* if animated */
222 	uchar	align;		/* vertical alignment */
223 	uchar	hspace;		/* in pixels; buffer space on each side */
224 	uchar	vspace;		/* in pixels; buffer space on top and bottom */
225 	uchar	border;		/* in pixels: border width to draw around image */
226 	Iimage*	nextimage;	/* next in list of document's images */
227 	void *aux;
228 };
229 
230 
231 struct Iformfield
232 {
233 	Item item;				/* (with tag ==Iformfieldtag) */
234 	Formfield*	formfield;
235 	void *aux;
236 };
237 
238 
239 struct Itable
240 {
241 	Item item;				/* (with tag ==Itabletag) */
242 	Table*	table;
243 };
244 
245 
246 struct Ifloat
247 {
248 	Item _item;				/* (with tag ==Ifloattag) */
249 	Item*	item;			/* table or image item that floats */
250 	int		x;			/* x coord of top (from right, if ALright) */
251 	int		y;			/* y coord of top */
252 	uchar	side;			/* margin it floats to: ALleft or ALright */
253 	uchar	infloats;		/* true if this has been added to a lay.floats */
254 	Ifloat*	nextfloat;		/* in list of floats */
255 };
256 
257 
258 struct Ispacer
259 {
260 	Item item;				/* (with tag ==Ispacertag) */
261 	int		spkind;		/* ISPnull, etc. */
262 };
263 
264 /* Item state flags and value fields */
/*
 * Item state flags and value fields.
 * IFbrk (forced break before this item) is a macro instead of an
 * enumerator because, per the original note, 0x80000000 is
 * "too big for sun" as an enum value.
 */
#define	IFbrk		0x80000000
enum {
	IFbrksp		= 1<<30,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk		= 1<<29,	/* break not allowed before this item */
	IFcleft		= 1<<28,	/* clear left floats (IFbrk set too) */
	IFcright	= 1<<27,	/* clear right floats (IFbrk set too) */
	IFwrap		= 1<<26,	/* in a wrapping (non-pre) line */
	IFhang		= 1<<25,	/* in a hanging (into left indent) item */
	IFrjust		= 1<<24,	/* right justify current line */
	IFcjust		= 1<<23,	/* center justify current line */
	IFsmap		= 1<<22,	/* image is server-side map */
	IFindentshift	= 8,
	IFindentmask	= 0xFF<<IFindentshift,	/* current indent, in tab stops */
	IFhangmask	= 0xFF		/* current hang into left indent, in 1/10th tabstops */
};
281 
282 /* Bias added to Itext's voff field */
enum {
	Voffbias = 128	/* added to the offset stored in Itext.voff */
};
284 
285 /* Spacer kinds */
/* Values for Ispacer.spkind */
enum {
	ISPnull = 0,	/* 0 height and width */
	ISPvline = 1,	/* height and ascent of current font */
	ISPhspace = 2,	/* width of space in current font */
	ISPgeneral = 3	/* other purposes (e.g., between markers and list) */
};
292 
293 /* Generic attributes and events (not many elements will have any of these set) */
294 struct Genattr
295 {
296 	Rune*	id;
297 	Rune*	class;
298 	Rune*	style;
299 	Rune*	title;
300 	SEvent*	events;
301 };
302 
303 struct SEvent
304 {
305 	SEvent*	next;		/* in list of events */
306 	int		type;		/* SEonblur, etc. */
307 	Rune*	script;
308 };
309 
/* Values for SEvent.type; Numscriptev counts them and must stay last */
enum {
	SEonblur = 0,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};
318 
319 /* Form field types */
/* Values for Formfield.ftype */
enum {
	Ftext = 0, Fpassword, Fcheckbox, Fradio,
	Fsubmit, Fhidden, Fimage, Freset,
	Ffile, Fbutton, Fselect, Ftextarea
};
334 
335 /* Information about a field in a form */
336 struct Formfield
337 {
338 	Formfield*	next;		/* in list of fields for a form */
339 	int			ftype;	/* Ftext, Fpassword, etc. */
340 	int			fieldid;	/* serial no. of field within its form */
341 	Form*		form;	/* containing form */
342 	Rune*		name;	/* name attr */
343 	Rune*		value;	/* value attr */
344 	int			size;		/* size attr */
345 	int			maxlength;	/* maxlength attr */
346 	int			rows;	/* rows attr */
347 	int			cols;		/* cols attr */
348 	uchar		flags;	/* FFchecked, etc. */
349 	Option*		options;	/* for Fselect fields */
350 	Item*		image;	/* image item, for Fimage fields */
351 	int			ctlid;		/* identifies control for this field in layout */
352 	SEvent*		events;	/* same as genattr->events of containing item */
353 };
354 
/* Bits for Formfield.flags */
enum {
	FFchecked	= 0x80,	/* 1<<7 */
	FFmultiple	= 0x40	/* 1<<6 */
};
359 
360 /* Option holds info about an option in a "select" form field */
361 struct Option
362 {
363 	Option*	next;			/* next in list of options for a field */
364 	int		selected;		/* true if selected initially */
365 	Rune*	value;		/* value attr */
366 	Rune*	display;		/* display string */
367 };
368 
369 /* Form holds info about a form */
370 struct Form
371 {
372 	Form*		next;		/* in list of forms for document */
373 	int			formid;	/* serial no. of form within its doc */
374 	Rune*		name;	/* name or id attr (netscape uses name, HTML 4.0 uses id) */
375 	Rune*		action;	/* action attr */
376 	int			target;	/* target attr as targetid */
377 	int			method;	/* HGet or HPost */
378 	int			nfields;	/* number of fields */
379 	Formfield*	fields;	/* field's forms, in input order */
380 };
381 
382 /* Flags used in various table structures */
/* Bits for the flags members of the table structures */
enum {
	TFparsing	= 0x80,	/* 1<<7 */
	TFnowrap	= 0x40,	/* 1<<6 */
	TFisth		= 0x20	/* 1<<5 */
};
388 
389 
390 /* Information about a table */
391 struct Table
392 {
393 	Table*		next;			/* next in list of document's tables */
394 	int			tableid;		/* serial no. of table within its doc */
395 	Tablerow*	rows;		/* array of row specs (list during parsing) */
396 	int			nrow;		/* total number of rows */
397 	Tablecol*		cols;			/* array of column specs */
398 	int			ncol;			/* total number of columns */
399 	Tablecell*		cells;			/* list of unique cells */
400 	int			ncell;		/* total number of cells */
401 	Tablecell***	grid;			/* 2-D array of cells */
402 	Align		align;		/* alignment spec for whole table */
403 	Dimen		width;		/* width spec for whole table */
404 	int			border;		/* border attr */
405 	int			cellspacing;	/* cellspacing attr */
406 	int			cellpadding;	/* cellpadding attr */
407 	Background	background;	/* table background */
408 	Item*		caption;		/* linked list of Items, giving caption */
409 	uchar		caption_place;	/* ALtop or ALbottom */
410 	Lay*			caption_lay;	/* layout of caption */
411 	int			totw;			/* total width */
412 	int			toth;			/* total height */
413 	int			caph;		/* caption height */
414 	int			availw;		/* used for previous 3 sizes */
415 	Token*		tabletok;		/* token that started the table */
416 	uchar		flags;		/* Lchanged, perhaps */
417 };
418 
419 
420 struct Tablecol
421 {
422 	int		width;
423 	Align	align;
424 	Point		pos;
425 };
426 
427 
428 struct Tablerow
429 {
430 	Tablerow*	next;			/* Next in list of rows, during parsing */
431 	Tablecell*		cells;			/* Cells in row, linked through nextinrow */
432 	int			height;
433 	int			ascent;
434 	Align		align;
435 	Background	background;
436 	Point			pos;
437 	uchar		flags;		/* 0 or TFparsing */
438 };
439 
440 
441 /* A Tablecell is one cell of a table. */
442 /* It may span multiple rows and multiple columns. */
/* Cells are linked on two lists: the list of all the cells of */
/* a table (the next pointers), and the list of all the */
/* cells that start in a given row (the nextinrow pointers). */
446 struct Tablecell
447 {
448 	Tablecell*		next;			/* next in list of table's cells */
449 	Tablecell*		nextinrow;	/* next in list of row's cells */
450 	int			cellid;		/* serial no. of cell within table */
451 	Item*		content;		/* contents before layout */
452 	Lay*			lay;			/* layout of cell */
453 	int			rowspan;		/* number of rows spanned by this cell */
454 	int			colspan;		/* number of cols spanned by this cell */
455 	Align		align;		/* alignment spec */
456 	uchar		flags;		/* TFparsing, TFnowrap, TFisth */
457 	Dimen		wspec;		/* suggested width */
458 	int			hspec;		/* suggested height */
459 	Background	background;	/* cell background */
460 	int			minw;		/* minimum possible width */
461 	int			maxw;		/* maximum width */
462 	int			ascent;		/* cell's ascent */
463 	int			row;			/* row of upper left corner */
464 	int			col;			/* col of upper left corner */
465 	Point			pos;			/* nw corner of cell contents, in cell */
466 };
467 
468 /* Anchor is for info about hyperlinks that go somewhere */
469 struct Anchor
470 {
471 	Anchor*		next;		/* next in list of document's anchors */
472 	int			index;	/* serial no. of anchor within its doc */
473 	Rune*		name;	/* name attr */
474 	Rune*		href;		/* href attr */
475 	int			target;	/* target attr as targetid */
476 };
477 
478 
479 /* DestAnchor is for info about hyperlinks that are destinations */
480 struct DestAnchor
481 {
482 	DestAnchor*	next;		/* next in list of document's destanchors */
483 	int			index;	/* serial no. of anchor within its doc */
484 	Rune*		name;	/* name attr */
485 	Item*		item;		/* the destination */
486 };
487 
488 
489 /* Maps (client side) */
490 struct Map
491 {
492 	Map*	next;			/* next in list of document's maps */
493 	Rune*	name;		/* map name */
494 	Area*	areas;		/* list of map areas */
495 };
496 
497 
498 struct Area
499 {
500 	Area*		next;		/* next in list of a map's areas */
501 	int			shape;	/* SHrect, etc. */
502 	Rune*		href;		/* associated hypertext link */
503 	int			target;	/* associated target frame */
504 	Dimen*		coords;	/* array of coords for shape */
505 	int			ncoords;	/* size of coords array */
506 };
507 
508 /* Area shapes */
/* Values for Area.shape */
enum {
	SHrect = 0,
	SHcircle,
	SHpoly
};
512 
513 /* Fonts are represented by integers: style*NumSize + size */
514 
515 /* Font styles */
/* Font styles; NumStyle counts them and must stay last */
enum {
	FntR = 0,	/* roman */
	FntI = 1,	/* italic */
	FntB = 2,	/* bold */
	FntT = 3,	/* typewriter */
	NumStyle
};

/* Font sizes; NumSize counts them and must stay last */
enum { Tiny = 0, Small, Normal, Large, Verylarge, NumSize };

/* Derived values for the style*NumSize + size font encoding */
enum {
	NumFnt = NumStyle * NumSize,		/* number of distinct fonts */
	DefFnt = FntR * NumSize + Normal	/* roman, normal size */
};
538 
539 /* Lines are needed through some text items, for underlining or strikethrough */
/* Values for Itext.ul */
enum {
	ULnone = 0,
	ULunder,
	ULmid
};
543 
544 /* Kidinfo flags */
/* Bits for Kidinfo.flags */
enum {
	FRnoresize	= 0x01,
	FRnoscroll	= 0x02,
	FRhscroll	= 0x04,
	FRvscroll	= 0x08,
	FRhscrollauto	= 0x10,
	FRvscrollauto	= 0x20
};
553 
554 /* Information about child frame or frameset */
555 struct Kidinfo
556 {
557 	Kidinfo*		next;		/* in list of kidinfos for a frameset */
558 	int			isframeset;
559 
560 	/* fields for "frame" */
561 	Rune*		src;		/* only nil if a "dummy" frame or this is frameset */
562 	Rune*		name;	/* always non-empty if this isn't frameset */
563 	int			marginw;
564 	int			marginh;
565 	int			framebd;
566 	int			flags;
567 
568 	/* fields for "frameset" */
569 	Dimen*		rows;	/* array of row dimensions */
570 	int			nrows;	/* length of rows */
571 	Dimen*		cols;		/* array of col dimensions */
572 	int			ncols;	/* length of cols */
573 	Kidinfo*		kidinfos;
574 	Kidinfo*		nextframeset;	/* parsing stack */
575 };
576 
577 
578 /* Document info (global information about HTML page) */
579 struct Docinfo
580 {
581 	/* stuff from HTTP headers, doc head, and body tag */
582 	Rune*		src;				/* original source of doc */
583 	Rune*		base;			/* base URL of doc */
584 	Rune*		doctitle;			/* from <title> element */
585 	Background	background;		/* background specification */
586 	Iimage*		backgrounditem;	/* Image Item for doc background image, or nil */
587 	int			text;				/* doc foreground (text) color */
588 	int			link;				/* unvisited hyperlink color */
589 	int			vlink;			/* visited hyperlink color */
590 	int			alink;			/* highlighting hyperlink color */
591 	int			target;			/* target frame default */
592 	int			chset;			/* ISO_8859, etc. */
593 	int			mediatype;		/* TextHtml, etc. */
594 	int			scripttype;		/* TextJavascript, etc. */
595 	int			hasscripts;		/* true if scripts used */
596 	Rune*		refresh;			/* content of <http-equiv=Refresh ...> */
597 	Kidinfo*		kidinfo;			/* if a frameset */
598 	int			frameid;			/* id of document frame */
599 
600 	/* info needed to respond to user actions */
601 	Anchor*		anchors;			/* list of href anchors */
602 	DestAnchor*	dests;			/* list of destination anchors */
603 	Form*		forms;			/* list of forms */
604 	Table*		tables;			/* list of tables */
605 	Map*		maps;			/* list of maps */
606 	Iimage*		images;			/* list of image items (through nextimage links) */
607 };
608 
609 extern int			dimenkind(Dimen d);
610 extern int			dimenspec(Dimen d);
611 extern void		freedocinfo(Docinfo* d);
612 extern void		freeitems(Item* ithead);
613 extern Item*		parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
614 extern void		printitems(Item* items, char* msg);
615 extern int			targetid(Rune* s);
616 extern Rune*		targetname(int targid);
617 extern int			validitems(Item* i);
618 
619 /* #pragma varargck	type "I"	Item* */
620 
621 /* Control print output */
extern int warn, dbglex, dbgbuild;
625 
626 /* To be provided by caller */
/* emalloc and erealloc should not return if they can't get memory. */
628 /* emalloc should zero its memory. */
629 extern void*	emalloc(ulong);
630 extern void*	erealloc(void* p, ulong size);
/*
 * Close the extern "C" block opened at the top of this header.
 * The guard must test __cplusplus, the same macro the opening guard
 * tests; with any other name a C++ compiler would see the opening
 * brace but never this closing one.
 */
#ifdef __cplusplus
}
#endif
634 #endif
635