1 /*********************************************************************
2 decorate.c:
3 sanitize a tree of nodes produced by html,
4 and decorate the tree with the corresponding js objects.
5 A <form> tag has a corresponding Form object in the js world, etc.
6 This is done for the html that is on the initial web page,
7 and any html that is produced by javascript via
8 foo.innerHTML = string or document.write(string).
9 *********************************************************************/
10 
11 #include "eb.h"
12 
/* The current (foreground) edbrowse window and frame.
 * These are replaced with stubs when run within the javascript process. */
struct ebWindow *cw;
Frame *cf;
/* NOTE(review): not referenced in this part of the file; presumably a
 * global frame sequence number - confirm against its users. */
int gfsn;

/* traverse the tree of nodes with a callback function */
/* traverseNode() invokes this twice per node: once with opentag true
 * before the children are walked, and once with opentag false after. */
nodeFunction traverse_callback;

/* possible callback functions in this file */
static void prerenderNode(Tag *node, bool opentag);
static void jsNode(Tag *node, bool opentag);
static void pushAttributes(const Tag *t);

/* Unpacks the name-value pairs of a style="" attribute onto the style object so;
 * called from domLink() once obj.style exists. */
static void processStyles(jsobjtype so, const char *stylestring);

/* Set by traverseNode() when a node is reached a second time during one
 * traversal; cleared and reported by traverseAll(). */
static bool treeOverflow;
30 
/*********************************************************************
Depth-first walk of the subtree rooted at node.
The current traverse_callback is invoked on the way down (opentag = true),
then every child is walked in sibling order,
then the callback is invoked again on the way up (opentag = false).
A node that was already visited in this traversal means the tree is not
really a tree; flag it in treeOverflow and do not descend again.
*********************************************************************/
static void traverseNode(Tag *node)
{
	Tag *kid;

	if (!node->visited) {
		node->visited = true;

		(*traverse_callback) (node, true);
		kid = node->firstchild;
		while (kid) {
			traverseNode(kid);
			kid = kid->sibling;
		}
		(*traverse_callback) (node, false);
		return;
	}

/* been here before */
	treeOverflow = true;
	debugPrint(4, "node revisit %s %d", node->info->name, node->seqno);
}				/* traverseNode */
48 
traverseAll(int start)49 void traverseAll(int start)
50 {
51 	Tag *t;
52 	int i;
53 
54 	treeOverflow = false;
55 	for (i = start; i < cw->numTags; ++i) {
56 		t = tagList[i];
57 		t->visited = false;
58 	}
59 
60 	for (i = start; i < cw->numTags; ++i) {
61 		t = tagList[i];
62 		if (!t->parent && !t->slash && !t->dead)
63 			traverseNode(t);
64 	}
65 
66 	if (treeOverflow)
67 		debugPrint(3, "malformed tree!");
68 }				/* traverseAll */
69 
/* State carried between prerenderNode() callbacks during one traversal. */
static int nopt;		/* number of options */
/* nopt is zeroed when a <select> opens and numbers each <option> beneath it. */
/* None of these tags nest, so it is reasonable to talk about
 * the current open tag. */
/* Each pointer is set when its tag opens and cleared when it closes. */
static Tag *currentForm, *currentSel, *currentOpt, *currentStyle;
static Tag *currentTitle, *currentScript, *currentTA;
static Tag *currentA;
/* Names of the radio groups in the current form that already have a checked
 * button, stored as |name1|name2|...| and rebuilt each time a <form> opens. */
static char *radioCheck;
/* Length counter paired with radioCheck for the string-building helpers. */
static int radio_l;
78 
attribVal(const Tag * t,const char * name)79 const char *attribVal(const Tag *t, const char *name)
80 {
81 	const char *v;
82 	int j;
83 	if (!t->attributes)
84 		return 0;
85 	j = stringInListCI(t->attributes, name);
86 	if (j < 0)
87 		return 0;
88 	v = t->atvals[j];
89 	return v;
90 }				/* attribVal */
91 
attribPresent(const Tag * t,const char * name)92 static bool attribPresent(const Tag *t, const char *name)
93 {
94 	int j = stringInListCI(t->attributes, name);
95 	return (j >= 0);
96 }				/* attribPresent */
97 
/*********************************************************************
Attach child as the last child of parent.
child->sibling is left as it is.
*********************************************************************/
static void linkinTree(Tag *parent, Tag *child)
{
	Tag *last;

	child->parent = parent;

	last = parent->firstchild;
	if (!last) {
/* the first and only child */
		parent->firstchild = child;
		return;
	}

/* march to the end of the sibling chain */
	while (last->sibling)
		last = last->sibling;
	last->sibling = child;
}				/* linkinTree */
113 
makeButton(void)114 static void makeButton(void)
115 {
116 	Tag *t = newTag(cf, "input");
117 	t->controller = currentForm;
118 	t->itype = INP_SUBMIT;
119 	t->value = emptyString;
120 	t->step = 1;
121 	linkinTree(currentForm, t);
122 }				/* makeButton */
123 
/*********************************************************************
Climb from t toward the root and return the nearest proper ancestor
having the given action, or null if there is none.
The climb gives up, with a message, after 10000 levels,
in case the parent links form a loop.
*********************************************************************/
Tag *findOpenTag(Tag *t, int action)
{
	int depth;

	for (depth = 1, t = t->parent; t; ++depth, t = t->parent) {
		if (t->action == action)
			return t;
		if (depth == 10000) {	// tree shouldn't be this deep
			debugPrint(1, "infinite loop in findOpenTag()");
			return 0;
		}
	}

	return 0;
}				/* findOpenTag */
137 
/*********************************************************************
Climb from t toward the root and return the nearest proper ancestor
that is a table section: tbody, thead, or tfoot.
Return null if there is none.
The climb gives up, with a message, after 10000 levels,
in case the parent links form a loop.
*********************************************************************/
static Tag *findOpenSection(Tag *t)
{
	int count = 0;
	while ((t = t->parent)) {
		if (t->action == TAGACT_TBODY || t->action == TAGACT_THEAD ||
		    t->action == TAGACT_TFOOT)
			return t;
		if (++count == 10000) {	// tree shouldn't be this deep
/* name this function, not findOpenTag, so the log points to the right place */
			debugPrint(1, "infinite loop in findOpenSection()");
			break;
		}
	}
	return 0;
}				/* findOpenSection */
152 
/*********************************************************************
Climb from t toward the root and return the nearest proper ancestor
that is a list, ordered or unordered. Return null if there is none.
Like findOpenTag(), the climb gives up, with a message, after 10000 levels,
so that a loop in the parent links cannot hang the program.
*********************************************************************/
Tag *findOpenList(Tag *t)
{
	int count = 0;
	while ((t = t->parent)) {
		if (t->action == TAGACT_OL || t->action == TAGACT_UL)
			return t;
		if (++count == 10000) {	// tree shouldn't be this deep
			debugPrint(1, "infinite loop in findOpenList()");
			break;
		}
	}
	return 0;
}				/* findOpenList */
160 
161 /*********************************************************************
162 tidy workaround functions.
163 Consider html like this.
164 <body>
165 <A href=http://www.edbrowse.org>Link1
166 <A href=http://www.edbrowse.org>Link2
167 <A href=http://www.edbrowse.org>Link3
168 </body>
169 Each anchor should close the one before, thus rendering as
170  {Link1} {Link2} {Link3}
171 But tidy does not do this; it allows anchors to nest, thus
172  {Link1{Link2{Link3}}}
173 Not a serious problem really, it just looks funny.
174 And yes, html like this does appear in the wild.
175 This routine restructures the tree to move the inner anchor
176 back up to the same level as the outer anchor.
177 *********************************************************************/
178 
nestedAnchors(int start)179 static void nestedAnchors(int start)
180 {
181 	Tag *a1, *a2, *p, *c;
182 	int j;
183 
184 	for (j = start; j < cw->numTags; ++j) {
185 		a2 = tagList[j];
186 		if (a2->action != TAGACT_A)
187 			continue;
188 		a1 = findOpenTag(a2, TAGACT_A);
189 		if (!a1)
190 			continue;
191 
192 /* delete a2 from the tree */
193 		p = a2->parent;
194 		a2->parent = 0;
195 		if (p->firstchild == a2)
196 			p->firstchild = a2->sibling;
197 		else {
198 			c = p->firstchild;
199 			while (c->sibling) {
200 				if (c->sibling == a2) {
201 					c->sibling = a2->sibling;
202 					break;
203 				}
204 				c = c->sibling;
205 			}
206 		}
207 		a2->sibling = 0;
208 
209 /* then link a2 up next to a1 */
210 		a2->parent = a1->parent;
211 		a2->sibling = a1->sibling;
212 		a1->sibling = a2;
213 	}
214 }				/* nestedAnchors */
215 
216 /*********************************************************************
Tables are supposed to have bodies, I guess.
218 So <table><tr> becomes <table><tbody><tr>
219 Find each table and look at its children.
220 Note the tags between sections, where section is tHead, tBody, or tFoot.
221 If that span includes <tr>, then put those tags under a new tBody.
222 *********************************************************************/
223 
224 static void insert_tbody1(Tag *s1, Tag *s2,
225 			  Tag *tbl);
226 static bool tagBelow(Tag *t, int action);
227 
insert_tbody(int start)228 static void insert_tbody(int start)
229 {
230 	int i, end = cw->numTags;
231 	Tag *tbl, *s1, *s2;
232 
233 	for (i = start; i < end; ++i) {
234 		tbl = tagList[i];
235 		if (tbl->action != TAGACT_TABLE)
236 			continue;
237 		s1 = 0;
238 		do {
239 			s2 = (s1 ? s1->sibling : tbl->firstchild);
240 			while (s2 && s2->action != TAGACT_TBODY
241 			       && s2->action != TAGACT_THEAD
242 			       && s2->action != TAGACT_TFOOT)
243 				s2 = s2->sibling;
244 			insert_tbody1(s1, s2, tbl);
245 			s1 = s2;
246 		} while (s1);
247 	}
248 }
249 
/*********************************************************************
Look at the children of tbl that lie strictly between sections s1 and s2.
s1 null means start at the first child; s2 null means go to the end.
If the span starts the table with <tr><th>, that row goes under a new thead.
If what remains has a row anywhere beneath it,
the whole remaining span goes under a new tbody.
*********************************************************************/
static void insert_tbody1(Tag *s1, Tag *s2,
			  Tag *tbl)
{
	Tag *first = (s1 ? s1->sibling : tbl->firstchild);
	Tag *cur, *last = 0;
	Tag *sec;		// new section
	bool hasrow = false;

	if (first == s2)	// nothing between
		return;

// Look for the direct html <table><tr><th>.
// If th is anywhere else down the path, we won't find it.
	if (!s1 && first->action == TAGACT_TR) {
		cur = first->firstchild;
		if (cur && stringEqual(cur->info->name, "th")) {
			sec = newTag(cf, "thead");
			sec->parent = tbl;
			sec->firstchild = first;
			sec->sibling = first->sibling;
			tbl->firstchild = sec;
			first->parent = sec;
			first->sibling = 0;
/* the new thead is now the section before the span */
			s1 = sec;
			first = sec->sibling;
		}
	}

	for (cur = first; cur != s2 && !hasrow; cur = cur->sibling)
		hasrow = tagBelow(cur, TAGACT_TR);
	if (!hasrow)		// no rows below
		return;

	sec = newTag(cf, "tbody");
	for (cur = first; cur != s2; cur = cur->sibling) {
		cur->parent = sec;
		last = cur;
	}

/* hook the new section in after s1, or at the front */
	if (s1)
		s1->sibling = sec;
	else
		tbl->firstchild = sec;

/* cut the span off from s2, which now follows the new section */
	if (s2) {
		last->sibling = 0;
		sec->sibling = s2;
	}

	sec->firstchild = first;
	sec->parent = tbl;
}
292 
293 /*********************************************************************
294 Bad html will derail tidy, so that <a><div>stuff</div></a>
295 will push div outside the anchor, to render as  {} stuff
296 m.facebook.com is loaded with them.
297 Here is a tiny example.
298 
299 <body>
300 <input type=button name=whatever value=hohaa>
301 <a href="#bottom"><div>Cognitive business is here</div></a>
302 </body>
303 
304 This routine puts it back.
An anchor with no children followed by div
306 moves div under the anchor.
307 For a while I had this function commented out, like it caused a problem,
308 but I can't see why or how, so it's back, and facebook looks better.
309 
310 As an after kludge, don't move <div> under <a> if <div> has an anchor beneath it.
311 That could create nested anchors, which we already worked hard to get rid of.   Eeeeeeesh.
312 
313 This and other tidy workaround functions are based on heuristics,
314 and suffer from false positives and false negatives,
315 the former being the more serious problem -
316 i.e. we rearrange the tree when we shouldn't.
317 Even when we do the right thing, there is another problem,
318 innerHTML is wrong, and doesn't match the tree of nodes
319 or the original source.
320 innerHTML comes to us from tidy, after it has fixed (sometimes broken) things.
321 Add <script> to the above, browse, jdb, and look at document.body.innerHTML.
322 It does not match the source, in fact it represents the tree *before* we fixed it.
323 There really isn't anything I can do about that.
324 In so many ways, the better approach is to fix tidy, but sometimes that is out of our hands.
325 *********************************************************************/
326 
/* Does t, or any tag in the subtree beneath t, have the given action?
 * The search is depth first and stops at the first match. */
static bool tagBelow(Tag *t, int action)
{
	Tag *kid;
	bool found = (t->action == action);

	for (kid = t->firstchild; kid && !found; kid = kid->sibling)
		found = tagBelow(kid, action);

	return found;
}				/* tagBelow */
338 
emptyAnchors(int start)339 static void emptyAnchors(int start)
340 {
341 	int j;
342 	Tag *a0, *div, *up;
343 
344 	for (j = start; j < cw->numTags; ++j) {
345 		a0 = tagList[j];
346 		if (a0->action != TAGACT_A || a0->firstchild)
347 			continue;
348 // anchor no children
349 		for (up = a0; up; up = up->parent)
350 			if (up->sibling)
351 				break;
352 		if (!up || !(div = up->sibling) || div->action != TAGACT_DIV)
353 			continue;
354 // div follows
355 /* would moving this create nested anchors? */
356 		if (tagBelow(div, TAGACT_A))
357 			continue;
358 /* shouldn't have inputs or forms in an anchor. */
359 		if (tagBelow(div, TAGACT_INPUT))
360 			continue;
361 		if (tagBelow(div, TAGACT_FORM))
362 			continue;
363 		up->sibling = div->sibling;
364 		a0->firstchild = div;
365 		div->parent = a0;
366 		div->sibling = 0;
367 	}
368 }				/* emptyAnchors */
369 
370 /*********************************************************************
371 If a form is in a table, but not in tr or td, it closes immediately,
372 and all the following inputs are orphaned.
373 Check for an empty form beneath table, and move all the following siblings
374 down into the form.
375 *********************************************************************/
376 
tableForm(int start)377 static void tableForm(int start)
378 {
379 	int j;
380 	Tag *form, *table, *t;
381 
382 	for (j = start; j < cw->numTags; ++j) {
383 		form = tagList[j];
384 		if (form->action != TAGACT_FORM || form->firstchild)
385 			continue;
386 		t = form;
387 		for (table = form->sibling; table; table = table->sibling) {
388 			if (table->action == TAGACT_TABLE &&
389 			    tagBelow(table, TAGACT_INPUT)) {
390 /* table with inputs below; move it to form */
391 /* hope this doesn't break anything */
392 				table->parent = form;
393 				form->firstchild = table;
394 				t->sibling = table->sibling;
395 				table->sibling = 0;
396 				break;
397 			}
398 			t = table;
399 		}
400 	}
401 }				/* tableForm */
402 
/*********************************************************************
Tie the input, select, or textarea t to the form that controls it.
That is the form currently open in prerender, if there is one,
otherwise the nearest form above t in the tree.
If there is no such form, say so, unless t is a button or a submit button,
or the html was generated.
If namecheck is true, also complain when t has neither name nor id,
again unless the html was generated.
*********************************************************************/
void formControl(Tag *t, bool namecheck)
{
	const char *label = (t->name ? t->name : t->id);
	Tag *owner = currentForm;

/* nodes could be created dynamically, not through html */
	if (!owner)
		owner = findOpenTag(t, TAGACT_FORM);

	if (owner) {
		t->controller = owner;
	} else if (!htmlGenerated &&
		   t->itype != INP_BUTTON && t->itype != INP_SUBMIT) {
		debugPrint(3, "%s is not part of a fill-out form",
			   t->info->desc);
	}

	if (namecheck && !label && !htmlGenerated)
		debugPrint(3, "%s does not have a name", t->info->desc);
}				/* formControl */
420 
/* The input types that edbrowse treats as distinct, null terminated.
 * htmlInputHelper() stores the index of the matching name directly in
 * the itype field of the tag, so the order here has to agree with the INP_
 * constants (presumably declared in eb.h - confirm before reordering). */
const char *const inp_types[] = {
	"reset", "button", "image", "submit",
	"hidden", "text", "file",
	"select", "textarea", "radio", "checkbox",
	0
};
427 
/*********************************************************************
Here are some other input types that should have additional syntax checks
performed on them, but as far as this version of edbrowse is concerned,
they are equivalent to text. Just here to suppress warnings.
List taken from https://www.tutorialspoint.com/html/html_input_tag.htm
The index of the matching name is stored in the itype_minor field of the tag
by htmlInputHelper(), which also compares it against INP_PW to mask passwords.
The first entry, no_minor, is not an html type;
NOTE(review): it looks like a placeholder so that index 0 means no minor type - confirm.
*********************************************************************/

const char *const inp_others[] = {
	"no_minor", "date", "datetime", "datetime-local",
	"month", "week", "time", "email", "range",
	"search", "tel", "url", "number", "password",
	0
};
441 
442 /* helper function for input tag */
htmlInputHelper(Tag * t)443 void htmlInputHelper(Tag *t)
444 {
445 	int n = INP_TEXT;
446 	int len;
447 	char *myname = (t->name ? t->name : t->id);
448 	const char *s = attribVal(t, "type");
449 	bool isbutton = stringEqual(t->info->name, "button");
450 
451 	t->itype = (isbutton ? INP_BUTTON : INP_TEXT);
452 	if (s && *s) {
453 		n = stringInListCI(inp_types, s);
454 		if (n < 0) {
455 			n = stringInListCI(inp_others, s);
456 			if (n < 0)
457 				debugPrint(3, "unrecognized input type %s", s);
458 			else
459 				t->itype = INP_TEXT, t->itype_minor = n;
460 			if (n == INP_PW)
461 				t->masked = true;
462 		} else
463 			t->itype = n;
464 	}
465 // button no type means submit
466 	if (!s && isbutton)
467 		t->itype = INP_SUBMIT;
468 
469 	s = attribVal(t, "maxlength");
470 	len = 0;
471 	if (s)
472 		len = stringIsNum(s);
473 	if (len > 0)
474 		t->lic = len;
475 
476 // No preset value on file, for security reasons.
477 // <input type=file value=/etc/passwd> then submit via onload().
478 	if (n == INP_FILE) {
479 		nzFree(t->value);
480 		t->value = 0;
481 		cnzFree(t->rvalue);
482 		t->rvalue = 0;
483 	}
484 
485 /* In this case an empty value should be "", not null */
486 	if (t->value == 0)
487 		t->value = emptyString;
488 	if (t->rvalue == 0)
489 		t->rvalue = cloneString(t->value);
490 
491 	if (n == INP_RADIO && t->checked && radioCheck && myname) {
492 		char namebuf[200];
493 		if (strlen(myname) < sizeof(namebuf) - 3) {
494 			if (!*radioCheck)
495 				stringAndChar(&radioCheck, &radio_l, '|');
496 			sprintf(namebuf, "|%s|", t->name);
497 			if (strstr(radioCheck, namebuf)) {
498 				debugPrint(3,
499 					   "multiple radio buttons have been selected");
500 				return;
501 			}
502 			stringAndString(&radioCheck, &radio_l, namebuf + 1);
503 		}
504 	}
505 
506 	/* Even the submit fields can have a name, but they don't have to */
507 	formControl(t, (n > INP_SUBMIT));
508 }				/* htmlInputHelper */
509 
510 /* return an allocated string containing the text entries for the checked options */
displayOptions(const Tag * sel)511 char *displayOptions(const Tag *sel)
512 {
513 	const Tag *t;
514 	char *opt;
515 	int opt_l;
516 
517 	opt = initString(&opt_l);
518 	for (t = cw->optlist; t; t = t->same) {
519 		if (t->controller != sel)
520 			continue;
521 		if (!t->checked)
522 			continue;
523 		if (*opt)
524 			stringAndChar(&opt, &opt_l, ',');
525 		stringAndString(&opt, &opt_l, t->textval);
526 	}
527 
528 	return opt;
529 }				/* displayOptions */
530 
/*********************************************************************
The traversal callback used by prerender().
traverseNode() calls it with opentag true when it enters the node t,
and with opentag false when it leaves, after the children have been walked.
It keeps track of the current open form, select, option, title, script,
textarea, style, and anchor, in the static current variables,
and fills in the fields of the tags that depend on that context.
A tag whose step is already 1 or more is skipped,
and step is set to 1 as each tag closes,
so a tag is not processed again by a later traversal.
*********************************************************************/
static void prerenderNode(Tag *t, bool opentag)
{
	int itype;		/* input type */
	int j;
	int action = t->action;
	const char *a;		/* usually an attribute */
	Tag *cdt;

	debugPrint(6, "prend %c%s %d%s",
		   (opentag ? ' ' : '/'), t->info->name,
		   t->seqno, (t->step >= 1 ? "-" : ""));

/* already processed; this applies to both the open and the close call */
	if (t->step >= 1)
		return;
	if (!opentag)
		t->step = 1;

	switch (action) {
	case TAGACT_NOSCRIPT:
// If javascript is enabled kill everything under noscript
		if (isJSAlive && !opentag)
			underKill(t);
		break;

	case TAGACT_TEXT:
		if (!opentag || !t->textval)
			break;

/* Text inside one of the special containers is absorbed by that container,
 * and the text node itself is marked deleted.
 * The containers are tested in a fixed order, and the first one wins. */
		if (currentTitle) {
/* only the first piece of text becomes the title */
			if (!cw->htmltitle) {
				cw->htmltitle = cloneString(t->textval);
				spaceCrunch(cw->htmltitle, true, false);
			}
			t->deleted = true;
			break;
		}

		if (currentOpt) {
			currentOpt->textval = cloneString(t->textval);
			spaceCrunch(currentOpt->textval, true, false);
			t->deleted = true;
			break;
		}

		if (currentStyle) {
			t->deleted = true;
			break;
		}

		if (currentScript) {
			currentScript->textval = cloneString(t->textval);
			t->deleted = true;
			break;
		}

		if (currentTA) {
			currentTA->value = cloneString(t->textval);
/* Sometimes tidy lops off the last newline character; it depends on
 * the tag following. And even if it didn't end in nl in the original html,
 * <textarea>foobar</textarea>, it probably should,
 * as it goes into a new buffer. */
			j = strlen(currentTA->value);
			if (j && currentTA->value[j - 1] != '\n') {
/* one more byte for the newline and one for the null */
				currentTA->value =
				    reallocMem(currentTA->value, j + 2);
				currentTA->value[j] = '\n';
				currentTA->value[j + 1] = 0;
			}
// Don't need leading whitespace.
			leftClipString(currentTA->value);
			currentTA->rvalue = cloneString(currentTA->value);
			t->deleted = true;
			break;
		}

/* text is on the page */
/* note whether the open anchor has any letters or digits inside it */
		if (currentA) {
			char *s;
			for (s = t->textval; *s; ++s)
				if (isalnumByte(*s)) {
					currentA->textin = true;
					break;
				}
		}
		break;

	case TAGACT_TITLE:
		currentTitle = (opentag ? t : 0);
		break;

	case TAGACT_SCRIPT:
		currentScript = (opentag ? t : 0);
		break;

	case TAGACT_A:
		currentA = (opentag ? t : 0);
		break;

	case TAGACT_FORM:
		if (opentag) {
			currentForm = t;
			a = attribVal(t, "method");
			if (a) {
				if (stringEqualCI(a, "post"))
					t->post = true;
				else if (!stringEqualCI(a, "get"))
					debugPrint(3,
						   "form method should be get or post");
			}
			a = attribVal(t, "enctype");
			if (a) {
				if (stringEqualCI(a, "multipart/form-data"))
					t->mime = true;
				else if (!stringEqualCI
					 (a,
					  "application/x-www-form-urlencoded"))
					debugPrint(3,
						   "unrecognized enctype, plese use multipart/form-data or application/x-www-form-urlencoded");
			}
/* classify the form by the protocol of the url it submits to */
			if ((a = t->href)) {
				const char *prot = getProtURL(a);
				if (prot) {
					if (stringEqualCI(prot, "mailto"))
						t->bymail = true;
					else if (stringEqualCI
						 (prot, "javascript"))
						t->javapost = true;
					else if (stringEqualCI(prot, "https"))
						t->secure = true;
					else if (!stringEqualCI(prot, "http") &&
						 !stringEqualCI(prot, "gopher"))
						debugPrint(3,
							   "form cannot submit using protocol %s",
							   prot);
				}
			}

/* each form starts with an empty list of checked radio groups */
			nzFree(radioCheck);
			radioCheck = initString(&radio_l);
		}
		if (!opentag && currentForm) {
/* A form with visible inputs, and nothing that was counted as a way
 * to submit it, gets a submit button of its own. */
			if (t->ninp && !t->submitted) {
				makeButton();
				t->submitted = true;
			}
			currentForm = 0;
		}
		break;

	case TAGACT_INPUT:
		if (!opentag)
			break;
		htmlInputHelper(t);
		itype = t->itype;
/* hidden fields are not counted as inputs of the form */
		if (itype == INP_HIDDEN)
			break;
		if (currentForm) {
			++currentForm->ninp;
/* these are all treated as ways to submit the form */
			if (itype == INP_SUBMIT || itype == INP_IMAGE)
				currentForm->submitted = true;
			if (itype == INP_BUTTON && t->onclick)
				currentForm->submitted = true;
			if (itype > INP_HIDDEN && itype <= INP_SELECT
			    && t->onchange)
				currentForm->submitted = true;
		}
		break;

	case TAGACT_OPTION:
		if (!opentag) {
			currentOpt = 0;
			break;
		}
		if (!currentSel) {
			debugPrint(3,
				   "option appears outside a select statement");
			break;
		}
		currentOpt = t;
		t->controller = currentSel;
/* for an option, lic is its position within the select */
		t->lic = nopt++;
		if (attribPresent(t, "selected")) {
/* for the select, lic counts the options selected so far */
			if (currentSel->lic && !currentSel->multiple)
				debugPrint(3, "multiple options are selected");
			else {
				t->checked = t->rchecked = true;
				++currentSel->lic;
			}
		}
		if (!t->value)
			t->value = emptyString;
/* replaced by the text beneath the option, if there is any */
		t->textval = emptyString;
		break;

	case TAGACT_STYLE:
		if (!opentag) {
			currentStyle = 0;
			break;
		}
		currentStyle = t;
		break;

	case TAGACT_SELECT:
		if (opentag) {
			currentSel = t;
			nopt = 0;
			t->itype = INP_SELECT;
			formControl(t, true);
		} else {
/* All the options have been seen. From here on the select is an input tag
 * whose value is the text of its checked options. */
			currentSel = 0;
			t->action = TAGACT_INPUT;
			t->value = displayOptions(t);
		}
		break;

	case TAGACT_TA:
		if (opentag) {
			currentTA = t;
			t->itype = INP_TA;
			formControl(t, true);
		} else {
/* from here on the textarea is an input tag */
			t->action = TAGACT_INPUT;
			if (!t->value) {
/* This can only happen if no text inside, <textarea></textarea> */
/* like the other value fields, it can't be null */
				t->rvalue = t->value = emptyString;
			}
#if 0
// When textarea buffers were allocated at browse time, before the ib command
			if (whichproc == 'e')
				t->lic = sideBuffer(0, t->value, -1, 0);
#endif
			currentTA = 0;
		}
		break;

	case TAGACT_META:
		if (opentag) {
/* This function doesn't do anything inside the js process.
 * It only works when scanning the original web page.
 * Thus I assume meta tags that set cookies, or keywords, or description,
 * or a refresh directive, are there from the get-go.
 * If js was going to generate a cookie it would just set document.cookie,
 * it wouldn't build a meta tag to set the cookie and then
 * appendChild it to head, right? */
			htmlMetaHelper(t);
		}
		break;

/* The parts of a table: the controller of each is the part that contains it. */
	case TAGACT_TBODY:
	case TAGACT_THEAD:
	case TAGACT_TFOOT:
		if (opentag)
			t->controller = findOpenTag(t, TAGACT_TABLE);
		break;

	case TAGACT_TR:
		if (opentag) {
/* a row belongs to its section, or to the table if there is no section */
			t->controller = findOpenSection(t);
			if (!t->controller)
				t->controller = findOpenTag(t, TAGACT_TABLE);
		}
		break;

	case TAGACT_TD:
		if (opentag)
			t->controller = findOpenTag(t, TAGACT_TR);
		break;

	case TAGACT_SPAN:
		if (!opentag)
			break;
		if (!(a = t->jclass))
			break;
/* a span whose class is exactly sup, sub, or ovb takes on that action */
		if (stringEqualCI(a, "sup"))
			action = TAGACT_SUP;
		if (stringEqualCI(a, "sub"))
			action = TAGACT_SUB;
		if (stringEqualCI(a, "ovb"))
			action = TAGACT_OVB;
		t->action = action;
		break;

	case TAGACT_OL:
/* look for start parameter for numbered list */
		if (opentag) {
			a = attribVal(t, "start");
/* slic is one less than the start value */
			if (a && (j = stringIsNum(a)) >= 0)
				t->slic = j - 1;
		}
		break;

	case TAGACT_FRAME:
		if (opentag)
			break;
// If somebody wrote <frame><p>foo</frame>, those tags should be excised.
		underKill(t);
/* the only child of a frame is a new document tag */
		cdt = newTag(cf, "document");
		t->firstchild = cdt;
		cdt->parent = t;
		break;

	case TAGACT_MUSIC:
		if (opentag)
			break;
// If somebody wrote <audio><p>foo</audio>, those tags should be excised.
// However <source> tags should be kept and/or expanded. Not yet implemented.
		underKill(t);
		break;

	}			/* switch */
}				/* prerenderNode */
843 
prerender(int start)844 void prerender(int start)
845 {
846 /* some cleanup routines to rearrange the tree */
847 	nestedAnchors(start);
848 	emptyAnchors(start);
849 	insert_tbody(start);
850 	tableForm(start);
851 
852 	currentForm = currentSel = currentOpt = NULL;
853 	currentTitle = currentScript = currentTA = NULL;
854 	currentStyle = NULL;
855 	nzFree(radioCheck);
856 	radioCheck = 0;
857 	traverse_callback = prerenderNode;
858 	traverseAll(start);
859 	currentForm = NULL;
860 	nzFree(radioCheck);
861 	radioCheck = 0;
862 }				/* prerender */
863 
864 /* create a new url with constructor */
instantiate_url(const Frame * f,jsobjtype parent,const char * name,const char * url)865 jsobjtype instantiate_url(const Frame *f, jsobjtype parent, const char *name, const char *url)
866 {
867 	jsobjtype uo;		/* url object */
868 	uo = instantiate(f, parent, name, "URL");
869 	if (uo)
870 		set_property_string(f, uo, "href", url);
871 	return uo;
872 }				/* instantiate_url */
873 
874 static char fakePropLast[24];
875 static jsobjtype fakePropParent;
fakePropName(void)876 static const char *fakePropName(void)
877 {
878 	static int idx = 0;
879 	++idx;
880 	sprintf(fakePropLast, "gc$%c%d", whichproc, idx);
881 	return fakePropLast;
882 }				/*fakePropName */
883 
/*********************************************************************
Create the javascript Option object at position idx in the options array
of the select object obj, in the current frame.
The new option gets parentNode, form (the same as the form of the select,
if it has one), an empty childNodes array, an attributes map, and a style.
Return the option object, or null if the options array is missing
or the option could not be created.
*********************************************************************/
static jsobjtype establish_js_option(jsobjtype obj, int idx)
{
	jsobjtype optarray, opt;
	jsobjtype form, attrs, style;

	optarray = get_property_object(cf, obj, "options");
	if (!optarray)
		return NULL;
	opt = instantiate_array_element(cf, optarray, idx, "Option");
	if (!opt)
		return NULL;

	set_property_object(cf, opt, "parentNode", optarray);

/* option.form = select.form */
	form = get_property_object(cf, obj, "form");
	if (form)
		set_property_object(cf, opt, "form", form);

	instantiate_array(cf, opt, "childNodes");

/* the attributes map and the style object both point back to the option */
	attrs = instantiate(cf, opt, "attributes", "NamedNodeMap");
	set_property_object(cf, attrs, "owner", opt);
	style = instantiate(cf, opt, "style", "CSSStyleDeclaration");
	set_property_object(cf, style, "element", opt);

	return opt;
}				/* establish_js_option */
911 
/*********************************************************************
Set the inner text of the javascript object obj.
The text runs from start up to, but not including, end;
or all of the string at start if end is null;
or it is the empty string if start is null.
It goes into obj.value if isText is true, otherwise into obj.innerHTML.
*********************************************************************/
static void establish_inner(jsobjtype obj, const char *start, const char *end,
			    bool isText)
{
/* with both ends given we work from a copy, which has to be freed */
	bool owned = (start && end);
	const char *text;

	if (owned)
		text = pullString(start, end - start);
	else if (start)
		text = start;
	else
		text = emptyString;

	set_property_string(cf, obj, (isText ? "value" : "innerHTML"), text);
	if (owned)
		nzFree((char *)text);

// If this is a textarea, we haven't yet set up the innerHTML
// getter and setter
	if (isText)
		set_property_string(cf, obj, "innerHTML", emptyString);
}				/* establish_inner */
930 
domLink(Tag * t,const char * classname,const char * href,const char * list,jsobjtype owner,bool isradio)931 static void domLink(Tag *t, const char *classname,	/* instantiate this class */
932 		    const char *href, const char *list,	/* next member of this array */
933 		    jsobjtype owner, bool isradio)
934 {
935 	jsobjtype alist = 0;
936 	jsobjtype io = 0;	/* input object */
937 	int length;
938 	bool dupname = false;
939 /* some strings from the html tag */
940 	const char *symname = t->name;
941 	const char *idname = t->id;
942 	const char *membername = 0;	/* usually symname */
943 	const char *href_url = t->href;
944 	const char *tcn = t->jclass;
945 	const char *stylestring = attribVal(t, "style");
946 	jsobjtype so = 0;	/* obj.style */
947 	jsobjtype ato = 0;	/* obj.attributes */
948 	char upname[MAXTAGNAME];
949 
950 	debugPrint(5, "domLink %s.%d name %s",
951 		   classname, isradio, (symname ? symname : emptyString));
952 
953 	if (symname && typeof_property(cf, owner, symname)) {
954 /*********************************************************************
955 This could be a duplicate name.
956 Yes, that really happens.
957 Link to the first tag having this name,
958 and link the second tag under a fake name so gc won't throw it away.
959 Or - it could be a duplicate name because multiple radio buttons
960 all share the same name.
961 The first time we create the array,
962 and thereafter we just link under that array.
963 Or - and this really does happen -
964 an input tag could have the name action, colliding with form.action.
965 don't overwrite form.action, or anything else that pre-exists.
966 *********************************************************************/
967 
968 		if (isradio) {
969 /* name present and radio buttons, name should be the array of buttons */
970 			io = get_property_object(cf, owner, symname);
971 			if (io == NULL)
972 				return;
973 		} else {
974 /* don't know why the duplicate name */
975 			dupname = true;
976 		}
977 	}
978 
/* The input object is nonzero if and only if the input is a radio button,
 * and not the first button in the set, thus it is the array containing
 * these buttons. */
982 	if (io == NULL) {
983 /*********************************************************************
984 Ok, the above condition does not hold.
985 We'll be creating a new object under owner, but through what name?
986 The name= tag, unless it's a duplicate,
987 or id= if there is no name=, or a fake name just to protect it from gc.
988 That's how it was for a long time, but I think we only do this on form.
989 *********************************************************************/
990 		if (t->action == TAGACT_INPUT && list) {
991 			if (!symname && idname)
992 				membername = idname;
993 			else if (symname && !dupname)
994 				membername = symname;
995 /* id= or name= must not displace submit, reset, or action in form.
996  * Example www.startpage.com, where id=submit.
997  * nor should it collide with another attribute, such as document.cookie and
998  * <div ID=cookie> in www.orange.com.
999  * This call checks for the name in the object and its prototype. */
1000 			if (membername && has_property(cf, owner, membername)) {
1001 				debugPrint(3, "membername overload %s.%s",
1002 					   classname, membername);
1003 				membername = NULL;
1004 			}
1005 		}
1006 		if (!membername) {
1007 			membername = fakePropName();
1008 			fakePropParent = owner;
1009 		}
1010 
1011 		if (isradio) {	// the first radio button
1012 			io = instantiate_array(cf, owner, membername);
1013 			if (io == NULL)
1014 				return;
1015 			set_property_string(cf, io, "type", "radio");
1016 		} else {
1017 /* A standard input element, just create it. */
1018 			jsobjtype ca;	// child array
1019 			io = instantiate(cf, owner, membername, classname);
1020 			if (io == NULL)
1021 				return;
1022 /* not an array; needs the childNodes array beneath it for the children */
1023 			ca = instantiate_array(cf, io, "childNodes");
1024 // childNodes and options are the same for Select
1025 			if (stringEqual(classname, "Select"))
1026 				set_property_object(cf, io, "options", ca);
1027 		}
1028 
1029 /* deal with the 'styles' here.
1030 object will get 'style' regardless of whether there is
1031 anything to put under it, just like it gets childNodes whether
1032 or not there are any.  After that, there is a conditional step.
1033 If this node contains style='' of one or more name-value pairs,
1034 call out to process those and add them to the object.
1035 Don't do any of this if the tag is itself <style>. */
1036 		if (t->action != TAGACT_STYLE) {
1037 			so = instantiate(cf, io, "style", "CSSStyleDeclaration");
1038 			set_property_object(cf, so, "element", io);
1039 /* now if there are any style pairs to unpack,
1040  processStyles can rely on obj.style existing */
1041 			if (stylestring)
1042 				processStyles(so, stylestring);
1043 		}
1044 
1045 /* Other attributes that are expected by pages, even if they
1046  * aren't populated at domLink-time */
1047 		if (!tcn)
1048 			tcn = emptyString;
1049 		set_property_string(cf, io, "class", tcn);
1050 		set_property_string(cf, io, "last$class", tcn);
1051 		ato = instantiate(cf, io, "attributes", "NamedNodeMap");
1052 		set_property_object(cf, ato, "owner", io);
1053 		set_property_object(cf, io, "ownerDocument", cf->docobj);
1054 		instantiate(cf, io, "dataset", "Object");
1055 
1056 // only anchors with href go into links[]
1057 		if (list && stringEqual(list, "links") &&
1058 		    !attribPresent(t, "href"))
1059 			list = 0;
1060 
1061 		if (list)
1062 			alist = get_property_object(cf, owner, list);
1063 		if (alist) {
1064 			length = get_arraylength(cf, alist);
1065 			if (length < 0)
1066 				return;
1067 			set_array_element_object(cf, alist, length, io);
1068 			if (symname && !dupname
1069 			    && !has_property(cf, alist, symname))
1070 				set_property_object(cf, alist, symname, io);
1071 #if 0
1072 			if (idname && symname != idname
1073 			    && !has_property(cf, alist, idname))
1074 				set_property_object(cf, alist, idname, io);
1075 #endif
1076 		}		/* list indicated */
1077 	}
1078 
1079 	if (isradio) {
1080 /* drop down to the element within the radio array, and return that element */
1081 /* w becomes the object associated with this radio button */
1082 /* io is, by assumption, an array */
1083 		jsobjtype w;
1084 		length = get_arraylength(cf, io);
1085 		if (length < 0)
1086 			return;
1087 		w = instantiate_array_element(cf, io, length, "Element");
1088 		if (w == NULL)
1089 			return;
1090 		io = w;
1091 	}
1092 
1093 	set_property_string(cf, io, "name", (symname ? symname : emptyString));
1094 	set_property_string(cf, io, "id", (idname ? idname : emptyString));
1095 	set_property_string(cf, io, "last$id", (idname ? idname : emptyString));
1096 
1097 	if (href && href_url)
1098 // This use to be instantiate_url, but with the new side effects
1099 // on Anchor, Image, etc, we can just set the string.
1100 		set_property_string(cf, io, href, href_url);
1101 
1102 	if (t->action == TAGACT_INPUT) {
1103 /* link back to the form that owns the element */
1104 		set_property_object(cf, io, "form", owner);
1105 	}
1106 
1107 	connectTagObject(t, io);
1108 
1109 	strcpy(upname, t->info->name);
1110 	caseShift(upname, 'u');
1111 	set_property_string(cf, io, "nodeName", upname);
1112 	set_property_string(cf, io, "tagName", upname);
1113 	set_property_number(cf, io, "nodeType", 1);
1114 }				/* domLink */
1115 
/* Names of the js properties that hold the initial (default) state of a
 * form control or option; used when those objects are populated. */
static const char defvl[] = "defaultValue";
static const char defck[] = "defaultChecked";
static const char defsel[] = "defaultSelected";
1119 
/*
 * Create the js object for a form control and seed its basic properties.
 * The object is linked under the controlling form's "elements" list when
 * that form has a js object, and directly under the document otherwise.
 * If no js object results (t->jv stays null) nothing else is done.
 */
static void formControlJS(Tag *t)
{
	const int kind = t->itype;
	const bool is_radio = (kind == INP_RADIO);
	const bool is_select = (kind == INP_SELECT);
	const Tag *owner = t->controller;
	const char *jsclass = "Element";
	const char *typedesc;

	if (is_select)
		jsclass = "Select";

	if (owner && owner->jv)
		domLink(t, jsclass, 0, "elements", owner->jv, is_radio);
	else
		domLink(t, jsclass, 0, 0, cf->docobj, is_radio);

	if (!t->jv)
		return;

/* value and its default; select is excluded, and file gets no default */
	if (!is_select && kind <= INP_RADIO) {
		set_property_string(cf, t->jv, "value", t->value);
/* No default value on file, for security reasons */
		if (kind != INP_FILE)
			set_property_string(cf, t->jv, defvl, t->value);
	}

/* the type string: select depends on multiple, all others come from the table */
	if (!is_select)
		typedesc = inp_types[kind];
	else if (t->multiple)
		typedesc = "select-multiple";
	else
		typedesc = "select-one";
	set_property_string(cf, t->jv, "type", typedesc);

/* checked and its default, for the types at or past INP_RADIO */
	if (kind >= INP_RADIO) {
		set_property_bool(cf, t->jv, "checked", t->checked);
		set_property_bool(cf, t->jv, defck, t->checked);
	}
}				/* formControlJS */
1155 
/*
 * Create the js object for an <option> beneath its controlling select.
 * An option with text but no value takes a copy of its text as its value.
 * Nothing is built on the js side if there is no controlling select, or
 * that select has no js object.
 */
static void optionJS(Tag *t)
{
	Tag *owner = t->controller;
	const char *text = t->textval;
	const char *classname = t->jclass;

	if (!owner)
		return;

	if (text) {
		if (!t->value)
			t->value = cloneString(text);
	} else {
		debugPrint(3, "empty option");
	}

/* no point if the controlling select doesn't have a js object */
	if (!owner->jv)
		return;

	connectTagObject(t, establish_js_option(owner->jv, t->lic));

	set_property_string(cf, t->jv, "text", t->textval);
	set_property_string(cf, t->jv, "value", t->value);
	set_property_string(cf, t->jv, "nodeName", "OPTION");
	set_property_number(cf, t->jv, "nodeType", 1);
	set_property_bool(cf, t->jv, "selected", t->checked);
	set_property_bool(cf, t->jv, defsel, t->checked);

	if (!classname)
		classname = emptyString;
	set_property_string(cf, t->jv, "class", classname);
	set_property_string(cf, t->jv, "last$class", classname);

/* a selected option in a single-choice select sets the select's index */
	if (!owner->multiple && t->checked)
		set_property_number(cf, owner->jv, "selectedIndex", t->lic);
}				/* optionJS */
1191 
/*
 * Finish a <link> tag: copy type= and rel= to the js object, and if the
 * link is a stylesheet (type text/css or rel stylesheet) with an href,
 * fetch the css text and store it on the object as css$data.
 * On a successful fetch t->lic is set to 1, to indicate that the onload
 * function, if there is one, can run.
 */
static void link_css(Tag *t)
{
	struct i_get g;
	char *raw;
	int rawlen;
	const char *css = NULL;
	const char *altsource;
	const char *type = attribVal(t, "type");
	const char *rel = attribVal(t, "rel");
	bool wanted;

	if (type)
		set_property_string(cf, t->jv, "type", type);
	if (rel)
		set_property_string(cf, t->jv, "rel", rel);
	if (!t->href)
		return;

/* either attribute is enough to call this a stylesheet */
	wanted = (type && stringEqualCI(type, "text/css")) ||
	    (rel && stringEqualCI(rel, "stylesheet"));
	if (!wanted)
		return;

// Fetch the css file so we can apply its attributes.
	altsource = fetchReplace(t->href);
	if (!altsource)
		altsource = t->href;

	if (browseLocal && !isURL(altsource)) {
/* local file */
		debugPrint(3, "css source %s", altsource);
		if (fileIntoMemory(altsource, &raw, &rawlen)) {
/* keep whichever buffer holds the final text, release the other */
			css = force_utf8(raw, rawlen);
			if (css)
				nzFree(raw);
			else
				css = raw;
		} else if (debugLevel >= 1) {
			i_printf(MSG_GetLocalCSS);
		}
	} else {
/* over the network */
		debugPrint(3, "css source %s", t->href);
		memset(&g, 0, sizeof(g));
		g.thisfile = cf->fileName;
		g.uriEncoded = true;
		g.url = t->href;
		if (!httpConnect(&g)) {
			if (debugLevel >= 3)
				i_printf(MSG_GetCSS2);
		} else if (g.code != 200) {
			nzFree(g.buffer);
			if (debugLevel >= 3)
				i_printf(MSG_GetCSS, g.url, g.code);
		} else {
			css = force_utf8(g.buffer, g.length);
			if (css)
				nzFree(g.buffer);
			else
				css = g.buffer;
// acid3 test[0] says we don't process this file if it's content type is
// text/html. The content type must be missing, text/css, or text/plain.
// A similar test is performed in css.c after httpConnect.
			if (g.content[0]
			    && !stringEqual(g.content, "text/css")
			    && !stringEqual(g.content, "text/plain")) {
				debugPrint(3,
					   "css suppressed because content type is %s",
					   g.content);
				cnzFree(css);
				css = NULL;
			}
		}
	}

	if (css) {
		set_property_string(cf, t->jv, "css$data", css);
// indicate we can run the onload function, if there is one
		t->lic = 1;
	}
	cnzFree(css);
}				/* link_css */
1272 
/* When set, jsNode() appends each parentless node it decorates to this js
 * object; this is the attachment point for the top of innerHTML and the like. */
static jsobjtype innerParent;
1274 
/*********************************************************************
jsNode: tree traversal callback, installed by decorate().
Create the js object that corresponds to the html tag t,
link it into the js tree beneath its parent, set innerHTML if the tag
supports it, and pass the html attributes along to the object.

t: the node being visited.
opentag: true on the way down, false on the way back up.
The only work done on the way up is rowReindex for a table.

A node is decorated at most once (t->step is set to 2), and only
if it belongs to the current frame cf.
*********************************************************************/
static void jsNode(Tag *t, bool opentag)
{
	const struct tagInfo *ti = t->info;
	int action = t->action;
	const Tag *above;
	const char *a;
	bool linked_in;

// run reindex at table close
	if (action == TAGACT_TABLE && !opentag && t->jv)
		run_function_onearg(cf, cf->winobj, "rowReindex", t->jv);

/* all the js variables are on the open tag */
	if (!opentag)
		return;
	if (t->step >= 2)
		return;
	t->step = 2;

/*********************************************************************
If js is off, and you don't decorate this tree,
then js is turned on later, and you parse and decorate a frame,
it might also decorate this tree in the wrong context.
Needless to say that's not good!
*********************************************************************/
	if (t->f0 != cf)
		return;

	debugPrint(6, "decorate %s %d", t->info->name, t->seqno);
/* No temporary (fake) property name in play yet for this node.
 * If this is nonempty at the bottom, that property is deleted
 * once the node has been linked to its parent. */
	fakePropLast[0] = 0;

	switch (action) {

	case TAGACT_TEXT:
		connectTagObject(t,
				 instantiate(cf, cf->docobj, fakePropName(),
					     "TextNode"));
/* The temporary property was created under the document, so that is where
 * it has to be deleted from. Without this, fakePropParent still holds
 * whatever owner an earlier domLink() call left in it. */
		fakePropParent = cf->docobj;
// nodeName and nodeType set in constructor
		if (t->jv) {
			const char *w = t->textval;
			if (!w)
				w = emptyString;
			set_property_string(cf, t->jv, "data", w);
			w = (t->jclass ? t->jclass : emptyString);
			set_property_string(cf, t->jv, "class", w);
			set_property_string(cf, t->jv, "last$class", w);
		}
		break;

	case TAGACT_HTML:
		domLink(t, "HTML", 0, 0, cf->docobj, 0);
		cf->htmltag = t;
		break;

	case TAGACT_META:
		domLink(t, "Meta", 0, "metas", cf->docobj, 0);
		break;

	case TAGACT_STYLE:
		domLink(t, "CSSStyleDeclaration", 0, "styles", cf->docobj, 0);
		a = attribVal(t, "type");
		if (!a)
			a = emptyString;
		set_property_string(cf, t->jv, "type", a);
		break;

	case TAGACT_SCRIPT:
		domLink(t, "Script", "src", "scripts", cf->docobj, 0);
		a = attribVal(t, "type");
		if (a)
			set_property_string(cf, t->jv, "type", a);
		a = attribVal(t, "text");
		if (a) {
			set_property_string(cf, t->jv, "text", a);
		} else {
			set_property_string(cf, t->jv, "text", "");
		}
		a = attribVal(t, "src");
		if (a) {
			set_property_string(cf, t->jv, "src", a);
			if (down_jsbg && a[0])	// from another source, let's get it started
				prepareScript(t);
		} else {
			set_property_string(cf, t->jv, "src", "");
		}
		break;

	case TAGACT_FORM:
		domLink(t, "Form", "action", "forms", cf->docobj, 0);
		break;

	case TAGACT_INPUT:
		formControlJS(t);
		if (t->itype == INP_TA)
			establish_inner(t->jv, t->value, 0, true);
		break;

	case TAGACT_OPTION:
		optionJS(t);
// The parent child relationship has already been established,
// don't break, just return;
		return;

	case TAGACT_A:
		domLink(t, "Anchor", "href", "links", cf->docobj, 0);
		break;

	case TAGACT_HEAD:
		domLink(t, "Head", 0, "heads", cf->docobj, 0);
		cf->headtag = t;
		break;

	case TAGACT_BODY:
		domLink(t, "Body", 0, "bodies", cf->docobj, 0);
		cf->bodytag = t;
		break;

	case TAGACT_OL:
	case TAGACT_UL:
	case TAGACT_DL:
		domLink(t, "Lister", 0, 0, cf->docobj, 0);
		break;

	case TAGACT_LI:
		domLink(t, "Listitem", 0, 0, cf->docobj, 0);
		break;

	case TAGACT_TABLE:
		domLink(t, "Table", 0, "tables", cf->docobj, 0);
		break;

/* The table pieces hang off their controller,
 * and are skipped if the controller has no js object. */
	case TAGACT_TBODY:
		if ((above = t->controller) && above->jv)
			domLink(t, "tBody", 0, "tBodies", above->jv, 0);
		break;

	case TAGACT_THEAD:
		if ((above = t->controller) && above->jv) {
			domLink(t, "tHead", 0, 0, above->jv, 0);
			set_property_object(cf, above->jv, "tHead", t->jv);
		}
		break;

	case TAGACT_TFOOT:
		if ((above = t->controller) && above->jv) {
			domLink(t, "tFoot", 0, 0, above->jv, 0);
			set_property_object(cf, above->jv, "tFoot", t->jv);
		}
		break;

	case TAGACT_TR:
		if ((above = t->controller) && above->jv)
			domLink(t, "tRow", 0, "rows", above->jv, 0);
		break;

	case TAGACT_TD:
		if ((above = t->controller) && above->jv)
			domLink(t, "Cell", 0, "cells", above->jv, 0);
		break;

	case TAGACT_DIV:
		domLink(t, "Div", 0, "divs", cf->docobj, 0);
		break;

	case TAGACT_LABEL:
		domLink(t, "Label", 0, "labels", cf->docobj, 0);
		break;

	case TAGACT_OBJECT:
		domLink(t, "HtmlObj", 0, "htmlobjs", cf->docobj, 0);
		break;

	case TAGACT_UNKNOWN:
		domLink(t, "HTMLElement", 0, 0, cf->docobj, 0);
		break;

	case TAGACT_SPAN:
	case TAGACT_SUB:
	case TAGACT_SUP:
	case TAGACT_OVB:
		domLink(t, "Span", 0, "spans", cf->docobj, 0);
		break;

	case TAGACT_AREA:
		domLink(t, "Area", "href", "links", cf->docobj, 0);
		break;

	case TAGACT_FRAME:
// about:blank means a blank frame with no sourcefile.
		if (stringEqual(t->href, "about:blank")) {
			nzFree(t->href);
			t->href = 0;
		}
		domLink(t, "Frame", "src", "frames", cf->winobj, 0);
		break;

	case TAGACT_IMAGE:
		domLink(t, "Image", "src", "images", cf->docobj, 0);
		break;

	case TAGACT_P:
		domLink(t, "P", 0, "paragraphs", cf->docobj, 0);
		break;

	case TAGACT_HEADER:
		domLink(t, "Header", 0, "headers", cf->docobj, 0);
		break;

	case TAGACT_FOOTER:
		domLink(t, "Footer", 0, "footers", cf->docobj, 0);
		break;

	case TAGACT_TITLE:
		if (cw->htmltitle)
			set_property_string(cf, cf->docobj, "title", cw->htmltitle);
		domLink(t, "Title", 0, 0, cf->docobj, 0);
		break;

	case TAGACT_LINK:
		domLink(t, "Link", "href", 0, cf->docobj, 0);
		link_css(t);
		break;

	case TAGACT_MUSIC:
		domLink(t, "Audio", "src", 0, cf->docobj, 0);
		break;

	default:
// Don't know what this tag is, or it's not semantically important,
// so just call it an html element.
		domLink(t, "Element", 0, 0, cf->docobj, 0);
		if (t->action == TAGACT_BASE && t->href)
			instantiate_url(cf, t->jv, "href", t->href);
		break;
	}			/* switch */

	if (!t->jv)
		return;		/* nothing else to do */

/* js tree mirrors the dom tree. */
	linked_in = false;

	if (t->parent && t->parent->jv) {
		run_function_onearg(cf, t->parent->jv, "eb$apch1", t->jv);
		linked_in = true;
// special code for frame.contentDocument.
		if (t->parent->action == TAGACT_FRAME) {
			set_property_object(cf, t->parent->jv,
					    "contentDocument", t->jv);
			set_property_object(cf, t->parent->jv,
					    "contentWindow", t->jv);
		}
	}

/* the html node has no parent tag; it goes under the document */
	if (action == TAGACT_HTML) {
		run_function_onearg(cf, cf->docobj, "eb$apch1", t->jv);
		linked_in = true;
	}

	if (!t->parent && innerParent) {
// this is the top of innerHTML or some such.
// It is never html head or body, as those are skipped.
		run_function_onearg(cf, innerParent, "eb$apch1", t->jv);
		linked_in = true;
	}

	if (linked_in && fakePropLast[0]) {
// Node linked to document/gc to protect if from garbage collection,
// but now it is linked to its parent.
		delete_property(cf, fakePropParent, fakePropLast);
	}

	if (!linked_in) {
		debugPrint(3, "tag %s not linked in", ti->name);
/* textval can be null, see the TEXT case above; never hand null to %s */
		if (action == TAGACT_TEXT)
			debugPrint(1, "text %s\n",
				   t->textval ? t->textval : emptyString);
	}

/* set innerHTML from the source html, if this tag supports it */
	if (ti->bits & TAG_INNERHTML)
		establish_inner(t->jv, t->innerHTML, 0, false);

// If the tag has foo=bar as an attribute, pass this forward to javascript.
	pushAttributes(t);
}				/* jsNode */
1559 
pushAttributes(const Tag * t)1560 static void pushAttributes(const Tag *t)
1561 {
1562 	int i;
1563 	const char **a = t->attributes;
1564 	const char **v = t->atvals;
1565 	if (!a)
1566 		return;
1567 	for (i = 0; a[i]; ++i) {
1568 // There are some exceptions, some attributes that we handle individually.
1569 		static const char *const exclist[] = {
1570 			"name", "id", "class",
1571 			"checked", "value", "type",
1572 			"href", "src", "action",
1573 			0
1574 		};
1575 		static const char *const dotrue[] = {
1576 			"required",
1577 			"multiple", "readonly", "disabled", "async", 0
1578 		};
1579 		static const char *const handlers[] = {
1580 			"onload", "onunload", "onclick", "onchange",
1581 			"onsubmit", "onreset",
1582 			0
1583 		};
1584 		const char *u;
1585 
1586 // Should we drop attribute name to lower case? I don't, for now.
1587 		u = v[i];
1588 		if (!u)
1589 			u = emptyString;
1590 
1591 // attributes on HTML tags that begin with "data-" should be available under a
1592 // "dataset" object in JS
1593 		if (strncmp(a[i], "data-", 5) == 0) {
1594 			jsobjtype dso = get_property_object(cf, t->jv, "dataset");
1595 			if (dso) {
1596 // must convert to camelCase
1597 				char *a2 = cloneString(a[i] + 5);
1598 				camelCase(a2);
1599 				set_property_string(cf, dso, a2, u);
1600 				nzFree(a2);
1601 				run_function_onestring(cf, t->jv, "markAttribute",        a[i]);
1602 			}
1603 			continue;
1604 		}
1605 
1606 		if (stringEqual(a[i], "style"))	// no clue
1607 			continue;
1608 
1609 // Maybe they wrote <a firstChild=foo>
1610 // See if the name is in the prototype, and not a handler,
1611 // as handlers have setters.
1612 		if (has_property(cf, t->jv, a[i]) && !typeof_property(cf, t->jv, a[i])
1613 		    && stringInList(handlers, a[i]) < 0) {
1614 			debugPrint(3, "html attribute overload %s.%s",
1615 				   t->info->name, a[i]);
1616 			continue;
1617 		}
1618 // There are some, like multiple or readonly, that should be set to true,
1619 // not the empty string.
1620 		if (!*u && stringInList(dotrue, a[i]) >= 0) {
1621 			set_property_bool(cf, t->jv, a[i], true);
1622 		} else {
1623 // standard attribute here
1624 			if (stringInListCI(exclist, a[i]) < 0)
1625 				set_property_string(cf, t->jv, a[i], u);
1626 		}
1627 		run_function_onestring(cf, t->jv, "markAttribute", a[i]);
1628 	}
1629 }				/* pushAttributes */
1630 
1631 /* decorate the tree of nodes with js objects */
decorate(int start)1632 void decorate(int start)
1633 {
1634 	traverse_callback = jsNode;
1635 	traverseAll(start);
1636 }				/* decorate */
1637 
1638 /* paranoia check on the number of tags */
tagCountCheck(void)1639 static void tagCountCheck(void)
1640 {
1641 	if (sizeof(int) == 4) {
1642 		if (cw->numTags > MAXLINES)
1643 			i_printfExit(MSG_LineLimit);
1644 	}
1645 }				/* tagCountCheck */
1646 
/*
 * Append tag t to the current window's tag array.
 * When the array is full it is first grown to one and a half times its
 * allocated size. The tag count is checked against the limit afterwards.
 */
static void pushTag(Tag *t)
{
	if (cw->numTags == cw->allocTags) {
		int bigger;
		debugPrint(4, "%d tags, %d dead", cw->allocTags, cw->deadTags);
/* make more room */
		bigger = cw->allocTags / 2 * 3;
		cw->tags = (Tag **)reallocMem(cw->tags, bigger * sizeof(Tag *));
		cw->allocTags = bigger;
	}

	tagList[cw->numTags] = t;
	++cw->numTags;
	tagCountCheck();
}				/* pushTag */
1661 
1662 static void freeTag(Tag *t);
1663 
1664 // garbage collect the dead tags.
1665 // You must rerender after this runs, so that the buffer has no dead tags,
1666 // and the remaining tags have their new numbers embedded in the buffer.
tag_gc(void)1667 void tag_gc(void)
1668 {
1669 	int cx;			/* edbrowse context */
1670 	struct ebWindow *w, *save_cw;
1671 	Tag *t;
1672 	int i, j;
1673 
1674 	for (cx = 1; cx <= maxSession; ++cx) {
1675 		for (w = sessionList[cx].lw; w; w = w->prev) {
1676 			if (!w->tags)
1677 				continue;
1678 // Don't bother unless a third of the tags are dead.
1679 			if (w->deadTags * 3 < w->numTags)
1680 				continue;
1681 
1682 // sync any changed fields before we muck with the tags.
1683 			save_cw = cw;
1684 			cw = w;
1685 			cf = &(cw->f0);
1686 			jSyncup(true);
1687 			cw = save_cw;
1688 			cf = &(cw->f0);
1689 
1690 // ok let's crunch.
1691 			for (i = j = 0; i < w->numTags; ++i) {
1692 				t = w->tags[i];
1693 				if (t->dead) {
1694 					freeTag(t);
1695 				} else {
1696 					t->seqno = j;
1697 					w->tags[j++] = t;
1698 				}
1699 			}
1700 			debugPrint(4, "tag_gc from %d to %d", w->numTags, j);
1701 			w->numTags = j;
1702 			w->deadTags = 0;
1703 
1704 // We must rerender when we return to this window,
1705 // or at the input loop if this is the current window.
1706 // Tags have been renumbered, need to rebuild the text buffer accordingly.
1707 			w->mustrender = true;
1708 			if (w != cw)
1709 				w->nextrender = 0;
1710 		}
1711 	}
1712 }
1713 
/*********************************************************************
The table of known html tags.
Each row starts with the tag name, a description, and the TAGACT_ action
code; up to two more numeric fields follow, whose meaning is set by
struct tagInfo, declared elsewhere.
The table ends with the row whose name is the empty string.

first three have to be in this order
newTag() falls back to the entry at index 2, unknown0, for a name that
is not in this table.

NOTE(review): newTag() takes the first row whose name matches, so the
second "map" row (TAGACT_MAP) and the second "style" row (TAGACT_NOP)
are never chosen by that lookup - confirm whether that is intended.
NOTE(review): the descriptions of thead and tfoot both read
"a table body", which looks like a copy of the tbody row.
*********************************************************************/
const struct tagInfo availableTags[] = {
	{"html", "html", TAGACT_HTML},
	{"base", "base reference for relative URLs", TAGACT_BASE, 0, 4},
	{"unknown0", "an html entity", TAGACT_UNKNOWN, 5, 1},
	{"object", "an html object", TAGACT_OBJECT, 5, 3},
	{"a", "an anchor", TAGACT_A, 0, 1},
	{"htmlanchorelement", "an anchor element", TAGACT_A, 0, 1},
	{"input", "an input item", TAGACT_INPUT, 0, 4},
	{"element", "an input element", TAGACT_INPUT, 0, 4},
	{"title", "the title", TAGACT_TITLE, 0, 0},
	{"textarea", "an input text area", TAGACT_TA, 0, 0},
	{"select", "an option list", TAGACT_SELECT, 0, 0},
	{"option", "a select option", TAGACT_OPTION, 0, 0},
	{"sub", "a subscript", TAGACT_SUB, 0, 0},
	{"sup", "a superscript", TAGACT_SUP, 0, 0},
	{"ovb", "an overbar", TAGACT_OVB, 0, 0},
	{"font", "a font", TAGACT_NOP, 0, 0},
	{"cite", "a citation", TAGACT_NOP, 0, 0},
	{"tt", "teletype", TAGACT_NOP, 0, 0},
	{"center", "centered text", TAGACT_P, 2, 5},
	{"caption", "a caption", TAGACT_NOP, 5, 0},
	{"head", "the html header information", TAGACT_HEAD, 0, 5},
	{"body", "the html body", TAGACT_BODY, 0, 5},
	{"text", "a text section", TAGACT_TEXT, 0, 4},
	{"bgsound", "background music", TAGACT_MUSIC, 0, 4},
	{"audio", "audio passage", TAGACT_MUSIC, 0, 4},
	{"video", "video passage", TAGACT_MUSIC, 0, 4},
	{"meta", "a meta tag", TAGACT_META, 0, 4},
	{"style", "a style tag", TAGACT_STYLE, 0, 2},
	{"link", "a link tag", TAGACT_LINK, 0, 4},
	{"img", "an image", TAGACT_IMAGE, 0, 4},
	{"image", "an image", TAGACT_IMAGE, 0, 4},
	{"br", "a line break", TAGACT_BR, 1, 4},
	{"p", "a paragraph", TAGACT_P, 2, 5},
	{"header", "a header", TAGACT_HEADER, 2, 5},
	{"footer", "a footer", TAGACT_FOOTER, 2, 5},
	{"div", "a divided section", TAGACT_DIV, 5, 1},
	{"map", "a map of images", TAGACT_NOP, 5, 0},
	{"blockquote", "a quoted paragraph", TAGACT_NOP, 10, 1},
	{"document", "a document", TAGACT_DOC, 5, 1},
	{"fragment", "a document fragment", TAGACT_FRAG, 5, 1},
	{"comment", "a comment", TAGACT_COMMENT, 0, 2},
	{"h1", "a level 1 header", TAGACT_H, 10, 1},
	{"h2", "a level 2 header", TAGACT_H, 10, 1},
	{"h3", "a level 3 header", TAGACT_H, 10, 1},
	{"h4", "a level 4 header", TAGACT_H, 10, 1},
	{"h5", "a level 5 header", TAGACT_H, 10, 1},
	{"h6", "a level 6 header", TAGACT_H, 10, 1},
	{"dt", "a term", TAGACT_DT, 2, 4},
	{"dd", "a definition", TAGACT_DD, 1, 4},
	{"li", "a list item", TAGACT_LI, 1, 5},
	{"ul", "a bullet list", TAGACT_UL, 10, 1},
	{"dir", "a directory list", TAGACT_NOP, 5, 0},
	{"menu", "a menu", TAGACT_NOP, 5, 0},
	{"ol", "a numbered list", TAGACT_OL, 10, 1},
	{"dl", "a definition list", TAGACT_DL, 10, 1},
	{"hr", "a horizontal line", TAGACT_HR, 5, 4},
	{"form", "a form", TAGACT_FORM, 10, 1},
	{"button", "a button", TAGACT_INPUT, 0, 1},
	{"frame", "a frame", TAGACT_FRAME, 2, 0},
	{"iframe", "a frame", TAGACT_FRAME, 2, 1},
	{"map", "an image map", TAGACT_MAP, 2, 4},
	{"area", "an image map area", TAGACT_AREA, 0, 4},
	{"table", "a table", TAGACT_TABLE, 10, 1},
	{"tbody", "a table body", TAGACT_TBODY, 0, 1},
	{"thead", "a table body", TAGACT_THEAD, 0, 1},
	{"tfoot", "a table body", TAGACT_TFOOT, 0, 1},
	{"tr", "a table row", TAGACT_TR, 5, 1},
	{"td", "a table entry", TAGACT_TD, 0, 5},
	{"th", "a table heading", TAGACT_TD, 0, 5},
	{"pre", "a preformatted section", TAGACT_PRE, 10, 0},
	{"listing", "a listing", TAGACT_PRE, 1, 0},
	{"xmp", "an example", TAGACT_PRE, 1, 0},
	{"fixed", "a fixed presentation", TAGACT_NOP, 1, 0},
	{"code", "a block of code", TAGACT_NOP, 0, 0},
	{"samp", "a block of sample text", TAGACT_NOP, 0, 0},
	{"address", "an address block", TAGACT_NOP, 1, 0},
	{"style", "a style block", TAGACT_NOP, 0, 2},
	{"script", "a script", TAGACT_SCRIPT, 0, 1},
	{"noscript", "no script section", TAGACT_NOSCRIPT, 0, 2},
	{"noframes", "no frames section", TAGACT_NOP, 0, 2},
	{"embed", "embedded html", TAGACT_MUSIC, 0, 4},
	{"noembed", "no embed section", TAGACT_NOP, 0, 2},
	{"em", "emphasized text", TAGACT_JS, 0, 0},
	{"label", "a label", TAGACT_LABEL, 0, 0},
	{"strike", "emphasized text", TAGACT_JS, 0, 0},
	{"s", "emphasized text", TAGACT_JS, 0, 0},
	{"strong", "emphasized text", TAGACT_JS, 0, 0},
	{"b", "bold text", TAGACT_JS, 0, 0},
	{"i", "italicized text", TAGACT_JS, 0, 0},
	{"u", "underlined text", TAGACT_JS, 0, 0},
	{"var", "variable text", TAGACT_JS, 0, 0},
	{"kbd", "keyboard text", TAGACT_JS, 0, 0},
	{"dfn", "definition text", TAGACT_JS, 0, 0},
	{"q", "quoted text", TAGACT_JS, 0, 0},
	{"abbr", "an abbreviation", TAGACT_JS, 0, 0},
	{"span", "an html span", TAGACT_SPAN, 0, 1},
	{"frameset", "a frame set", TAGACT_JS, 0, 0},
	{"", NULL, 0}
};
1815 
/*
 * Release a tag: disconnect it from its js object if it was ever linked,
 * free every string the tag owns, its attribute name and value arrays,
 * and finally the tag itself. t must not be used after this returns.
 */
static void freeTag(Tag *t)
{
	char **names, **vals;
	int i;

// Even if js has been turned off, if this tag was previously connected to an
// object, we should disconnect it.
	if (t->jslink)
		disconnectTagObject(t);
	nzFree(t->textval);
	nzFree(t->name);
	nzFree(t->id);
	nzFree(t->jclass);
	nzFree(t->nodeName);
	nzFree(t->value);
	cnzFree(t->rvalue);
	nzFree(t->href);
	nzFree(t->js_file);
	nzFree(t->innerHTML);

	names = (char **)t->attributes;
	vals = (char **)t->atvals;

	if (names && vals) {
/* The two arrays are parallel, and an individual value can be null while
 * the list of names continues (pushAttributes allows for this).
 * Let the names determine the length, so that the values that follow
 * a null value are not leaked. */
		for (i = 0; names[i]; ++i) {
			nzFree(names[i]);
			nzFree(vals[i]);
		}
	} else if (names) {
		for (i = 0; names[i]; ++i)
			nzFree(names[i]);
	} else if (vals) {
/* values without names: all we can do is stop at the first null */
		for (i = 0; vals[i]; ++i)
			nzFree(vals[i]);
	}

/* free(NULL) is harmless, so no need to test either array again */
	free(names);
	free(vals);

	free(t);
}
1854 
freeTags(struct ebWindow * w)1855 void freeTags(struct ebWindow *w)
1856 {
1857 	int i, n;
1858 	Tag *t, **e;
1859 
1860 /* if not browsing ... */
1861 	if (!(e = w->tags))
1862 		return;
1863 
1864 /* drop empty textarea buffers created by this session */
1865 	for (t = w->inputlist; t; t = t->same) {
1866 		if (t->action != TAGACT_INPUT)
1867 			continue;
1868 		if (t->itype != INP_TA)
1869 			continue;
1870 		if (!(n = t->lic))
1871 			continue;
1872 		freeEmptySideBuffer(n);
1873 	}			/* loop over tags */
1874 
1875 	for (i = 0; i < w->numTags; ++i, ++e) {
1876 		t = *e;
1877 		disconnectTagObject(t);
1878 		freeTag(t);
1879 	}
1880 
1881 	free(w->tags);
1882 	w->tags = 0;
1883 	w->numTags = w->allocTags = w->deadTags = 0;
1884 	w->inputlist = w->scriptlist = w->optlist = w->linklist = 0;
1885 }				/* freeTags */
1886 
newTag(const Frame * f,const char * name)1887 Tag *newTag(const Frame *f, const char *name)
1888 {
1889 	Tag *t, *t1, *t2 = 0;
1890 	const struct tagInfo *ti;
1891 	static int gsn = 0;
1892 
1893 	for (ti = availableTags; ti->name[0]; ++ti)
1894 		if (stringEqualCI(ti->name, name))
1895 			break;
1896 
1897 	if (!ti->name[0]) {
1898 		debugPrint(4, "warning, created node %s reverts to generic",
1899 			   name);
1900 		ti = availableTags + 2;
1901 	}
1902 
1903 	t = (Tag *)allocZeroMem(sizeof(Tag));
1904 	t->action = ti->action;
1905 	t->f0 = (Frame *) f;		/* set owning frame */
1906 	t->info = ti;
1907 	t->seqno = cw->numTags;
1908 	t->gsn = ++gsn;
1909 	t->nodeName = cloneString(name);
1910 	pushTag(t);
1911 	if (t->action == TAGACT_SCRIPT) {
1912 		for (t1 = cw->scriptlist; t1; t1 = t1->same)
1913 			if (!t1->slash)
1914 				t2 = t1;
1915 		if (t2)
1916 			t2->same = t;
1917 		else
1918 			cw->scriptlist = t;
1919 	}
1920 	if (t->action == TAGACT_LINK) {
1921 		for (t1 = cw->linklist; t1; t1 = t1->same)
1922 			if (!t1->slash)
1923 				t2 = t1;
1924 		if (t2)
1925 			t2->same = t;
1926 		else
1927 			cw->linklist = t;
1928 	}
1929 	if (t->action == TAGACT_INPUT || t->action == TAGACT_SELECT ||
1930 	    t->action == TAGACT_TA) {
1931 		for (t1 = cw->inputlist; t1; t1 = t1->same)
1932 			if (!t1->slash)
1933 				t2 = t1;
1934 		if (t2)
1935 			t2->same = t;
1936 		else
1937 			cw->inputlist = t;
1938 	}
1939 	if (t->action == TAGACT_OPTION) {
1940 		for (t1 = cw->optlist; t1; t1 = t1->same)
1941 			if (!t1->slash)
1942 				t2 = t1;
1943 		if (t2)
1944 			t2->same = t;
1945 		else
1946 			cw->optlist = t;
1947 	}
1948 	return t;
1949 }				/* newTag */
1950 
1951 /* reserve space for 512 tags */
initTagArray(void)1952 void initTagArray(void)
1953 {
1954 	cw->numTags = 0;
1955 	cw->allocTags = 512;
1956 	cw->deadTags = 0;
1957 	cw->tags =
1958 	    (Tag **)allocMem(cw->allocTags *
1959 					sizeof(Tag *));
1960 }				/* initTagArray */
1961 
/* State for turning a flat list of tags into a tree. */
/* true if the html is generated, not part of the original web page */
bool htmlGenerated;
/* existing node that the new tree is attached to, or null */
static Tag *treeAttach;
/* index in the tag list of the next tag to be placed */
static int tree_pos;
/* while true, the tags encountered are marked dead and skipped */
static bool treeDisable;
static void intoTree(Tag *parent);
static const int tdb = 5;	// tree debug level
1968 
/*
 * Build a tree out of the current window's tags, beginning with the tag
 * at index start. attach is the existing node the tree should hang from,
 * or null to just build the tree.
 * The tree is traced at debug level tdb.
 */
void htmlNodesIntoTree(int start, Tag *attach)
{
/* reset the state that intoTree() works from */
	treeDisable = false;
	tree_pos = start;
	treeAttach = attach;

	debugPrint(tdb, "@@tree of nodes");
	intoTree(NULL);
	debugPrint(tdb, "}\n@@end tree");
}				/* htmlNodesIntoTree */
1978 
1979 /* Convert a list of html nodes, properly nested open close, into a tree.
1980  * Attach the tree to an existing tree here, for document.write etc,
1981  * or just build the tree if attach is null. */
intoTree(Tag * parent)1982 static void intoTree(Tag *parent)
1983 {
1984 	Tag *t, *prev = 0;
1985 	int j;
1986 	const char *v;
1987 	int action;
1988 
1989 	if (!parent)
1990 		debugPrint(tdb, "root {");
1991 	else
1992 		debugPrint(tdb, "%s %d {", parent->info->name, parent->seqno);
1993 
1994 	while (tree_pos < cw->numTags) {
1995 		t = tagList[tree_pos++];
1996 		if (t->slash) {
1997 			if (parent) {
1998 				parent->balance = t, t->balance = parent;
1999 				t->dead = parent->dead;
2000 				if (t->dead)
2001 					++cw->deadTags;
2002 			}
2003 			debugPrint(tdb, "}");
2004 			return;
2005 		}
2006 
2007 		if (treeDisable) {
2008 			debugPrint(tdb, "node skip %s", t->info->name);
2009 			t->dead = true;
2010 			++cw->deadTags;
2011 			intoTree(t);
2012 			continue;
2013 		}
2014 
2015 		if (htmlGenerated) {
2016 /*Some things are different if the html is generated, not part of the original web page.
2017  * You can skip past <head> altogether, including its
2018  * tidy generated descendants, and you want to pass through <body>
2019  * to the children below. */
2020 			action = t->action;
2021 			if (action == TAGACT_HEAD) {
2022 				debugPrint(tdb, "node skip %s", t->info->name);
2023 				t->dead = true;
2024 				++cw->deadTags;
2025 				treeDisable = true;
2026 				intoTree(t);
2027 				treeDisable = false;
2028 				continue;
2029 			}
2030 			if (action == TAGACT_HTML || action == TAGACT_BODY) {
2031 				debugPrint(tdb, "node pass %s", t->info->name);
2032 				t->dead = true;
2033 				++cw->deadTags;
2034 				intoTree(t);
2035 				continue;
2036 			}
2037 
2038 /* this node is ok, but if parent is a pass through node... */
2039 			if (parent == 0 ||	/* this shouldn't happen */
2040 			    parent->action == TAGACT_BODY) {
2041 /* link up to treeAttach */
2042 				const char *w = "root";
2043 				if (treeAttach)
2044 					w = treeAttach->info->name;
2045 				debugPrint(tdb, "node up %s to %s",
2046 					   t->info->name, w);
2047 				t->parent = treeAttach;
2048 				if (treeAttach) {
2049 					Tag *c =
2050 					    treeAttach->firstchild;
2051 					if (!c)
2052 						treeAttach->firstchild = t;
2053 					else {
2054 						while (c->sibling)
2055 							c = c->sibling;
2056 						c->sibling = t;
2057 					}
2058 				}
2059 				goto checkattributes;
2060 			}
2061 		}
2062 
2063 /* regular linking through the parent node */
2064 /* Could be treeAttach if this is a frame inside a window */
2065 		t->parent = (parent ? parent : treeAttach);
2066 		if (prev) {
2067 			prev->sibling = t;
2068 		} else if (parent) {
2069 			parent->firstchild = t;
2070 		} else if (treeAttach) {
2071 			treeAttach->firstchild = t;
2072 		}
2073 		prev = t;
2074 
2075 checkattributes:
2076 /* check for some common attributes here */
2077 		action = t->action;
2078 		if (stringInListCI(t->attributes, "onclick") >= 0)
2079 			t->onclick = t->doorway = true;
2080 		if (stringInListCI(t->attributes, "onchange") >= 0)
2081 			t->onchange = t->doorway = true;
2082 		if (stringInListCI(t->attributes, "onsubmit") >= 0)
2083 			t->onsubmit = t->doorway = true;
2084 		if (stringInListCI(t->attributes, "onreset") >= 0)
2085 			t->onreset = t->doorway = true;
2086 		if (stringInListCI(t->attributes, "onload") >= 0)
2087 			t->onload = t->doorway = true;
2088 		if (stringInListCI(t->attributes, "onunload") >= 0)
2089 			t->onunload = t->doorway = true;
2090 		if (stringInListCI(t->attributes, "checked") >= 0)
2091 			t->checked = t->rchecked = true;
2092 		if (stringInListCI(t->attributes, "readonly") >= 0)
2093 			t->rdonly = true;
2094 		if (stringInListCI(t->attributes, "disabled") >= 0)
2095 			t->disabled = true;
2096 		if (stringInListCI(t->attributes, "multiple") >= 0)
2097 			t->multiple = true;
2098 		if (stringInListCI(t->attributes, "async") >= 0)
2099 			t->async = true;
2100 		if ((j = stringInListCI(t->attributes, "name")) >= 0) {
2101 /* temporarily, make another copy; some day we'll just point to the value */
2102 			v = t->atvals[j];
2103 			if (v && !*v)
2104 				v = 0;
2105 			t->name = cloneString(v);
2106 		}
2107 		if ((j = stringInListCI(t->attributes, "id")) >= 0) {
2108 			v = t->atvals[j];
2109 			if (v && !*v)
2110 				v = 0;
2111 			t->id = cloneString(v);
2112 		}
2113 		if ((j = stringInListCI(t->attributes, "class")) >= 0) {
2114 			v = t->atvals[j];
2115 			if (v && !*v)
2116 				v = 0;
2117 			t->jclass = cloneString(v);
2118 		}
2119 		if ((j = stringInListCI(t->attributes, "value")) >= 0) {
2120 			v = t->atvals[j];
2121 			if (v && !*v)
2122 				v = 0;
2123 			t->value = cloneString(v);
2124 			t->rvalue = cloneString(v);
2125 		}
2126 // Resolve href against the base, but wait a minute, what if it's <p href=blah>
2127 // and we're not suppose to resolve it? I don't ask about the parent node.
2128 // Well, in general, I don't carry the href attribute into the js node.
2129 // I only do it when it is relevant, such as <a> or <area>.
2130 // See the exceptions in pushAttributes() in this file.
2131 // I know, it's confusing.
2132 		if ((j = stringInListCI(t->attributes, "href")) >= 0) {
2133 			v = t->atvals[j];
2134 			if (v && !*v)
2135 				v = 0;
2136 			if (v) {
2137 				v = resolveURL(cf->hbase, v);
2138 				cnzFree(t->atvals[j]);
2139 				t->atvals[j] = v;
2140 				if (action == TAGACT_BASE && !cf->baseset) {
2141 					nzFree(cf->hbase);
2142 					cf->hbase = cloneString(v);
2143 					cf->baseset = true;
2144 				}
2145 				t->href = cloneString(v);
2146 			}
2147 		}
2148 		if ((j = stringInListCI(t->attributes, "src")) >= 0) {
2149 			v = t->atvals[j];
2150 			if (v && !*v)
2151 				v = 0;
2152 			if (v) {
2153 				v = resolveURL(cf->hbase, v);
2154 				cnzFree(t->atvals[j]);
2155 				t->atvals[j] = v;
2156 				if (!t->href)
2157 					t->href = cloneString(v);
2158 			}
2159 		}
2160 		if ((j = stringInListCI(t->attributes, "action")) >= 0) {
2161 			v = t->atvals[j];
2162 			if (v && !*v)
2163 				v = 0;
2164 			if (v) {
2165 				v = resolveURL(cf->hbase, v);
2166 				cnzFree(t->atvals[j]);
2167 				t->atvals[j] = v;
2168 				if (!t->href)
2169 					t->href = cloneString(v);
2170 			}
2171 		}
2172 
2173 /* href=javascript:foo() is another doorway into js */
2174 		if (t->href && memEqualCI(t->href, "javascript:", 11))
2175 			t->doorway = true;
2176 /* And of course the primary doorway */
2177 		if (action == TAGACT_SCRIPT) {
2178 			t->doorway = true;
2179 			t->scriptgen = htmlGenerated;
2180 		}
2181 
2182 		intoTree(t);
2183 	}
2184 }				/* intoTree */
2185 
/* Cut every child away from t, leaving t with no children.
 * Each child loses its parent and sibling links and is flagged deleted.
 * A child with no jv is also passed to killTag();
 * a child that has a jv is detached but otherwise left alone. */
void underKill(Tag *t)
{
	Tag *child = t->firstchild;

	while (child) {
		/* remember the next one before the link is erased */
		Tag *following = child->sibling;

		child->parent = 0;
		child->sibling = 0;
		child->deleted = true;
		if (!child->jv)
			killTag(child);
		child = following;
	}

	t->firstchild = NULL;
}
2198 
/* Take a tag out of service.
 * The tag, and its balancing tag if it has one, are marked dead and counted
 * in cw->deadTags. The tag is flagged deleted, its jv is cleared,
 * it is unlinked from its parent's list of children,
 * and its own children are cut away by underKill(). */
void killTag(Tag *t)
{
	Tag *owner;
	Tag **link;

	debugPrint(4, "kill tag %s %d", t->info->name, t->seqno);

	t->dead = true;
	++cw->deadTags;
	if (t->balance) {
		t->balance->dead = true;
		++cw->deadTags;
	}
	t->deleted = true;
	t->jv = NULL;

// unlink it from the tree above.
	owner = t->parent;
	if (owner) {
		t->parent = NULL;
		/* find whichever pointer leads to t, and route it around t */
		for (link = &owner->firstchild; *link;
		     link = &(*link)->sibling) {
			if (*link == t) {
				*link = t->sibling;
				break;
			}
		}
	}

	underKill(t);
}
2233 
2234 /* Parse some html, as generated by innerHTML or document.write. */
html_from_setter(jsobjtype inner,const char * h)2235 void html_from_setter(jsobjtype inner, const char *h)
2236 {
2237 	Tag *t = NULL;
2238 	int l = 0;
2239 	debugPrint(4, "Generated {%s}", h);
2240 	t = tagFromJavaVar(inner);
2241 	if (!t) {
2242 		debugPrint(1,
2243 			   "innerHTML finds no tag for %p, cannot parse",
2244 			   inner);
2245 		return;
2246 	}
2247 	debugPrint(4, "parse under %s %d", t->info->name, t->seqno);
2248 	l = cw->numTags;
2249 
2250 /* Cut all the children away from t */
2251 	underKill(t);
2252 
2253 	html2nodes(h, false);
2254 	htmlGenerated = true;
2255 	htmlNodesIntoTree(l, t);
2256 	prerender(0);
2257 	innerParent = inner;
2258 	decorate(0);
2259 	innerParent = 0;
2260 }				/* html_from_setter */
2261 
/* Break an inline style string such as "color: red; font-size: 12px"
 * into its name:value declarations, and set each one as a string property
 * on the js object so, with the name converted by camelCase().
 * A piece without a colon, or with an empty name, is skipped.
 * A null stylestring sets nothing. */
static void processStyles(jsobjtype so, const char *stylestring)
{
	char *workstring = cloneString(stylestring);
	char *s;		// gets truncated to the style name
	char *sv;		// the value that goes with s
	char *next;		// start of the following declaration

/* cloneString() is handed null pointers elsewhere in this file, and appears
 * to pass them through, so there may be nothing here to scan. */
	if (!workstring)
		return;

/* White space is skipped ahead of every later declaration, after the
 * semicolon; do the same for the first one, so that style=" color:red"
 * sets color, and not a property whose name starts with a space.
 * NOTE(review): this presumes trimWhite() trims only the end - confirm.
 * If it trims both ends, this step is merely redundant. */
	s = workstring;
	skipWhite2(&s);

	for (; *s; s = next) {
		next = strchr(s, ';');
		if (!next) {
			/* last declaration, no trailing semicolon */
			next = s + strlen(s);
		} else {
			*next++ = 0;
			skipWhite2(&next);
		}
		sv = strchr(s, ':');
		// if there was something there, but it didn't
		// adhere to the expected syntax, skip this pair
		if (!sv)
			continue;
		*sv++ = '\0';
		skipWhite2(&sv);
		trimWhite(s);
		trimWhite(sv);
// the property name has to be nonempty
		if (!*s)
			continue;
		camelCase(s);
		set_property_string(cf, so, s, sv);
// Should we set a specification level here, perhaps high,
// so the css sheets don't overwrite it?
// sv + "$$scy" = 99999;
	}
	nzFree(workstring);
}				/* processStyles */
2296