1 /*********************************************************************
2 decorate.c:
3 sanitize a tree of nodes produced by html,
4 and decorate the tree with the corresponding js objects.
5 A <form> tag has a corresponding Form object in the js world, etc.
6 This is done for the html that is on the initial web page,
7 and any html that is produced by javascript via
8 foo.innerHTML = string or document.write(string).
9 *********************************************************************/
10
11 #include "eb.h"
12
13 /* The current (foreground) edbrowse window and frame.
14 * These are replaced with stubs when run within the javascript process. */
15 struct ebWindow *cw;
16 Frame *cf;
17 int gfsn;
18
19 /* traverse the tree of nodes with a callback function */
20 nodeFunction traverse_callback;
21
22 /* possible callback functions in this file */
23 static void prerenderNode(Tag *node, bool opentag);
24 static void jsNode(Tag *node, bool opentag);
25 static void pushAttributes(const Tag *t);
26
27 static void processStyles(jsobjtype so, const char *stylestring);
28
29 static bool treeOverflow;
30
/*
 * Depth-first walk of the subtree rooted at node.
 * traverse_callback is invoked on every node twice: once with true
 * before the children are walked, and once with false afterwards.
 * A node whose visited flag is already set is not walked again;
 * the revisit is logged and recorded in treeOverflow.
 */
static void traverseNode(Tag *node)
{
	Tag *kid;

	if (node->visited) {
		treeOverflow = true;
		debugPrint(4, "node revisit %s %d", node->info->name,
			   node->seqno);
		return;
	}
	node->visited = true;

	(*traverse_callback) (node, true);
	kid = node->firstchild;
	while (kid) {
		traverseNode(kid);
		kid = kid->sibling;
	}
	(*traverse_callback) (node, false);
}				/* traverseNode */
48
traverseAll(int start)49 void traverseAll(int start)
50 {
51 Tag *t;
52 int i;
53
54 treeOverflow = false;
55 for (i = start; i < cw->numTags; ++i) {
56 t = tagList[i];
57 t->visited = false;
58 }
59
60 for (i = start; i < cw->numTags; ++i) {
61 t = tagList[i];
62 if (!t->parent && !t->slash && !t->dead)
63 traverseNode(t);
64 }
65
66 if (treeOverflow)
67 debugPrint(3, "malformed tree!");
68 } /* traverseAll */
69
70 static int nopt; /* number of options */
71 /* None of these tags nest, so it is reasonable to talk about
72 * the current open tag. */
73 static Tag *currentForm, *currentSel, *currentOpt, *currentStyle;
74 static Tag *currentTitle, *currentScript, *currentTA;
75 static Tag *currentA;
76 static char *radioCheck;
77 static int radio_l;
78
attribVal(const Tag * t,const char * name)79 const char *attribVal(const Tag *t, const char *name)
80 {
81 const char *v;
82 int j;
83 if (!t->attributes)
84 return 0;
85 j = stringInListCI(t->attributes, name);
86 if (j < 0)
87 return 0;
88 v = t->atvals[j];
89 return v;
90 } /* attribVal */
91
attribPresent(const Tag * t,const char * name)92 static bool attribPresent(const Tag *t, const char *name)
93 {
94 int j = stringInListCI(t->attributes, name);
95 return (j >= 0);
96 } /* attribPresent */
97
/*
 * Attach child beneath parent as its last child.
 * child->sibling is left as the caller set it.
 */
static void linkinTree(Tag *parent, Tag *child)
{
	Tag *last;

	child->parent = parent;

	last = parent->firstchild;
	if (!last) {
		/* no children yet, this is the first */
		parent->firstchild = child;
		return;
	}

	/* run to the end of the sibling chain and hook on there */
	while (last->sibling)
		last = last->sibling;
	last->sibling = child;
}				/* linkinTree */
113
makeButton(void)114 static void makeButton(void)
115 {
116 Tag *t = newTag(cf, "input");
117 t->controller = currentForm;
118 t->itype = INP_SUBMIT;
119 t->value = emptyString;
120 t->step = 1;
121 linkinTree(currentForm, t);
122 } /* makeButton */
123
/*
 * Climb from t toward the root and return the nearest ancestor whose
 * action equals the one given, or 0 if there is none.
 * t itself is not considered.
 * The climb gives up, with a debug message, after 10000 ancestors,
 * as a defense against a parent chain that loops.
 */
Tag *findOpenTag(Tag *t, int action)
{
	int depth;

	for (depth = 1, t = t->parent; t; ++depth, t = t->parent) {
		if (t->action == action)
			return t;
		if (depth == 10000) {	// tree shouldn't be this deep
			debugPrint(1, "infinite loop in findOpenTag()");
			break;
		}
	}
	return 0;
}				/* findOpenTag */
137
/*
 * Climb from t toward the root and return the nearest ancestor that is
 * a table section (tbody, thead, or tfoot), or 0 if there is none.
 * t itself is not considered.
 * The climb gives up, with a debug message, after 10000 ancestors,
 * as a defense against a parent chain that loops.
 */
static Tag *findOpenSection(Tag *t)
{
	int count = 0;

	while ((t = t->parent)) {
		if (t->action == TAGACT_TBODY || t->action == TAGACT_THEAD ||
		    t->action == TAGACT_TFOOT)
			return t;
		if (++count == 10000) {	// tree shouldn't be this deep
			/* name this function, not findOpenTag, in the log */
			debugPrint(1, "infinite loop in findOpenSection()");
			break;
		}
	}
	return 0;
}				/* findOpenSection */
152
/*
 * Climb from t toward the root and return the nearest ancestor that is
 * an ordered or unordered list, or 0 if there is none.
 * t itself is not considered.
 * Like findOpenTag() and findOpenSection(), the climb gives up, with a
 * debug message, after 10000 ancestors, so that a parent chain that
 * loops cannot hang the program.
 */
Tag *findOpenList(Tag *t)
{
	int count = 0;

	while ((t = t->parent)) {
		if (t->action == TAGACT_OL || t->action == TAGACT_UL)
			return t;
		if (++count == 10000) {	// tree shouldn't be this deep
			debugPrint(1, "infinite loop in findOpenList()");
			break;
		}
	}
	return 0;
}				/* findOpenList */
160
161 /*********************************************************************
162 tidy workaround functions.
163 Consider html like this.
164 <body>
165 <A href=http://www.edbrowse.org>Link1
166 <A href=http://www.edbrowse.org>Link2
167 <A href=http://www.edbrowse.org>Link3
168 </body>
169 Each anchor should close the one before, thus rendering as
170 {Link1} {Link2} {Link3}
171 But tidy does not do this; it allows anchors to nest, thus
172 {Link1{Link2{Link3}}}
173 Not a serious problem really, it just looks funny.
174 And yes, html like this does appear in the wild.
175 This routine restructures the tree to move the inner anchor
176 back up to the same level as the outer anchor.
177 *********************************************************************/
178
nestedAnchors(int start)179 static void nestedAnchors(int start)
180 {
181 Tag *a1, *a2, *p, *c;
182 int j;
183
184 for (j = start; j < cw->numTags; ++j) {
185 a2 = tagList[j];
186 if (a2->action != TAGACT_A)
187 continue;
188 a1 = findOpenTag(a2, TAGACT_A);
189 if (!a1)
190 continue;
191
192 /* delete a2 from the tree */
193 p = a2->parent;
194 a2->parent = 0;
195 if (p->firstchild == a2)
196 p->firstchild = a2->sibling;
197 else {
198 c = p->firstchild;
199 while (c->sibling) {
200 if (c->sibling == a2) {
201 c->sibling = a2->sibling;
202 break;
203 }
204 c = c->sibling;
205 }
206 }
207 a2->sibling = 0;
208
209 /* then link a2 up next to a1 */
210 a2->parent = a1->parent;
211 a2->sibling = a1->sibling;
212 a1->sibling = a2;
213 }
214 } /* nestedAnchors */
215
/*********************************************************************
Tables are supposed to have bodies, I guess.
So <table><tr> becomes <table><tbody><tr>
Find each table and look at its children.
Note the tags between sections, where a section is tHead, tBody, or tFoot.
If that span includes <tr>, then put those tags under a new tBody.
*********************************************************************/
223
224 static void insert_tbody1(Tag *s1, Tag *s2,
225 Tag *tbl);
226 static bool tagBelow(Tag *t, int action);
227
insert_tbody(int start)228 static void insert_tbody(int start)
229 {
230 int i, end = cw->numTags;
231 Tag *tbl, *s1, *s2;
232
233 for (i = start; i < end; ++i) {
234 tbl = tagList[i];
235 if (tbl->action != TAGACT_TABLE)
236 continue;
237 s1 = 0;
238 do {
239 s2 = (s1 ? s1->sibling : tbl->firstchild);
240 while (s2 && s2->action != TAGACT_TBODY
241 && s2->action != TAGACT_THEAD
242 && s2->action != TAGACT_TFOOT)
243 s2 = s2->sibling;
244 insert_tbody1(s1, s2, tbl);
245 s1 = s2;
246 } while (s1);
247 }
248 }
249
/*
 * Handle the children of table tbl that lie strictly between section s1
 * and section s2.  s1 null means start at the first child of the table;
 * s2 null means run to the last child.
 * If the run begins the table and its first tag is a row whose first
 * child is a th cell, that row is placed under a new thead.
 * Then, if any remaining tag of the run is or contains a row,
 * the whole remainder is placed under a new tbody.
 */
static void insert_tbody1(Tag *s1, Tag *s2, Tag *tbl)
{
	Tag *first = (s1 ? s1->sibling : tbl->firstchild);
	Tag *scan, *last = 0;
	Tag *sec;		// new section

	if (first == s2)	// nothing between
		return;

	// Only the direct form <table><tr><th> is recognized.
	// A th further down the tree is not found.
	if (!s1 && first->action == TAGACT_TR &&
	    (scan = first->firstchild) &&
	    stringEqual(scan->info->name, "th")) {
		sec = newTag(cf, "thead");
		tbl->firstchild = sec;
		sec->parent = tbl;
		sec->firstchild = first;
		first->parent = sec;
		sec->sibling = first->sibling;
		first->sibling = 0;
		/* carry on with what follows the new thead */
		s1 = sec;
		first = s1->sibling;
	}

	/* is there a row anywhere in the run? */
	scan = first;
	while (scan != s2 && !tagBelow(scan, TAGACT_TR))
		scan = scan->sibling;
	if (scan == s2)		// no rows below
		return;

	/* reparent the run, remembering its last member */
	sec = newTag(cf, "tbody");
	for (scan = first; scan != s2; scan = scan->sibling) {
		last = scan;
		scan->parent = sec;
	}

	/* put the new section where the run used to begin */
	if (s1)
		s1->sibling = sec;
	else
		tbl->firstchild = sec;

	/* detach the end of the run from s2, which now follows the section */
	if (s2) {
		last->sibling = 0;
		sec->sibling = s2;
	}
	sec->firstchild = first;
	sec->parent = tbl;
}
292
293 /*********************************************************************
294 Bad html will derail tidy, so that <a><div>stuff</div></a>
295 will push div outside the anchor, to render as {} stuff
296 m.facebook.com is loaded with them.
297 Here is a tiny example.
298
299 <body>
300 <input type=button name=whatever value=hohaa>
301 <a href="#bottom"><div>Cognitive business is here</div></a>
302 </body>
303
304 This routine puts it back.
An anchor with no children, followed by a div,
moves the div under the anchor.
307 For a while I had this function commented out, like it caused a problem,
308 but I can't see why or how, so it's back, and facebook looks better.
309
310 As an after kludge, don't move <div> under <a> if <div> has an anchor beneath it.
311 That could create nested anchors, which we already worked hard to get rid of. Eeeeeeesh.
312
313 This and other tidy workaround functions are based on heuristics,
314 and suffer from false positives and false negatives,
315 the former being the more serious problem -
316 i.e. we rearrange the tree when we shouldn't.
317 Even when we do the right thing, there is another problem,
318 innerHTML is wrong, and doesn't match the tree of nodes
319 or the original source.
320 innerHTML comes to us from tidy, after it has fixed (sometimes broken) things.
321 Add <script> to the above, browse, jdb, and look at document.body.innerHTML.
322 It does not match the source, in fact it represents the tree *before* we fixed it.
323 There really isn't anything I can do about that.
324 In so many ways, the better approach is to fix tidy, but sometimes that is out of our hands.
325 *********************************************************************/
326
/*
 * Is there a tag with the given action at t or anywhere beneath it?
 * Recursive search of the subtree, t included.
 */
static bool tagBelow(Tag *t, int action)
{
	Tag *kid;

	if (t->action == action)
		return true;

	kid = t->firstchild;
	while (kid) {
		if (tagBelow(kid, action))
			return true;
		kid = kid->sibling;
	}
	return false;
}				/* tagBelow */
338
emptyAnchors(int start)339 static void emptyAnchors(int start)
340 {
341 int j;
342 Tag *a0, *div, *up;
343
344 for (j = start; j < cw->numTags; ++j) {
345 a0 = tagList[j];
346 if (a0->action != TAGACT_A || a0->firstchild)
347 continue;
348 // anchor no children
349 for (up = a0; up; up = up->parent)
350 if (up->sibling)
351 break;
352 if (!up || !(div = up->sibling) || div->action != TAGACT_DIV)
353 continue;
354 // div follows
355 /* would moving this create nested anchors? */
356 if (tagBelow(div, TAGACT_A))
357 continue;
358 /* shouldn't have inputs or forms in an anchor. */
359 if (tagBelow(div, TAGACT_INPUT))
360 continue;
361 if (tagBelow(div, TAGACT_FORM))
362 continue;
363 up->sibling = div->sibling;
364 a0->firstchild = div;
365 div->parent = a0;
366 div->sibling = 0;
367 }
368 } /* emptyAnchors */
369
370 /*********************************************************************
371 If a form is in a table, but not in tr or td, it closes immediately,
372 and all the following inputs are orphaned.
373 Check for an empty form beneath table, and move all the following siblings
374 down into the form.
375 *********************************************************************/
376
tableForm(int start)377 static void tableForm(int start)
378 {
379 int j;
380 Tag *form, *table, *t;
381
382 for (j = start; j < cw->numTags; ++j) {
383 form = tagList[j];
384 if (form->action != TAGACT_FORM || form->firstchild)
385 continue;
386 t = form;
387 for (table = form->sibling; table; table = table->sibling) {
388 if (table->action == TAGACT_TABLE &&
389 tagBelow(table, TAGACT_INPUT)) {
390 /* table with inputs below; move it to form */
391 /* hope this doesn't break anything */
392 table->parent = form;
393 form->firstchild = table;
394 t->sibling = table->sibling;
395 table->sibling = 0;
396 break;
397 }
398 t = table;
399 }
400 }
401 } /* tableForm */
402
/*
 * Tie form control t to the form that owns it, by setting t->controller.
 * The owner is currentForm if one is open, otherwise the nearest form
 * found among the ancestors of t.
 * Debug messages, suppressed when htmlGenerated is set, report a control
 * with no form (buttons and submit buttons excepted) and, if namecheck
 * is true, a control that has neither name nor id.
 */
void formControl(Tag *t, bool namecheck)
{
	const int itype = t->itype;
	const char *label = (t->name ? t->name : t->id);
	Tag *owner = currentForm;

	/* nodes could be created dynamically, not through html */
	if (!owner)
		owner = findOpenTag(t, TAGACT_FORM);

	if (owner) {
		t->controller = owner;
	} else if (itype != INP_BUTTON && itype != INP_SUBMIT
		   && !htmlGenerated) {
		debugPrint(3, "%s is not part of a fill-out form",
			   t->info->desc);
	}

	if (namecheck && !label && !htmlGenerated)
		debugPrint(3, "%s does not have a name", t->info->desc);
}				/* formControl */
420
/*
 * The input types that are handled individually.
 * The order matters: the position of a word in this list is used
 * as the input type number.  A null pointer ends the list.
 */
const char *const inp_types[] = {
	"reset",
	"button",
	"image",
	"submit",
	"hidden",
	"text",
	"file",
	"select",
	"textarea",
	"radio",
	"checkbox",
	0
};
427
428 /*********************************************************************
429 Here are some other input types that should have additional syntax checks
430 performed on them, but as far as this version of edbrowse is concerned,
431 they are equivalent to text. Just here to suppress warnings.
432 List taken from https://www.tutorialspoint.com/html/html_input_tag.htm
433 *********************************************************************/
434
/*
 * Input types that are treated as text.
 * The position of a word in this list is used as the minor type number.
 * A null pointer ends the list.
 */
const char *const inp_others[] = {
	"no_minor",
	"date",
	"datetime",
	"datetime-local",
	"month",
	"week",
	"time",
	"email",
	"range",
	"search",
	"tel",
	"url",
	"number",
	"password",
	0
};
441
442 /* helper function for input tag */
htmlInputHelper(Tag * t)443 void htmlInputHelper(Tag *t)
444 {
445 int n = INP_TEXT;
446 int len;
447 char *myname = (t->name ? t->name : t->id);
448 const char *s = attribVal(t, "type");
449 bool isbutton = stringEqual(t->info->name, "button");
450
451 t->itype = (isbutton ? INP_BUTTON : INP_TEXT);
452 if (s && *s) {
453 n = stringInListCI(inp_types, s);
454 if (n < 0) {
455 n = stringInListCI(inp_others, s);
456 if (n < 0)
457 debugPrint(3, "unrecognized input type %s", s);
458 else
459 t->itype = INP_TEXT, t->itype_minor = n;
460 if (n == INP_PW)
461 t->masked = true;
462 } else
463 t->itype = n;
464 }
465 // button no type means submit
466 if (!s && isbutton)
467 t->itype = INP_SUBMIT;
468
469 s = attribVal(t, "maxlength");
470 len = 0;
471 if (s)
472 len = stringIsNum(s);
473 if (len > 0)
474 t->lic = len;
475
476 // No preset value on file, for security reasons.
477 // <input type=file value=/etc/passwd> then submit via onload().
478 if (n == INP_FILE) {
479 nzFree(t->value);
480 t->value = 0;
481 cnzFree(t->rvalue);
482 t->rvalue = 0;
483 }
484
485 /* In this case an empty value should be "", not null */
486 if (t->value == 0)
487 t->value = emptyString;
488 if (t->rvalue == 0)
489 t->rvalue = cloneString(t->value);
490
491 if (n == INP_RADIO && t->checked && radioCheck && myname) {
492 char namebuf[200];
493 if (strlen(myname) < sizeof(namebuf) - 3) {
494 if (!*radioCheck)
495 stringAndChar(&radioCheck, &radio_l, '|');
496 sprintf(namebuf, "|%s|", t->name);
497 if (strstr(radioCheck, namebuf)) {
498 debugPrint(3,
499 "multiple radio buttons have been selected");
500 return;
501 }
502 stringAndString(&radioCheck, &radio_l, namebuf + 1);
503 }
504 }
505
506 /* Even the submit fields can have a name, but they don't have to */
507 formControl(t, (n > INP_SUBMIT));
508 } /* htmlInputHelper */
509
510 /* return an allocated string containing the text entries for the checked options */
displayOptions(const Tag * sel)511 char *displayOptions(const Tag *sel)
512 {
513 const Tag *t;
514 char *opt;
515 int opt_l;
516
517 opt = initString(&opt_l);
518 for (t = cw->optlist; t; t = t->same) {
519 if (t->controller != sel)
520 continue;
521 if (!t->checked)
522 continue;
523 if (*opt)
524 stringAndChar(&opt, &opt_l, ',');
525 stringAndString(&opt, &opt_l, t->textval);
526 }
527
528 return opt;
529 } /* displayOptions */
530
/*
 * Part of prerenderNode(): an opening text node whose textval is not null.
 * Where the text goes depends on which tag is currently open.
 * Inside title, option, style, script, or textarea the text is consumed
 * and the text node is marked deleted; the checks are made in that order.
 * Otherwise the text stays on the page, and an open anchor is told
 * if it contains a letter or digit.
 */
static void prerenderText(Tag *t)
{
	int j;

	if (currentTitle) {
		/* only the first piece of text becomes the title */
		if (!cw->htmltitle) {
			cw->htmltitle = cloneString(t->textval);
			spaceCrunch(cw->htmltitle, true, false);
		}
		t->deleted = true;
		return;
	}

	if (currentOpt) {
		currentOpt->textval = cloneString(t->textval);
		spaceCrunch(currentOpt->textval, true, false);
		t->deleted = true;
		return;
	}

	if (currentStyle) {
		t->deleted = true;
		return;
	}

	if (currentScript) {
		currentScript->textval = cloneString(t->textval);
		t->deleted = true;
		return;
	}

	if (currentTA) {
		currentTA->value = cloneString(t->textval);
		/* Sometimes tidy lops off the last newline character; it depends on
		 * the tag following. And even if it didn't end in nl in the original html,
		 * <textarea>foobar</textarea>, it probably should,
		 * as it goes into a new buffer. */
		j = strlen(currentTA->value);
		if (j && currentTA->value[j - 1] != '\n') {
			currentTA->value = reallocMem(currentTA->value, j + 2);
			currentTA->value[j] = '\n';
			currentTA->value[j + 1] = 0;
		}
		// Don't need leading whitespace.
		leftClipString(currentTA->value);
		currentTA->rvalue = cloneString(currentTA->value);
		t->deleted = true;
		return;
	}

	/* text is on the page */
	if (currentA) {
		char *s;
		for (s = t->textval; *s; ++s)
			if (isalnumByte(*s)) {
				currentA->textin = true;
				break;
			}
	}
}

/*
 * Part of prerenderNode(): an opening form tag.
 * Makes t the current form, sets its flags from the method and enctype
 * attributes and from the protocol of its href, complaining at debug
 * level 3 about values it does not know, and starts a fresh record of
 * checked radio buttons for this form.
 */
static void prerenderFormOpen(Tag *t)
{
	const char *a;

	currentForm = t;

	a = attribVal(t, "method");
	if (a) {
		if (stringEqualCI(a, "post"))
			t->post = true;
		else if (!stringEqualCI(a, "get"))
			debugPrint(3, "form method should be get or post");
	}

	a = attribVal(t, "enctype");
	if (a) {
		if (stringEqualCI(a, "multipart/form-data"))
			t->mime = true;
		else if (!stringEqualCI(a, "application/x-www-form-urlencoded"))
			debugPrint(3,
				   "unrecognized enctype, plese use multipart/form-data or application/x-www-form-urlencoded");
	}

	if ((a = t->href)) {
		const char *prot = getProtURL(a);
		if (prot) {
			if (stringEqualCI(prot, "mailto"))
				t->bymail = true;
			else if (stringEqualCI(prot, "javascript"))
				t->javapost = true;
			else if (stringEqualCI(prot, "https"))
				t->secure = true;
			else if (!stringEqualCI(prot, "http") &&
				 !stringEqualCI(prot, "gopher"))
				debugPrint(3,
					   "form cannot submit using protocol %s",
					   prot);
		}
	}

	nzFree(radioCheck);
	radioCheck = initString(&radio_l);
}

/*
 * Callback for the tree traversal done by prerender().
 * It is called for each node with opentag true on the way down and
 * false on the way back up.  Depending on the action of the tag, it
 * keeps track of the currently open form, select, option, and so on,
 * fills in fields of the tag, and marks text nodes that are absorbed
 * by their container as deleted.
 * A tag whose step is already 1 or more is skipped; step becomes 1
 * when the closing call for the tag is processed.
 */
static void prerenderNode(Tag *t, bool opentag)
{
	int itype;		/* input type */
	int j;
	int action = t->action;
	const char *a;		/* usually an attribute */
	Tag *cdt;

	debugPrint(6, "prend %c%s %d%s",
		   (opentag ? ' ' : '/'), t->info->name,
		   t->seqno, (t->step >= 1 ? "-" : ""));

	if (t->step >= 1)
		return;
	if (!opentag)
		t->step = 1;

	switch (action) {
	case TAGACT_NOSCRIPT:
		// If javascript is enabled kill everything under noscript
		if (isJSAlive && !opentag)
			underKill(t);
		break;

	case TAGACT_TEXT:
		if (opentag && t->textval)
			prerenderText(t);
		break;

	case TAGACT_TITLE:
		currentTitle = (opentag ? t : 0);
		break;

	case TAGACT_SCRIPT:
		currentScript = (opentag ? t : 0);
		break;

	case TAGACT_A:
		currentA = (opentag ? t : 0);
		break;

	case TAGACT_FORM:
		if (opentag) {
			prerenderFormOpen(t);
		} else if (currentForm) {
			/* inputs but no way to submit: add a submit button */
			if (t->ninp && !t->submitted) {
				makeButton();
				t->submitted = true;
			}
			currentForm = 0;
		}
		break;

	case TAGACT_INPUT:
		if (!opentag)
			break;
		htmlInputHelper(t);
		itype = t->itype;
		if (itype == INP_HIDDEN)
			break;
		if (currentForm) {
			/* count the visible inputs, and note anything
			 * that looks like a way to submit the form */
			++currentForm->ninp;
			if (itype == INP_SUBMIT || itype == INP_IMAGE)
				currentForm->submitted = true;
			if (itype == INP_BUTTON && t->onclick)
				currentForm->submitted = true;
			if (itype > INP_HIDDEN && itype <= INP_SELECT
			    && t->onchange)
				currentForm->submitted = true;
		}
		break;

	case TAGACT_OPTION:
		if (!opentag) {
			currentOpt = 0;
			break;
		}
		if (!currentSel) {
			debugPrint(3,
				   "option appears outside a select statement");
			break;
		}
		currentOpt = t;
		t->controller = currentSel;
		/* position of this option within its select */
		t->lic = nopt++;
		if (attribPresent(t, "selected")) {
			/* lic of the select counts its selected options */
			if (currentSel->lic && !currentSel->multiple)
				debugPrint(3, "multiple options are selected");
			else {
				t->checked = t->rchecked = true;
				++currentSel->lic;
			}
		}
		if (!t->value)
			t->value = emptyString;
		t->textval = emptyString;
		break;

	case TAGACT_STYLE:
		if (!opentag) {
			currentStyle = 0;
			break;
		}
		currentStyle = t;
		break;

	case TAGACT_SELECT:
		if (opentag) {
			currentSel = t;
			nopt = 0;
			t->itype = INP_SELECT;
			formControl(t, true);
		} else {
			/* from here on the select is an input tag,
			 * whose value is the text of its checked options */
			currentSel = 0;
			t->action = TAGACT_INPUT;
			t->value = displayOptions(t);
		}
		break;

	case TAGACT_TA:
		if (opentag) {
			currentTA = t;
			t->itype = INP_TA;
			formControl(t, true);
		} else {
			t->action = TAGACT_INPUT;
			if (!t->value) {
				/* This can only happen if there is no text inside, <textarea></textarea> */
				/* like the other value fields, it can't be null */
				t->rvalue = t->value = emptyString;
			}
#if 0
// When textarea buffers were allocated at browse time, before the ib command
			if (whichproc == 'e')
				t->lic = sideBuffer(0, t->value, -1, 0);
#endif
			currentTA = 0;
		}
		break;

	case TAGACT_META:
		if (opentag) {
			/* This function doesn't do anything inside the js process.
			 * It only works when scanning the original web page.
			 * Thus I assume meta tags that set cookies, or keywords, or description,
			 * or a refresh directive, are there from the get-go.
			 * If js was going to generate a cookie it would just set document.cookie,
			 * it wouldn't build a meta tag to set the cookie and then
			 * appendChild it to head, right? */
			htmlMetaHelper(t);
		}
		break;

	case TAGACT_TBODY:
	case TAGACT_THEAD:
	case TAGACT_TFOOT:
		/* a section is controlled by its table */
		if (opentag)
			t->controller = findOpenTag(t, TAGACT_TABLE);
		break;

	case TAGACT_TR:
		/* a row is controlled by its section, or failing that its table */
		if (opentag) {
			t->controller = findOpenSection(t);
			if (!t->controller)
				t->controller = findOpenTag(t, TAGACT_TABLE);
		}
		break;

	case TAGACT_TD:
		/* a cell is controlled by its row */
		if (opentag)
			t->controller = findOpenTag(t, TAGACT_TR);
		break;

	case TAGACT_SPAN:
		if (!opentag)
			break;
		if (!(a = t->jclass))
			break;
		/* span of class sup, sub, or ovb takes on that action */
		if (stringEqualCI(a, "sup"))
			action = TAGACT_SUP;
		if (stringEqualCI(a, "sub"))
			action = TAGACT_SUB;
		if (stringEqualCI(a, "ovb"))
			action = TAGACT_OVB;
		t->action = action;
		break;

	case TAGACT_OL:
		/* look for start parameter for numbered list */
		if (opentag) {
			a = attribVal(t, "start");
			if (a && (j = stringIsNum(a)) >= 0)
				t->slic = j - 1;
		}
		break;

	case TAGACT_FRAME:
		if (opentag)
			break;
		// If somebody wrote <frame><p>foo</frame>, those tags should be excised.
		underKill(t);
		/* the only child of a frame is a new document tag */
		cdt = newTag(cf, "document");
		t->firstchild = cdt;
		cdt->parent = t;
		break;

	case TAGACT_MUSIC:
		if (opentag)
			break;
		// If somebody wrote <audio><p>foo</audio>, those tags should be excised.
		// However <source> tags should be kept and/or expanded. Not yet implemented.
		underKill(t);
		break;

	default:
		/* nothing to do for the other tags */
		break;

	}			/* switch */
}				/* prerenderNode */
843
prerender(int start)844 void prerender(int start)
845 {
846 /* some cleanup routines to rearrange the tree */
847 nestedAnchors(start);
848 emptyAnchors(start);
849 insert_tbody(start);
850 tableForm(start);
851
852 currentForm = currentSel = currentOpt = NULL;
853 currentTitle = currentScript = currentTA = NULL;
854 currentStyle = NULL;
855 nzFree(radioCheck);
856 radioCheck = 0;
857 traverse_callback = prerenderNode;
858 traverseAll(start);
859 currentForm = NULL;
860 nzFree(radioCheck);
861 radioCheck = 0;
862 } /* prerender */
863
864 /* create a new url with constructor */
instantiate_url(const Frame * f,jsobjtype parent,const char * name,const char * url)865 jsobjtype instantiate_url(const Frame *f, jsobjtype parent, const char *name, const char *url)
866 {
867 jsobjtype uo; /* url object */
868 uo = instantiate(f, parent, name, "URL");
869 if (uo)
870 set_property_string(f, uo, "href", url);
871 return uo;
872 } /* instantiate_url */
873
874 static char fakePropLast[24];
875 static jsobjtype fakePropParent;
fakePropName(void)876 static const char *fakePropName(void)
877 {
878 static int idx = 0;
879 ++idx;
880 sprintf(fakePropLast, "gc$%c%d", whichproc, idx);
881 return fakePropLast;
882 } /*fakePropName */
883
/*
 * Create the js Option object at position idx of the options array of
 * the select object obj, and give it the usual furnishings:
 * parentNode (the options array), form (copied from the select, if it
 * has one), childNodes, attributes, and style.
 * Returns the new option object, or NULL if the options array cannot
 * be found or the option cannot be created.
 */
static jsobjtype establish_js_option(jsobjtype obj, int idx)
{
	jsobjtype optarray;	/* obj.options */
	jsobjtype opt;		/* the new option */
	jsobjtype formobj;	/* obj.form */
	jsobjtype attrs;	/* opt.attributes */
	jsobjtype style;	/* opt.style */

	optarray = get_property_object(cf, obj, "options");
	if (optarray == NULL)
		return NULL;
	opt = instantiate_array_element(cf, optarray, idx, "Option");
	if (opt == NULL)
		return NULL;

	set_property_object(cf, opt, "parentNode", optarray);

	/* option.form = select.form */
	formobj = get_property_object(cf, obj, "form");
	if (formobj)
		set_property_object(cf, opt, "form", formobj);

	instantiate_array(cf, opt, "childNodes");

	attrs = instantiate(cf, opt, "attributes", "NamedNodeMap");
	set_property_object(cf, attrs, "owner", opt);

	style = instantiate(cf, opt, "style", "CSSStyleDeclaration");
	set_property_object(cf, style, "element", opt);

	return opt;
}				/* establish_js_option */
911
/*
 * Set the text held by js object obj: its value property if isText is
 * true, otherwise its innerHTML property.
 * The text runs from start up to but not including end.
 * If end is null the whole string at start is used,
 * and if start is null the text is empty.
 * When isText is true, innerHTML is also set, to the empty string.
 */
static void establish_inner(jsobjtype obj, const char *start, const char *end,
			    bool isText)
{
	const char *prop = (isText ? "value" : "innerHTML");
	const char *text;
	char *piece = 0;	/* allocated copy, if we had to make one */

	if (!start)
		text = emptyString;
	else if (!end)
		text = start;
	else
		text = piece = pullString(start, end - start);

	set_property_string(cf, obj, prop, text);
	if (piece)
		nzFree(piece);

	// If this is a textarea, we haven't yet set up the innerHTML
	// getter and setter
	if (isText)
		set_property_string(cf, obj, "innerHTML", emptyString);
}				/* establish_inner */
930
/*
 * Create the js object that corresponds to tag t, and link the two.
 *
 * classname  the js class to instantiate
 * href       if not null, the name of the property that receives
 *            the url of the tag, when the tag has one
 * list       if not null, the name of an array under owner;
 *            the new object becomes its next member
 * owner      the js object under which the new object is created
 * isradio    the tag is a radio button; buttons that share a name
 *            are gathered into one array under owner
 *
 * The function returns early, leaving the tag unconnected, whenever
 * one of the js calls it depends on fails.
 */
static void domLink(Tag *t, const char *classname,	/* instantiate this class */
		    const char *href, const char *list,	/* next member of this array */
		    jsobjtype owner, bool isradio)
{
	jsobjtype listobj = 0;	/* owner[list] */
	jsobjtype elem = 0;	/* the object for this tag */
	jsobjtype style = 0;	/* elem.style */
	jsobjtype attrs = 0;	/* elem.attributes */
	int count;
	bool duplicate = false;
	/* some strings from the html tag */
	const char *symname = t->name;
	const char *idname = t->id;
	const char *href_url = t->href;
	const char *cls = t->jclass;
	const char *stylestring = attribVal(t, "style");
	const char *member = 0;	/* property name under owner */
	const char *safename = (symname ? symname : emptyString);
	const char *safeid = (idname ? idname : emptyString);
	char upname[MAXTAGNAME];

	debugPrint(5, "domLink %s.%d name %s", classname, isradio, safename);

	/*
	 * Does owner already have something under this name?
	 * For radio buttons that is expected: the first button of the set
	 * created an array under the name, and this one joins it.
	 * For anything else it is a collision.  Two tags may share a name,
	 * or an input may be called action and run into form.action.
	 * What is already there is never overwritten; this tag will be
	 * linked under a made-up name instead.
	 */
	if (symname && typeof_property(cf, owner, symname)) {
		if (!isradio) {
			/* don't know why the duplicate name */
			duplicate = true;
		} else {
			elem = get_property_object(cf, owner, symname);
			if (elem == NULL)
				return;
		}
	}

	/*
	 * elem is set at this point only for a radio button that is not
	 * the first of its set, in which case it is the array that holds
	 * the set.  In every other case a new object is made under owner.
	 */
	if (elem == NULL) {
		/*
		 * Under what name?  For an input tag with a list, the
		 * name attribute if there is one and it is not a duplicate,
		 * or else the id if there is no name.
		 * In every other case a made-up name, which only serves to
		 * protect the object from garbage collection.
		 */
		if (t->action == TAGACT_INPUT && list) {
			if (!symname && idname)
				member = idname;
			else if (symname && !duplicate)
				member = symname;
			/*
			 * id= or name= must not displace submit, reset, or
			 * action in form.  Example www.startpage.com, where
			 * id=submit.  Nor should it collide with another
			 * attribute, such as document.cookie and
			 * <div ID=cookie> in www.orange.com.
			 * This call checks for the name in the object and
			 * its prototype.
			 */
			if (member && has_property(cf, owner, member)) {
				debugPrint(3, "membername overload %s.%s",
					   classname, member);
				member = NULL;
			}
		}
		if (!member) {
			member = fakePropName();
			fakePropParent = owner;
		}

		if (isradio) {
			/* the first radio button: make the array for the set */
			elem = instantiate_array(cf, owner, member);
			if (elem == NULL)
				return;
			set_property_string(cf, elem, "type", "radio");
		} else {
			/* A standard element, just create it. */
			jsobjtype kids;	// child array
			elem = instantiate(cf, owner, member, classname);
			if (elem == NULL)
				return;
			/* not an array; it needs childNodes for its children */
			kids = instantiate_array(cf, elem, "childNodes");
			// childNodes and options are the same for Select
			if (stringEqual(classname, "Select"))
				set_property_object(cf, elem, "options", kids);
		}

		/*
		 * Every object gets a style object, whether or not there
		 * is anything to put in it, unless the tag is itself
		 * a style tag.
		 * If the tag has a style attribute, its contents are
		 * unpacked into that object.
		 */
		if (t->action != TAGACT_STYLE) {
			style = instantiate(cf, elem, "style",
					    "CSSStyleDeclaration");
			set_property_object(cf, style, "element", elem);
			/* processStyles can rely on the style object existing */
			if (stylestring)
				processStyles(style, stylestring);
		}

		/* Other attributes that are expected by pages, even if they
		 * aren't populated at domLink-time */
		if (!cls)
			cls = emptyString;
		set_property_string(cf, elem, "class", cls);
		set_property_string(cf, elem, "last$class", cls);
		attrs = instantiate(cf, elem, "attributes", "NamedNodeMap");
		set_property_object(cf, attrs, "owner", elem);
		set_property_object(cf, elem, "ownerDocument", cf->docobj);
		instantiate(cf, elem, "dataset", "Object");

		// only anchors with href go into links[]
		if (list && stringEqual(list, "links") &&
		    !attribPresent(t, "href"))
			list = 0;

		if (list)
			listobj = get_property_object(cf, owner, list);
		if (listobj) {
			/* append to the list, and also hang the object
			 * on the list by name if that name is free */
			count = get_arraylength(cf, listobj);
			if (count < 0)
				return;
			set_array_element_object(cf, listobj, count, elem);
			if (symname && !duplicate
			    && !has_property(cf, listobj, symname))
				set_property_object(cf, listobj, symname, elem);
#if 0
			if (idname && symname != idname
			    && !has_property(cf, listobj, idname))
				set_property_object(cf, listobj, idname, elem);
#endif
		}		/* list indicated */
	}

	if (isradio) {
		/*
		 * elem is the array for this set of radio buttons.
		 * Add a new element at the end of it;
		 * that element is the object for this button.
		 */
		jsobjtype button;
		count = get_arraylength(cf, elem);
		if (count < 0)
			return;
		button = instantiate_array_element(cf, elem, count, "Element");
		if (button == NULL)
			return;
		elem = button;
	}

	set_property_string(cf, elem, "name", safename);
	set_property_string(cf, elem, "id", safeid);
	set_property_string(cf, elem, "last$id", safeid);

	// This used to be instantiate_url, but with the new side effects
	// on Anchor, Image, etc, we can just set the string.
	if (href && href_url)
		set_property_string(cf, elem, href, href_url);

	if (t->action == TAGACT_INPUT) {
		/* link back to the form that owns the element */
		set_property_object(cf, elem, "form", owner);
	}

	connectTagObject(t, elem);

	/* nodeName and tagName are the tag name in upper case */
	strcpy(upname, t->info->name);
	caseShift(upname, 'u');
	set_property_string(cf, elem, "nodeName", upname);
	set_property_string(cf, elem, "tagName", upname);
	set_property_number(cf, elem, "nodeType", 1);
}				/* domLink */
1115
1116 static const char defvl[] = "defaultValue";
1117 static const char defck[] = "defaultChecked";
1118 static const char defsel[] = "defaultSelected";
1119
formControlJS(Tag * t)1120 static void formControlJS(Tag *t)
1121 {
1122 const char *typedesc;
1123 int itype = t->itype;
1124 bool isradio = (itype == INP_RADIO);
1125 bool isselect = (itype == INP_SELECT);
1126 const char *whichclass = (isselect ? "Select" : "Element");
1127 const Tag *form = t->controller;
1128
1129 if (form && form->jv)
1130 domLink(t, whichclass, 0, "elements", form->jv, isradio);
1131 else
1132 domLink(t, whichclass, 0, 0, cf->docobj, isradio);
1133 if (!t->jv)
1134 return;
1135
1136 if (itype <= INP_RADIO && !isselect) {
1137 set_property_string(cf, t->jv, "value", t->value);
1138 if (itype != INP_FILE) {
1139 /* No default value on file, for security reasons */
1140 set_property_string(cf, t->jv, defvl, t->value);
1141 } /* not file */
1142 }
1143
1144 if (isselect)
1145 typedesc = t->multiple ? "select-multiple" : "select-one";
1146 else
1147 typedesc = inp_types[itype];
1148 set_property_string(cf, t->jv, "type", typedesc);
1149
1150 if (itype >= INP_RADIO) {
1151 set_property_bool(cf, t->jv, "checked", t->checked);
1152 set_property_bool(cf, t->jv, defck, t->checked);
1153 }
1154 } /* formControlJS */
1155
/*********************************************************************
Create the js object for an <option> and populate it from the tag.
The option hangs off its controlling <select> (t->controller);
nothing is done if there is no controller or the select has no js object.
If the tag has text but no value, the text is copied in as the value.
A checked option under a single-valued select also sets the select's
selectedIndex to this option's index (t->lic).
*********************************************************************/
static void optionJS(Tag *t)
{
	Tag *sel = t->controller;
	const char *tx = t->textval;
	const char *cl = t->jclass;

	if (!sel)
		return;

	if (!tx) {
		debugPrint(3, "empty option");
	} else {
		if (!t->value)
			t->value = cloneString(tx);
	}

	/* no point if the controlling select doesn't have a js object */
	if (!sel->jv)
		return;

	connectTagObject(t, establish_js_option(sel->jv, t->lic));
	/* The option object may not have been created; as in formControlJS(),
	 * don't set properties on an object that isn't there. */
	if (!t->jv)
		return;

	set_property_string(cf, t->jv, "text", t->textval);
	set_property_string(cf, t->jv, "value", t->value);
	set_property_string(cf, t->jv, "nodeName", "OPTION");
	set_property_number(cf, t->jv, "nodeType", 1);
	set_property_bool(cf, t->jv, "selected", t->checked);
	set_property_bool(cf, t->jv, defsel, t->checked);
	if (!cl)
		cl = emptyString;
	set_property_string(cf, t->jv, "class", cl);
	set_property_string(cf, t->jv, "last$class", cl);

	if (t->checked && !sel->multiple)
		set_property_number(cf, sel->jv, "selectedIndex", t->lic);
}				/* optionJS */
1191
/*********************************************************************
Handle a <link> tag that may refer to a style sheet.
The type and rel attributes are copied onto the js object when present.
If the tag has an href, and type is text/css or rel is stylesheet
(case insensitive), the css is read, from a local file or over http,
converted by force_utf8() when that yields a result,
and stored on the object as css$data.
t->lic is set to 1 when css data was obtained, so the onload function,
if there is one, can be run.
*********************************************************************/
static void link_css(Tag *t)
{
	const char *type = attribVal(t, "type");
	const char *rel = attribVal(t, "rel");
	const char *css = NULL;
	const char *source;
	bool type_is_css, rel_is_sheet;
	struct i_get req;
	char *raw;
	int rawlen;

	if (type)
		set_property_string(cf, t->jv, "type", type);
	if (rel)
		set_property_string(cf, t->jv, "rel", rel);
	if (!t->href)
		return;

	type_is_css = (type && stringEqualCI(type, "text/css"));
	rel_is_sheet = (rel && stringEqualCI(rel, "stylesheet"));
	if (!type_is_css && !rel_is_sheet)
		return;

	// Fetch the css file so we can apply its attributes.
	source = fetchReplace(t->href);
	if (!source)
		source = t->href;

	if (browseLocal && !isURL(source)) {
		debugPrint(3, "css source %s", source);
		if (fileIntoMemory(source, &raw, &rawlen)) {
			css = force_utf8(raw, rawlen);
			if (css)
				nzFree(raw);
			else
				css = raw;
		} else if (debugLevel >= 1) {
			i_printf(MSG_GetLocalCSS);
		}
	} else {
		debugPrint(3, "css source %s", t->href);
		memset(&req, 0, sizeof(req));
		req.thisfile = cf->fileName;
		req.uriEncoded = true;
		req.url = t->href;
		if (!httpConnect(&req)) {
			if (debugLevel >= 3)
				i_printf(MSG_GetCSS2);
		} else if (req.code != 200) {
			nzFree(req.buffer);
			if (debugLevel >= 3)
				i_printf(MSG_GetCSS, req.url, req.code);
		} else {
			css = force_utf8(req.buffer, req.length);
			if (css)
				nzFree(req.buffer);
			else
				css = req.buffer;
// acid3 test[0] says we don't process this file if it's content type is
// text/html. Should I test for anything outside of text/css?
// For now I insist it be missing or text/css or text/plain.
// A similar test is performed in css.c after httpConnect.
			if (req.content[0]
			    && !stringEqual(req.content, "text/css")
			    && !stringEqual(req.content, "text/plain")) {
				debugPrint(3,
					   "css suppressed because content type is %s",
					   req.content);
				cnzFree(css);
				css = NULL;
			}
		}
	}

	if (css) {
		set_property_string(cf, t->jv, "css$data", css);
		// indicate we can run the onload function, if there is one
		t->lic = 1;
	}
	cnzFree(css);
}				/* link_css */
1272
1273 static jsobjtype innerParent;
1274
jsNode(Tag * t,bool opentag)1275 static void jsNode(Tag *t, bool opentag)
1276 {
1277 const struct tagInfo *ti = t->info;
1278 int action = t->action;
1279 const Tag *above;
1280 const char *a;
1281 bool linked_in;
1282
1283 // run reindex at table close
1284 if (action == TAGACT_TABLE && !opentag && t->jv)
1285 run_function_onearg(cf, cf->winobj, "rowReindex", t->jv);
1286
1287 /* all the js variables are on the open tag */
1288 if (!opentag)
1289 return;
1290 if (t->step >= 2)
1291 return;
1292 t->step = 2;
1293
1294 /*********************************************************************
1295 If js is off, and you don't decorate this tree,
1296 then js is turned on later, and you parse and decorate a frame,
1297 it might also decorate this tree in the wrong context.
1298 Needless to say that's not good!
1299 *********************************************************************/
1300 if (t->f0 != cf)
1301 return;
1302
1303 debugPrint(6, "decorate %s %d", t->info->name, t->seqno);
1304 fakePropLast[0] = 0;
1305
1306 switch (action) {
1307
1308 case TAGACT_TEXT:
1309 connectTagObject(t,
1310 instantiate(cf, cf->docobj, fakePropName(), "TextNode"));
1311 // nodeName and nodeType set in constructor
1312 if (t->jv) {
1313 const char *w = t->textval;
1314 if (!w)
1315 w = emptyString;
1316 set_property_string(cf, t->jv, "data", w);
1317 w = (t->jclass ? t->jclass : emptyString);
1318 set_property_string(cf, t->jv, "class", w);
1319 set_property_string(cf, t->jv, "last$class", w);
1320 }
1321 break;
1322
1323 case TAGACT_HTML:
1324 domLink(t, "HTML", 0, 0, cf->docobj, 0);
1325 cf->htmltag = t;
1326 break;
1327
1328 case TAGACT_META:
1329 domLink(t, "Meta", 0, "metas", cf->docobj, 0);
1330 break;
1331
1332 case TAGACT_STYLE:
1333 domLink(t, "CSSStyleDeclaration", 0, "styles", cf->docobj, 0);
1334 a = attribVal(t, "type");
1335 if (!a)
1336 a = emptyString;
1337 set_property_string(cf, t->jv, "type", a);
1338 break;
1339
1340 case TAGACT_SCRIPT:
1341 domLink(t, "Script", "src", "scripts", cf->docobj, 0);
1342 a = attribVal(t, "type");
1343 if (a)
1344 set_property_string(cf, t->jv, "type", a);
1345 a = attribVal(t, "text");
1346 if (a) {
1347 set_property_string(cf, t->jv, "text", a);
1348 } else {
1349 set_property_string(cf, t->jv, "text", "");
1350 }
1351 a = attribVal(t, "src");
1352 if (a) {
1353 set_property_string(cf, t->jv, "src", a);
1354 if (down_jsbg && a[0]) // from another source, let's get it started
1355 prepareScript(t);
1356 } else {
1357 set_property_string(cf, t->jv, "src", "");
1358 }
1359 break;
1360
1361 case TAGACT_FORM:
1362 domLink(t, "Form", "action", "forms", cf->docobj, 0);
1363 break;
1364
1365 case TAGACT_INPUT:
1366 formControlJS(t);
1367 if (t->itype == INP_TA)
1368 establish_inner(t->jv, t->value, 0, true);
1369 break;
1370
1371 case TAGACT_OPTION:
1372 optionJS(t);
1373 // The parent child relationship has already been established,
1374 // don't break, just return;
1375 return;
1376
1377 case TAGACT_A:
1378 domLink(t, "Anchor", "href", "links", cf->docobj, 0);
1379 break;
1380
1381 case TAGACT_HEAD:
1382 domLink(t, "Head", 0, "heads", cf->docobj, 0);
1383 cf->headtag = t;
1384 break;
1385
1386 case TAGACT_BODY:
1387 domLink(t, "Body", 0, "bodies", cf->docobj, 0);
1388 cf->bodytag = t;
1389 break;
1390
1391 case TAGACT_OL:
1392 case TAGACT_UL:
1393 case TAGACT_DL:
1394 domLink(t, "Lister", 0, 0, cf->docobj, 0);
1395 break;
1396
1397 case TAGACT_LI:
1398 domLink(t, "Listitem", 0, 0, cf->docobj, 0);
1399 break;
1400
1401 case TAGACT_TABLE:
1402 domLink(t, "Table", 0, "tables", cf->docobj, 0);
1403 break;
1404
1405 case TAGACT_TBODY:
1406 if ((above = t->controller) && above->jv)
1407 domLink(t, "tBody", 0, "tBodies", above->jv, 0);
1408 break;
1409
1410 case TAGACT_THEAD:
1411 if ((above = t->controller) && above->jv) {
1412 domLink(t, "tHead", 0, 0, above->jv, 0);
1413 set_property_object(cf, above->jv, "tHead", t->jv);
1414 }
1415 break;
1416
1417 case TAGACT_TFOOT:
1418 if ((above = t->controller) && above->jv) {
1419 domLink(t, "tFoot", 0, 0, above->jv, 0);
1420 set_property_object(cf, above->jv, "tFoot", t->jv);
1421 }
1422 break;
1423
1424 case TAGACT_TR:
1425 if ((above = t->controller) && above->jv)
1426 domLink(t, "tRow", 0, "rows", above->jv, 0);
1427 break;
1428
1429 case TAGACT_TD:
1430 if ((above = t->controller) && above->jv)
1431 domLink(t, "Cell", 0, "cells", above->jv, 0);
1432 break;
1433
1434 case TAGACT_DIV:
1435 domLink(t, "Div", 0, "divs", cf->docobj, 0);
1436 break;
1437
1438 case TAGACT_LABEL:
1439 domLink(t, "Label", 0, "labels", cf->docobj, 0);
1440 break;
1441
1442 case TAGACT_OBJECT:
1443 domLink(t, "HtmlObj", 0, "htmlobjs", cf->docobj, 0);
1444 break;
1445
1446 case TAGACT_UNKNOWN:
1447 domLink(t, "HTMLElement", 0, 0, cf->docobj, 0);
1448 break;
1449
1450 case TAGACT_SPAN:
1451 case TAGACT_SUB:
1452 case TAGACT_SUP:
1453 case TAGACT_OVB:
1454 domLink(t, "Span", 0, "spans", cf->docobj, 0);
1455 break;
1456
1457 case TAGACT_AREA:
1458 domLink(t, "Area", "href", "links", cf->docobj, 0);
1459 break;
1460
1461 case TAGACT_FRAME:
1462 // about:blank means a blank frame with no sourcefile.
1463 if (stringEqual(t->href, "about:blank")) {
1464 nzFree(t->href);
1465 t->href = 0;
1466 }
1467 domLink(t, "Frame", "src", "frames", cf->winobj, 0);
1468 break;
1469
1470 case TAGACT_IMAGE:
1471 domLink(t, "Image", "src", "images", cf->docobj, 0);
1472 break;
1473
1474 case TAGACT_P:
1475 domLink(t, "P", 0, "paragraphs", cf->docobj, 0);
1476 break;
1477
1478 case TAGACT_HEADER:
1479 domLink(t, "Header", 0, "headers", cf->docobj, 0);
1480 break;
1481
1482 case TAGACT_FOOTER:
1483 domLink(t, "Footer", 0, "footers", cf->docobj, 0);
1484 break;
1485
1486 case TAGACT_TITLE:
1487 if (cw->htmltitle)
1488 set_property_string(cf, cf->docobj, "title", cw->htmltitle);
1489 domLink(t, "Title", 0, 0, cf->docobj, 0);
1490 break;
1491
1492 case TAGACT_LINK:
1493 domLink(t, "Link", "href", 0, cf->docobj, 0);
1494 link_css(t);
1495 break;
1496
1497 case TAGACT_MUSIC:
1498 domLink(t, "Audio", "src", 0, cf->docobj, 0);
1499 break;
1500
1501 default:
1502 // Don't know what this tag is, or it's not semantically important,
1503 // so just call it an html element.
1504 domLink(t, "Element", 0, 0, cf->docobj, 0);
1505 if (t->action == TAGACT_BASE && t->href)
1506 instantiate_url(cf, t->jv, "href", t->href);
1507 break;
1508 } /* switch */
1509
1510 if (!t->jv)
1511 return; /* nothing else to do */
1512
1513 /* js tree mirrors the dom tree. */
1514 linked_in = false;
1515
1516 if (t->parent && t->parent->jv) {
1517 run_function_onearg(cf, t->parent->jv, "eb$apch1", t->jv);
1518 linked_in = true;
1519 // special code for frame.contentDocument.
1520 if (t->parent->action == TAGACT_FRAME) {
1521 set_property_object(cf, t->parent->jv,
1522 "contentDocument", t->jv);
1523 set_property_object(cf, t->parent->jv,
1524 "contentWindow", t->jv);
1525 }
1526 }
1527
1528 if (action == TAGACT_HTML) {
1529 run_function_onearg(cf, cf->docobj, "eb$apch1", t->jv);
1530 linked_in = true;
1531 }
1532
1533 if (!t->parent && innerParent) {
1534 // this is the top of innerHTML or some such.
1535 // It is never html head or body, as those are skipped.
1536 run_function_onearg(cf, innerParent, "eb$apch1", t->jv);
1537 linked_in = true;
1538 }
1539
1540 if (linked_in && fakePropLast[0]) {
1541 // Node linked to document/gc to protect if from garbage collection,
1542 // but now it is linked to its parent.
1543 delete_property(cf, fakePropParent, fakePropLast);
1544 }
1545
1546 if (!linked_in) {
1547 debugPrint(3, "tag %s not linked in", ti->name);
1548 if (action == TAGACT_TEXT)
1549 debugPrint(1, "text %s\n", t->textval);
1550 }
1551
1552 /* set innerHTML from the source html, if this tag supports it */
1553 if (ti->bits & TAG_INNERHTML)
1554 establish_inner(t->jv, t->innerHTML, 0, false);
1555
1556 // If the tag has foo=bar as an attribute, pass this forward to javascript.
1557 pushAttributes(t);
1558 } /* jsNode */
1559
pushAttributes(const Tag * t)1560 static void pushAttributes(const Tag *t)
1561 {
1562 int i;
1563 const char **a = t->attributes;
1564 const char **v = t->atvals;
1565 if (!a)
1566 return;
1567 for (i = 0; a[i]; ++i) {
1568 // There are some exceptions, some attributes that we handle individually.
1569 static const char *const exclist[] = {
1570 "name", "id", "class",
1571 "checked", "value", "type",
1572 "href", "src", "action",
1573 0
1574 };
1575 static const char *const dotrue[] = {
1576 "required",
1577 "multiple", "readonly", "disabled", "async", 0
1578 };
1579 static const char *const handlers[] = {
1580 "onload", "onunload", "onclick", "onchange",
1581 "onsubmit", "onreset",
1582 0
1583 };
1584 const char *u;
1585
1586 // Should we drop attribute name to lower case? I don't, for now.
1587 u = v[i];
1588 if (!u)
1589 u = emptyString;
1590
1591 // attributes on HTML tags that begin with "data-" should be available under a
1592 // "dataset" object in JS
1593 if (strncmp(a[i], "data-", 5) == 0) {
1594 jsobjtype dso = get_property_object(cf, t->jv, "dataset");
1595 if (dso) {
1596 // must convert to camelCase
1597 char *a2 = cloneString(a[i] + 5);
1598 camelCase(a2);
1599 set_property_string(cf, dso, a2, u);
1600 nzFree(a2);
1601 run_function_onestring(cf, t->jv, "markAttribute", a[i]);
1602 }
1603 continue;
1604 }
1605
1606 if (stringEqual(a[i], "style")) // no clue
1607 continue;
1608
1609 // Maybe they wrote <a firstChild=foo>
1610 // See if the name is in the prototype, and not a handler,
1611 // as handlers have setters.
1612 if (has_property(cf, t->jv, a[i]) && !typeof_property(cf, t->jv, a[i])
1613 && stringInList(handlers, a[i]) < 0) {
1614 debugPrint(3, "html attribute overload %s.%s",
1615 t->info->name, a[i]);
1616 continue;
1617 }
1618 // There are some, like multiple or readonly, that should be set to true,
1619 // not the empty string.
1620 if (!*u && stringInList(dotrue, a[i]) >= 0) {
1621 set_property_bool(cf, t->jv, a[i], true);
1622 } else {
1623 // standard attribute here
1624 if (stringInListCI(exclist, a[i]) < 0)
1625 set_property_string(cf, t->jv, a[i], u);
1626 }
1627 run_function_onestring(cf, t->jv, "markAttribute", a[i]);
1628 }
1629 } /* pushAttributes */
1630
1631 /* decorate the tree of nodes with js objects */
decorate(int start)1632 void decorate(int start)
1633 {
1634 traverse_callback = jsNode;
1635 traverseAll(start);
1636 } /* decorate */
1637
1638 /* paranoia check on the number of tags */
tagCountCheck(void)1639 static void tagCountCheck(void)
1640 {
1641 if (sizeof(int) == 4) {
1642 if (cw->numTags > MAXLINES)
1643 i_printfExit(MSG_LineLimit);
1644 }
1645 } /* tagCountCheck */
1646
/*********************************************************************
Append a tag to the current window's tag array, growing the array first
if it is full. Growth is by half again, as before, but a/2*3 is 0 when
the capacity is 0 or 1, which would leave no room for the new tag;
in that case grow by 512 instead, the size initTagArray() starts with.
The tag count is checked against the limit after the tag is stored.
*********************************************************************/
static void pushTag(Tag *t)
{
	int a = cw->allocTags;
	if (cw->numTags >= a) {
		debugPrint(4, "%d tags, %d dead", a, cw->deadTags);
		/* make more room */
		a = a / 2 * 3;
		/* the new capacity has to hold at least one more tag */
		if (a <= cw->numTags)
			a = cw->numTags + 512;
		cw->tags =
		    (Tag **)reallocMem(cw->tags, a * sizeof(Tag *));
		cw->allocTags = a;
	}
	tagList[cw->numTags++] = t;
	tagCountCheck();
}				/* pushTag */
1661
1662 static void freeTag(Tag *t);
1663
1664 // garbage collect the dead tags.
1665 // You must rerender after this runs, so that the buffer has no dead tags,
1666 // and the remaining tags have their new numbers embedded in the buffer.
tag_gc(void)1667 void tag_gc(void)
1668 {
1669 int cx; /* edbrowse context */
1670 struct ebWindow *w, *save_cw;
1671 Tag *t;
1672 int i, j;
1673
1674 for (cx = 1; cx <= maxSession; ++cx) {
1675 for (w = sessionList[cx].lw; w; w = w->prev) {
1676 if (!w->tags)
1677 continue;
1678 // Don't bother unless a third of the tags are dead.
1679 if (w->deadTags * 3 < w->numTags)
1680 continue;
1681
1682 // sync any changed fields before we muck with the tags.
1683 save_cw = cw;
1684 cw = w;
1685 cf = &(cw->f0);
1686 jSyncup(true);
1687 cw = save_cw;
1688 cf = &(cw->f0);
1689
1690 // ok let's crunch.
1691 for (i = j = 0; i < w->numTags; ++i) {
1692 t = w->tags[i];
1693 if (t->dead) {
1694 freeTag(t);
1695 } else {
1696 t->seqno = j;
1697 w->tags[j++] = t;
1698 }
1699 }
1700 debugPrint(4, "tag_gc from %d to %d", w->numTags, j);
1701 w->numTags = j;
1702 w->deadTags = 0;
1703
1704 // We must rerender when we return to this window,
1705 // or at the input loop if this is the current window.
1706 // Tags have been renumbered, need to rebuild the text buffer accordingly.
1707 w->mustrender = true;
1708 if (w != cw)
1709 w->nextrender = 0;
1710 }
1711 }
1712 }
1713
1714 /* first three have to be in this order */
1715 const struct tagInfo availableTags[] = {
1716 {"html", "html", TAGACT_HTML},
1717 {"base", "base reference for relative URLs", TAGACT_BASE, 0, 4},
1718 {"unknown0", "an html entity", TAGACT_UNKNOWN, 5, 1},
1719 {"object", "an html object", TAGACT_OBJECT, 5, 3},
1720 {"a", "an anchor", TAGACT_A, 0, 1},
1721 {"htmlanchorelement", "an anchor element", TAGACT_A, 0, 1},
1722 {"input", "an input item", TAGACT_INPUT, 0, 4},
1723 {"element", "an input element", TAGACT_INPUT, 0, 4},
1724 {"title", "the title", TAGACT_TITLE, 0, 0},
1725 {"textarea", "an input text area", TAGACT_TA, 0, 0},
1726 {"select", "an option list", TAGACT_SELECT, 0, 0},
1727 {"option", "a select option", TAGACT_OPTION, 0, 0},
1728 {"sub", "a subscript", TAGACT_SUB, 0, 0},
1729 {"sup", "a superscript", TAGACT_SUP, 0, 0},
1730 {"ovb", "an overbar", TAGACT_OVB, 0, 0},
1731 {"font", "a font", TAGACT_NOP, 0, 0},
1732 {"cite", "a citation", TAGACT_NOP, 0, 0},
1733 {"tt", "teletype", TAGACT_NOP, 0, 0},
1734 {"center", "centered text", TAGACT_P, 2, 5},
1735 {"caption", "a caption", TAGACT_NOP, 5, 0},
1736 {"head", "the html header information", TAGACT_HEAD, 0, 5},
1737 {"body", "the html body", TAGACT_BODY, 0, 5},
1738 {"text", "a text section", TAGACT_TEXT, 0, 4},
1739 {"bgsound", "background music", TAGACT_MUSIC, 0, 4},
1740 {"audio", "audio passage", TAGACT_MUSIC, 0, 4},
1741 {"video", "video passage", TAGACT_MUSIC, 0, 4},
1742 {"meta", "a meta tag", TAGACT_META, 0, 4},
1743 {"style", "a style tag", TAGACT_STYLE, 0, 2},
1744 {"link", "a link tag", TAGACT_LINK, 0, 4},
1745 {"img", "an image", TAGACT_IMAGE, 0, 4},
1746 {"image", "an image", TAGACT_IMAGE, 0, 4},
1747 {"br", "a line break", TAGACT_BR, 1, 4},
1748 {"p", "a paragraph", TAGACT_P, 2, 5},
1749 {"header", "a header", TAGACT_HEADER, 2, 5},
1750 {"footer", "a footer", TAGACT_FOOTER, 2, 5},
1751 {"div", "a divided section", TAGACT_DIV, 5, 1},
1752 {"map", "a map of images", TAGACT_NOP, 5, 0},
1753 {"blockquote", "a quoted paragraph", TAGACT_NOP, 10, 1},
1754 {"document", "a document", TAGACT_DOC, 5, 1},
1755 {"fragment", "a document fragment", TAGACT_FRAG, 5, 1},
1756 {"comment", "a comment", TAGACT_COMMENT, 0, 2},
1757 {"h1", "a level 1 header", TAGACT_H, 10, 1},
1758 {"h2", "a level 2 header", TAGACT_H, 10, 1},
1759 {"h3", "a level 3 header", TAGACT_H, 10, 1},
1760 {"h4", "a level 4 header", TAGACT_H, 10, 1},
1761 {"h5", "a level 5 header", TAGACT_H, 10, 1},
1762 {"h6", "a level 6 header", TAGACT_H, 10, 1},
1763 {"dt", "a term", TAGACT_DT, 2, 4},
1764 {"dd", "a definition", TAGACT_DD, 1, 4},
1765 {"li", "a list item", TAGACT_LI, 1, 5},
1766 {"ul", "a bullet list", TAGACT_UL, 10, 1},
1767 {"dir", "a directory list", TAGACT_NOP, 5, 0},
1768 {"menu", "a menu", TAGACT_NOP, 5, 0},
1769 {"ol", "a numbered list", TAGACT_OL, 10, 1},
1770 {"dl", "a definition list", TAGACT_DL, 10, 1},
1771 {"hr", "a horizontal line", TAGACT_HR, 5, 4},
1772 {"form", "a form", TAGACT_FORM, 10, 1},
1773 {"button", "a button", TAGACT_INPUT, 0, 1},
1774 {"frame", "a frame", TAGACT_FRAME, 2, 0},
1775 {"iframe", "a frame", TAGACT_FRAME, 2, 1},
1776 {"map", "an image map", TAGACT_MAP, 2, 4},
1777 {"area", "an image map area", TAGACT_AREA, 0, 4},
1778 {"table", "a table", TAGACT_TABLE, 10, 1},
1779 {"tbody", "a table body", TAGACT_TBODY, 0, 1},
1780 {"thead", "a table body", TAGACT_THEAD, 0, 1},
1781 {"tfoot", "a table body", TAGACT_TFOOT, 0, 1},
1782 {"tr", "a table row", TAGACT_TR, 5, 1},
1783 {"td", "a table entry", TAGACT_TD, 0, 5},
1784 {"th", "a table heading", TAGACT_TD, 0, 5},
1785 {"pre", "a preformatted section", TAGACT_PRE, 10, 0},
1786 {"listing", "a listing", TAGACT_PRE, 1, 0},
1787 {"xmp", "an example", TAGACT_PRE, 1, 0},
1788 {"fixed", "a fixed presentation", TAGACT_NOP, 1, 0},
1789 {"code", "a block of code", TAGACT_NOP, 0, 0},
1790 {"samp", "a block of sample text", TAGACT_NOP, 0, 0},
1791 {"address", "an address block", TAGACT_NOP, 1, 0},
1792 {"style", "a style block", TAGACT_NOP, 0, 2},
1793 {"script", "a script", TAGACT_SCRIPT, 0, 1},
1794 {"noscript", "no script section", TAGACT_NOSCRIPT, 0, 2},
1795 {"noframes", "no frames section", TAGACT_NOP, 0, 2},
1796 {"embed", "embedded html", TAGACT_MUSIC, 0, 4},
1797 {"noembed", "no embed section", TAGACT_NOP, 0, 2},
1798 {"em", "emphasized text", TAGACT_JS, 0, 0},
1799 {"label", "a label", TAGACT_LABEL, 0, 0},
1800 {"strike", "emphasized text", TAGACT_JS, 0, 0},
1801 {"s", "emphasized text", TAGACT_JS, 0, 0},
1802 {"strong", "emphasized text", TAGACT_JS, 0, 0},
1803 {"b", "bold text", TAGACT_JS, 0, 0},
1804 {"i", "italicized text", TAGACT_JS, 0, 0},
1805 {"u", "underlined text", TAGACT_JS, 0, 0},
1806 {"var", "variable text", TAGACT_JS, 0, 0},
1807 {"kbd", "keyboard text", TAGACT_JS, 0, 0},
1808 {"dfn", "definition text", TAGACT_JS, 0, 0},
1809 {"q", "quoted text", TAGACT_JS, 0, 0},
1810 {"abbr", "an abbreviation", TAGACT_JS, 0, 0},
1811 {"span", "an html span", TAGACT_SPAN, 0, 1},
1812 {"frameset", "a frame set", TAGACT_JS, 0, 0},
1813 {"", NULL, 0}
1814 };
1815
/*********************************************************************
Release a tag and everything it owns: its strings,
the attribute names, the attribute values, and the tag itself.
The values in atvals sit at the same index as their names in attributes,
and a value can be null in the middle of the list (pushAttributes and
intoTree both allow for that), so the values are freed by counting the
names, rather than by stopping at the first null value,
which would leak every value after it.
*********************************************************************/
static void freeTag(Tag *t)
{
	char **names, **vals;
	int n = 0, i;

// Even if js has been turned off, if this tag was previously connected to an
// object, we should disconnect it.
	if (t->jslink)
		disconnectTagObject(t);
	nzFree(t->textval);
	nzFree(t->name);
	nzFree(t->id);
	nzFree(t->jclass);
	nzFree(t->nodeName);
	nzFree(t->value);
	cnzFree(t->rvalue);
	nzFree(t->href);
	nzFree(t->js_file);
	nzFree(t->innerHTML);

	names = (char **)t->attributes;
	vals = (char **)t->atvals;

	/* count the names first; this is also the number of values */
	if (names) {
		while (names[n])
			++n;
	}

	if (vals) {
		if (names) {
			for (i = 0; i < n; ++i)
				nzFree(vals[i]);
		} else {
			/* no names to count by, fall back to the null terminator */
			for (i = 0; vals[i]; ++i)
				nzFree(vals[i]);
		}
		free(t->atvals);
	}

	if (names) {
		for (i = 0; i < n; ++i)
			nzFree(names[i]);
		free(t->attributes);
	}

	free(t);
}
1854
freeTags(struct ebWindow * w)1855 void freeTags(struct ebWindow *w)
1856 {
1857 int i, n;
1858 Tag *t, **e;
1859
1860 /* if not browsing ... */
1861 if (!(e = w->tags))
1862 return;
1863
1864 /* drop empty textarea buffers created by this session */
1865 for (t = w->inputlist; t; t = t->same) {
1866 if (t->action != TAGACT_INPUT)
1867 continue;
1868 if (t->itype != INP_TA)
1869 continue;
1870 if (!(n = t->lic))
1871 continue;
1872 freeEmptySideBuffer(n);
1873 } /* loop over tags */
1874
1875 for (i = 0; i < w->numTags; ++i, ++e) {
1876 t = *e;
1877 disconnectTagObject(t);
1878 freeTag(t);
1879 }
1880
1881 free(w->tags);
1882 w->tags = 0;
1883 w->numTags = w->allocTags = w->deadTags = 0;
1884 w->inputlist = w->scriptlist = w->optlist = w->linklist = 0;
1885 } /* freeTags */
1886
newTag(const Frame * f,const char * name)1887 Tag *newTag(const Frame *f, const char *name)
1888 {
1889 Tag *t, *t1, *t2 = 0;
1890 const struct tagInfo *ti;
1891 static int gsn = 0;
1892
1893 for (ti = availableTags; ti->name[0]; ++ti)
1894 if (stringEqualCI(ti->name, name))
1895 break;
1896
1897 if (!ti->name[0]) {
1898 debugPrint(4, "warning, created node %s reverts to generic",
1899 name);
1900 ti = availableTags + 2;
1901 }
1902
1903 t = (Tag *)allocZeroMem(sizeof(Tag));
1904 t->action = ti->action;
1905 t->f0 = (Frame *) f; /* set owning frame */
1906 t->info = ti;
1907 t->seqno = cw->numTags;
1908 t->gsn = ++gsn;
1909 t->nodeName = cloneString(name);
1910 pushTag(t);
1911 if (t->action == TAGACT_SCRIPT) {
1912 for (t1 = cw->scriptlist; t1; t1 = t1->same)
1913 if (!t1->slash)
1914 t2 = t1;
1915 if (t2)
1916 t2->same = t;
1917 else
1918 cw->scriptlist = t;
1919 }
1920 if (t->action == TAGACT_LINK) {
1921 for (t1 = cw->linklist; t1; t1 = t1->same)
1922 if (!t1->slash)
1923 t2 = t1;
1924 if (t2)
1925 t2->same = t;
1926 else
1927 cw->linklist = t;
1928 }
1929 if (t->action == TAGACT_INPUT || t->action == TAGACT_SELECT ||
1930 t->action == TAGACT_TA) {
1931 for (t1 = cw->inputlist; t1; t1 = t1->same)
1932 if (!t1->slash)
1933 t2 = t1;
1934 if (t2)
1935 t2->same = t;
1936 else
1937 cw->inputlist = t;
1938 }
1939 if (t->action == TAGACT_OPTION) {
1940 for (t1 = cw->optlist; t1; t1 = t1->same)
1941 if (!t1->slash)
1942 t2 = t1;
1943 if (t2)
1944 t2->same = t;
1945 else
1946 cw->optlist = t;
1947 }
1948 return t;
1949 } /* newTag */
1950
1951 /* reserve space for 512 tags */
initTagArray(void)1952 void initTagArray(void)
1953 {
1954 cw->numTags = 0;
1955 cw->allocTags = 512;
1956 cw->deadTags = 0;
1957 cw->tags =
1958 (Tag **)allocMem(cw->allocTags *
1959 sizeof(Tag *));
1960 } /* initTagArray */
1961
1962 bool htmlGenerated;
1963 static Tag *treeAttach;
1964 static int tree_pos;
1965 static bool treeDisable;
1966 static void intoTree(Tag *parent);
1967 static const int tdb = 5; // tree debug level
1968
htmlNodesIntoTree(int start,Tag * attach)1969 void htmlNodesIntoTree(int start, Tag *attach)
1970 {
1971 treeAttach = attach;
1972 tree_pos = start;
1973 treeDisable = false;
1974 debugPrint(tdb, "@@tree of nodes");
1975 intoTree(0);
1976 debugPrint(tdb, "}\n@@end tree");
1977 } /* htmlNodesIntoTree */
1978
1979 /* Convert a list of html nodes, properly nested open close, into a tree.
1980 * Attach the tree to an existing tree here, for document.write etc,
1981 * or just build the tree if attach is null. */
intoTree(Tag * parent)1982 static void intoTree(Tag *parent)
1983 {
1984 Tag *t, *prev = 0;
1985 int j;
1986 const char *v;
1987 int action;
1988
1989 if (!parent)
1990 debugPrint(tdb, "root {");
1991 else
1992 debugPrint(tdb, "%s %d {", parent->info->name, parent->seqno);
1993
1994 while (tree_pos < cw->numTags) {
1995 t = tagList[tree_pos++];
1996 if (t->slash) {
1997 if (parent) {
1998 parent->balance = t, t->balance = parent;
1999 t->dead = parent->dead;
2000 if (t->dead)
2001 ++cw->deadTags;
2002 }
2003 debugPrint(tdb, "}");
2004 return;
2005 }
2006
2007 if (treeDisable) {
2008 debugPrint(tdb, "node skip %s", t->info->name);
2009 t->dead = true;
2010 ++cw->deadTags;
2011 intoTree(t);
2012 continue;
2013 }
2014
2015 if (htmlGenerated) {
2016 /*Some things are different if the html is generated, not part of the original web page.
2017 * You can skip past <head> altogether, including its
2018 * tidy generated descendants, and you want to pass through <body>
2019 * to the children below. */
2020 action = t->action;
2021 if (action == TAGACT_HEAD) {
2022 debugPrint(tdb, "node skip %s", t->info->name);
2023 t->dead = true;
2024 ++cw->deadTags;
2025 treeDisable = true;
2026 intoTree(t);
2027 treeDisable = false;
2028 continue;
2029 }
2030 if (action == TAGACT_HTML || action == TAGACT_BODY) {
2031 debugPrint(tdb, "node pass %s", t->info->name);
2032 t->dead = true;
2033 ++cw->deadTags;
2034 intoTree(t);
2035 continue;
2036 }
2037
2038 /* this node is ok, but if parent is a pass through node... */
2039 if (parent == 0 || /* this shouldn't happen */
2040 parent->action == TAGACT_BODY) {
2041 /* link up to treeAttach */
2042 const char *w = "root";
2043 if (treeAttach)
2044 w = treeAttach->info->name;
2045 debugPrint(tdb, "node up %s to %s",
2046 t->info->name, w);
2047 t->parent = treeAttach;
2048 if (treeAttach) {
2049 Tag *c =
2050 treeAttach->firstchild;
2051 if (!c)
2052 treeAttach->firstchild = t;
2053 else {
2054 while (c->sibling)
2055 c = c->sibling;
2056 c->sibling = t;
2057 }
2058 }
2059 goto checkattributes;
2060 }
2061 }
2062
2063 /* regular linking through the parent node */
2064 /* Could be treeAttach if this is a frame inside a window */
2065 t->parent = (parent ? parent : treeAttach);
2066 if (prev) {
2067 prev->sibling = t;
2068 } else if (parent) {
2069 parent->firstchild = t;
2070 } else if (treeAttach) {
2071 treeAttach->firstchild = t;
2072 }
2073 prev = t;
2074
2075 checkattributes:
2076 /* check for some common attributes here */
2077 action = t->action;
2078 if (stringInListCI(t->attributes, "onclick") >= 0)
2079 t->onclick = t->doorway = true;
2080 if (stringInListCI(t->attributes, "onchange") >= 0)
2081 t->onchange = t->doorway = true;
2082 if (stringInListCI(t->attributes, "onsubmit") >= 0)
2083 t->onsubmit = t->doorway = true;
2084 if (stringInListCI(t->attributes, "onreset") >= 0)
2085 t->onreset = t->doorway = true;
2086 if (stringInListCI(t->attributes, "onload") >= 0)
2087 t->onload = t->doorway = true;
2088 if (stringInListCI(t->attributes, "onunload") >= 0)
2089 t->onunload = t->doorway = true;
2090 if (stringInListCI(t->attributes, "checked") >= 0)
2091 t->checked = t->rchecked = true;
2092 if (stringInListCI(t->attributes, "readonly") >= 0)
2093 t->rdonly = true;
2094 if (stringInListCI(t->attributes, "disabled") >= 0)
2095 t->disabled = true;
2096 if (stringInListCI(t->attributes, "multiple") >= 0)
2097 t->multiple = true;
2098 if (stringInListCI(t->attributes, "async") >= 0)
2099 t->async = true;
2100 if ((j = stringInListCI(t->attributes, "name")) >= 0) {
2101 /* temporarily, make another copy; some day we'll just point to the value */
2102 v = t->atvals[j];
2103 if (v && !*v)
2104 v = 0;
2105 t->name = cloneString(v);
2106 }
2107 if ((j = stringInListCI(t->attributes, "id")) >= 0) {
2108 v = t->atvals[j];
2109 if (v && !*v)
2110 v = 0;
2111 t->id = cloneString(v);
2112 }
2113 if ((j = stringInListCI(t->attributes, "class")) >= 0) {
2114 v = t->atvals[j];
2115 if (v && !*v)
2116 v = 0;
2117 t->jclass = cloneString(v);
2118 }
2119 if ((j = stringInListCI(t->attributes, "value")) >= 0) {
2120 v = t->atvals[j];
2121 if (v && !*v)
2122 v = 0;
2123 t->value = cloneString(v);
2124 t->rvalue = cloneString(v);
2125 }
2126 // Resolve href against the base, but wait a minute, what if it's <p href=blah>
2127 // and we're not suppose to resolve it? I don't ask about the parent node.
2128 // Well, in general, I don't carry the href attribute into the js node.
2129 // I only do it when it is relevant, such as <a> or <area>.
2130 // See the exceptions in pushAttributes() in this file.
2131 // I know, it's confusing.
2132 if ((j = stringInListCI(t->attributes, "href")) >= 0) {
2133 v = t->atvals[j];
2134 if (v && !*v)
2135 v = 0;
2136 if (v) {
2137 v = resolveURL(cf->hbase, v);
2138 cnzFree(t->atvals[j]);
2139 t->atvals[j] = v;
2140 if (action == TAGACT_BASE && !cf->baseset) {
2141 nzFree(cf->hbase);
2142 cf->hbase = cloneString(v);
2143 cf->baseset = true;
2144 }
2145 t->href = cloneString(v);
2146 }
2147 }
2148 if ((j = stringInListCI(t->attributes, "src")) >= 0) {
2149 v = t->atvals[j];
2150 if (v && !*v)
2151 v = 0;
2152 if (v) {
2153 v = resolveURL(cf->hbase, v);
2154 cnzFree(t->atvals[j]);
2155 t->atvals[j] = v;
2156 if (!t->href)
2157 t->href = cloneString(v);
2158 }
2159 }
2160 if ((j = stringInListCI(t->attributes, "action")) >= 0) {
2161 v = t->atvals[j];
2162 if (v && !*v)
2163 v = 0;
2164 if (v) {
2165 v = resolveURL(cf->hbase, v);
2166 cnzFree(t->atvals[j]);
2167 t->atvals[j] = v;
2168 if (!t->href)
2169 t->href = cloneString(v);
2170 }
2171 }
2172
2173 /* href=javascript:foo() is another doorway into js */
2174 if (t->href && memEqualCI(t->href, "javascript:", 11))
2175 t->doorway = true;
2176 /* And of course the primary doorway */
2177 if (action == TAGACT_SCRIPT) {
2178 t->doorway = true;
2179 t->scriptgen = htmlGenerated;
2180 }
2181
2182 intoTree(t);
2183 }
2184 } /* intoTree */
2185
/*********************************************************************
Detach every child of t from the tree and mark it deleted.
A child with no js object (jv is null) is passed on to killTag();
a child that still has one is merely unlinked and flagged.
On return t has no children.
*********************************************************************/
void underKill(Tag *t)
{
	Tag *child = t->firstchild;

	while (child) {
		/* remember the successor before the link is wiped out */
		Tag *next = child->sibling;

		child->parent = 0;
		child->sibling = 0;
		child->deleted = true;
		if (!child->jv)
			killTag(child);

		child = next;
	}

	t->firstchild = NULL;
}
2198
/*********************************************************************
Kill a tag: flag it dead and deleted, forget its js object,
take it out of its parent's list of children,
and then dispose of its own children through underKill().
cw->deadTags is incremented once for t itself,
and once more for its balance tag when there is one.
*********************************************************************/
void killTag(Tag *t)
{
	Tag *up;
	Tag **link;

	debugPrint(4, "kill tag %s %d", t->info->name, t->seqno);

	t->dead = true;
	++cw->deadTags;
	if (t->balance) {
		t->balance->dead = true;
		++cw->deadTags;
	}
	t->deleted = true;
	t->jv = NULL;

	/* Unhook t from the tree above; nothing to do if it has no parent. */
	up = t->parent;
	if (up) {
		t->parent = NULL;
		/* link walks over each pointer that could be pointing at t:
		 * first the parent's firstchild, then each sibling field. */
		for (link = &up->firstchild; *link; link = &(*link)->sibling) {
			if (*link == t) {
				*link = t->sibling;
				break;
			}
		}
	}

	underKill(t);
}
2233
2234 /* Parse some html, as generated by innerHTML or document.write. */
html_from_setter(jsobjtype inner,const char * h)2235 void html_from_setter(jsobjtype inner, const char *h)
2236 {
2237 Tag *t = NULL;
2238 int l = 0;
2239 debugPrint(4, "Generated {%s}", h);
2240 t = tagFromJavaVar(inner);
2241 if (!t) {
2242 debugPrint(1,
2243 "innerHTML finds no tag for %p, cannot parse",
2244 inner);
2245 return;
2246 }
2247 debugPrint(4, "parse under %s %d", t->info->name, t->seqno);
2248 l = cw->numTags;
2249
2250 /* Cut all the children away from t */
2251 underKill(t);
2252
2253 html2nodes(h, false);
2254 htmlGenerated = true;
2255 htmlNodesIntoTree(l, t);
2256 prerender(0);
2257 innerParent = inner;
2258 decorate(0);
2259 innerParent = 0;
2260 } /* html_from_setter */
2261
/*********************************************************************
Break an inline style string, as in style="color:red; font-size:12px",
into its name:value declarations, and set each one as a string property
on the js object so, within the current frame cf.
Declarations are separated by semicolons. A declaration without a colon,
or with an empty name, is skipped. Each name is passed through
camelCase() before it is set.
A null stylestring is ignored.
The parse works on a private copy, so stylestring is not modified.
*********************************************************************/
static void processStyles(jsobjtype so, const char *stylestring)
{
	char *workstring;
	char *s;		// gets truncated to the style name
	char *sv;		// the value that goes with that name
	char *next;		// start of the following declaration

	if (!stylestring)
		return;
	workstring = cloneString(stylestring);
	if (!workstring)
		return;

	// Leading whitespace is skipped in front of every later declaration,
	// see below; do the same for the first one,
	// so that " color:red" does not yield a name that starts with a space.
	s = workstring;
	skipWhite2(&s);

	for (; *s; s = next) {
		next = strchr(s, ';');
		if (!next) {
			// last declaration, park next on the null terminator
			next = s + strlen(s);
		} else {
			*next++ = 0;
			skipWhite2(&next);
		}
		sv = strchr(s, ':');
		// if there was something there, but it didn't
		// adhere to the expected syntax, skip this pair
		if (sv) {
			*sv++ = '\0';
			skipWhite2(&sv);
			trimWhite(s);
			trimWhite(sv);
			// the property name has to be nonempty
			if (*s) {
				camelCase(s);
				set_property_string(cf, so, s, sv);
				// Should we set a specification level here, perhaps high,
				// so the css sheets don't overwrite it?
				// sv + "$$scy" = 99999;
			}
		}
	}
	nzFree(workstring);
}				/* processStyles */
2296