1 /*
2 This file is part of the KDE libraries
3
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1999,2001 Lars Knoll (knoll@kde.org)
7 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
8 (C) 2003 Apple Computer, Inc.
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
19
20 You should have received a copy of the GNU Library General Public License
21 along with this library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA.
24 */
25 //----------------------------------------------------------------------------
26 //
27 // KDE HTML Widget -- HTML Parser
28 // #define PARSER_DEBUG
29
30 #include "htmlparser.h"
31
32 #include <dom/dom_exception.h>
33
34 #include <html/html_baseimpl.h>
35 #include <html/html_blockimpl.h>
36 #include <html/html_canvasimpl.h>
37 #include <html/html_documentimpl.h>
38 #include <html/html_elementimpl.h>
39 #include <html/html_formimpl.h>
40 #include <html/html_headimpl.h>
41 #include <html/html_imageimpl.h>
42 #include <html/html_inlineimpl.h>
43 #include <html/html_listimpl.h>
44 #include <html/html_miscimpl.h>
45 #include <html/html_tableimpl.h>
46 #include <html/html_objectimpl.h>
47 #include <html/HTMLAudioElement.h>
48 #include <html/HTMLVideoElement.h>
49 #include <html/HTMLSourceElement.h>
50 #include <xml/dom_textimpl.h>
51 #include <xml/dom_nodeimpl.h>
52 #include <html/htmltokenizer.h>
53 #include <khtmlview.h>
54 #include <khtml_part.h>
55 #include <khtml_global.h>
56 #include <css/cssproperties.h>
57 #include <css/cssvalues.h>
58
59 #include <rendering/render_object.h>
60
61 #include "khtml_debug.h"
62 #include <klocalizedstring.h>
63
64 // Turn off gnu90 inlining to avoid linker errors
65 #undef __GNUC_STDC_INLINE__
66 #undef __GNUC_GNU_INLINE__
67 #include <doctypes.h>
68
69 #undef OPTIONAL // for win32, MinGW
70
71 using namespace DOM;
72 using namespace khtml;
73
74 #ifdef PARSER_DEBUG
getParserPrintableName(int id)75 static QString getParserPrintableName(int id)
76 {
77 if (id >= ID_CLOSE_TAG) {
78 return "/" + getPrintableName(id - ID_CLOSE_TAG);
79 } else {
80 return getPrintableName(id);
81 }
82 }
83 #endif
84
85 //----------------------------------------------------------------------------
86
87 /**
88 * @internal
89 */
90 class HTMLStackElem
91 {
92 public:
HTMLStackElem(int _id,int _level,DOM::NodeImpl * _node,bool _inline_,HTMLStackElem * _next)93 HTMLStackElem(int _id,
94 int _level,
95 DOM::NodeImpl *_node,
96 bool _inline_,
97 HTMLStackElem *_next)
98 :
99 id(_id),
100 level(_level),
101 strayTableContent(false),
102 m_inline(_inline_),
103 node(_node),
104 next(_next)
105 {
106 node->ref();
107 }
108
~HTMLStackElem()109 ~HTMLStackElem()
110 {
111 node->deref();
112 }
113
setNode(NodeImpl * newNode)114 void setNode(NodeImpl *newNode)
115 {
116 newNode->ref();
117 node->deref();
118 node = newNode;
119 }
120
121 int id;
122 int level;
123 bool strayTableContent;
124 bool m_inline;
125 NodeImpl *node;
126 HTMLStackElem *next;
127 };
128
129 /**
130 * @internal
131 *
132 * The parser parses tokenized input into the document, building up the
133 * document tree. If the document is wellformed, parsing it is
134 * straightforward.
135 * Unfortunately, people can't write wellformed HTML documents, so the parser
136 * has to be tolerant about errors.
137 *
138 * We have to take care of the following error conditions:
139 * 1. The element being added is explicitly forbidden inside some outer tag.
140 * In this case we should close all tags up to the one, which forbids
141 * the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be that
 * the person writing the document forgot some tag in between (or that the
 * tag in between is optional...). This could be the case with the following
 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We want to add a block element inside an inline element. Close all
 * inline elements up to the next higher block element.
148 * 4. If this doesn't help close elements, until we are allowed to add the
149 * element or ignore the tag.
150 *
151 */
152
KHTMLParser(KHTMLView * _parent,DocumentImpl * doc)153 KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentImpl *doc)
154 {
155 //qCDebug(KHTML_LOG) << "parser constructor";
156 #if SPEED_DEBUG > 0
157 qt.start();
158 #endif
159
160 HTMLWidget = _parent;
161 document = doc;
162
163 blockStack = nullptr;
164 current = nullptr;
165
166 // ID_CLOSE_TAG == Num of tags
167 forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
168
169 reset();
170 }
171
KHTMLParser(DOM::DocumentFragmentImpl * i,DocumentImpl * doc)172 KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentImpl *doc)
173 {
174 HTMLWidget = nullptr;
175 document = doc;
176
177 forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
178
179 blockStack = nullptr;
180 current = nullptr;
181
182 reset();
183
184 setCurrent(i);
185
186 inBody = true;
187 }
188
~KHTMLParser()189 KHTMLParser::~KHTMLParser()
190 {
191 #if SPEED_DEBUG > 0
192 qCDebug(KHTML_LOG) << "TIME: parsing time was = " << qt.elapsed();
193 #endif
194
195 freeBlock();
196
197 if (current) {
198 current->deref();
199 }
200
201 delete [] forbiddenTag;
202 delete isindex;
203 }
204
reset()205 void KHTMLParser::reset()
206 {
207 setCurrent(document);
208
209 freeBlock();
210
211 // before parsing no tags are forbidden...
212 memset(forbiddenTag, 0, (ID_CLOSE_TAG + 1)*sizeof(ushort));
213
214 inBody = false;
215 haveFrameSet = false;
216 haveContent = false;
217 haveBody = false;
218 haveTitle = false;
219 inSelect = false;
220 inStrayTableContent = 0;
221 m_inline = false;
222
223 form = nullptr;
224 map = nullptr;
225 end = false;
226 isindex = nullptr;
227
228 discard_until = 0;
229 }
230
parseToken(Token * t)231 void KHTMLParser::parseToken(Token *t)
232 {
233 if (t->tid > 2 * ID_CLOSE_TAG) {
234 // qCDebug(KHTML_LOG) << "Unknown tag!! tagID = " << t->tid;
235 return;
236 }
237 if (discard_until) {
238 if (t->tid == discard_until) {
239 discard_until = 0;
240 }
241
242 // do not skip </iframe>
243 if (discard_until || current->id() + ID_CLOSE_TAG != t->tid) {
244 return;
245 }
246 }
247
248 #ifdef PARSER_DEBUG
249 qCDebug(KHTML_LOG) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")"
250 << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")";
251 qCDebug(KHTML_LOG) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent;
252 #endif
253
254 // holy shit. apparently some sites use </br> instead of <br>
255 // be compatible with IE and NS
256 if (t->tid == ID_BR + ID_CLOSE_TAG && document->inCompatMode()) {
257 t->tid -= ID_CLOSE_TAG;
258 }
259
260 if (t->tid > ID_CLOSE_TAG) {
261 processCloseTag(t);
262 return;
263 }
264
265 // ignore spaces, if we're not inside a paragraph or other inline code
266 if (t->tid == ID_TEXT && t->text) {
267 if (inBody && !skipMode() &&
268 current->id() != ID_STYLE && current->id() != ID_TITLE &&
269 current->id() != ID_SCRIPT &&
270 !t->text->containsOnlyWhitespace()) {
271 haveContent = true;
272 }
273 #ifdef PARSER_DEBUG
274
275 qCDebug(KHTML_LOG) << "length=" << t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'";
276 #endif
277 }
278
279 NodeImpl *n = getElement(t);
280 // just to be sure, and to catch currently unimplemented stuff
281 if (!n) {
282 return;
283 }
284
285 // set attributes
286 if (n->isElementNode() && t->tid != ID_ISINDEX) {
287 ElementImpl *e = static_cast<ElementImpl *>(n);
288 e->setAttributeMap(t->attrs);
289 }
290
291 // if this tag is forbidden inside the current context, pop
292 // blocks until we are allowed to add it...
293 while (blockStack && forbiddenTag[t->tid]) {
294 #ifdef PARSER_DEBUG
295 qCDebug(KHTML_LOG) << "t->id: " << t->tid << " is forbidden :-( ";
296 #endif
297 popOneBlock();
298 }
299
300 // sometimes flat doesn't make sense
301 switch (t->tid) {
302 case ID_SELECT:
303 case ID_OPTION:
304 t->flat = false;
305 }
306
307 // the tokenizer needs the feedback for space discarding
308 if (tagPriority(t->tid) == 0) {
309 t->flat = true;
310 }
311
312 if (!insertNode(n, t->flat)) {
313 // we couldn't insert the node...
314 #ifdef PARSER_DEBUG
315 qCDebug(KHTML_LOG) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!";
316 #endif
317 if (map == n) {
318 #ifdef PARSER_DEBUG
319 qCDebug(KHTML_LOG) << " --> resetting map!";
320 #endif
321 map = nullptr;
322 }
323 if (form == n) {
324 #ifdef PARSER_DEBUG
325 qCDebug(KHTML_LOG) << " --> resetting form!";
326 #endif
327 form = nullptr;
328 }
329 delete n;
330 }
331 }
332
parseDoctypeToken(DoctypeToken * t)333 void KHTMLParser::parseDoctypeToken(DoctypeToken *t)
334 {
335 // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
336 if (current != document || document->doctype()) {
337 return;
338 }
339
340 DocumentTypeImpl *doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);
341 if (!t->internalSubset.isEmpty()) {
342 doctype->setInternalSubset(t->internalSubset);
343 }
344 document->addChild(doctype);
345
346 // Determine parse mode here
347 // This code more or less mimics Mozilla's implementation.
348 //
349 // There are three possible parse modes:
350 // COMPAT - quirks mode emulates WinIE
351 // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
352 // be omitted from numbers.
353 // ALMOST STRICT - This mode is identical to strict mode
354 // except for its treatment of line-height in the inline box model. For
355 // now (until the inline box model is re-written), this mode is identical
356 // to STANDARDS mode.
357 // STRICT - no quirks apply. Web pages will obey the specifications to
358 // the letter.
359
360 if (!document->isHTMLDocument()) { // FIXME Could document be non-HTML?
361 return;
362 }
363 DOM::HTMLDocumentImpl *htmldoc = static_cast<DOM::HTMLDocumentImpl *>(document);
364 if (t->name.toLower() == "html") {
365 if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) {
366 // Internal subsets always denote full standards, as does
367 // a doctype without a public ID.
368 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
369 } else {
370 // We have to check a list of public IDs to see what we
371 // should do.
372 QString lowerPubID = t->publicID.toLower();
373 QByteArray pubIDStr = lowerPubID.toLocal8Bit();
374
375 // Look up the entry in our gperf-generated table.
376 const PubIDInfo *doctypeEntry = Perfect_Hash::findDoctypeEntry(pubIDStr.constData(), t->publicID.length());
377 if (!doctypeEntry) {
378 // The DOCTYPE is not in the list. Assume strict mode.
379 // ### Doesn't make any sense, but it's what Mozilla does.
380 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
381 } else {
382 switch ((!t->systemID.isEmpty()) ?
383 doctypeEntry->mode_if_sysid :
384 doctypeEntry->mode_if_no_sysid) {
385 case PubIDInfo::eQuirks3:
386 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
387 break;
388 case PubIDInfo::eQuirks:
389 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4);
390 break;
391 case PubIDInfo::eAlmostStandards:
392 htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4);
393 break;
394 default:
395 assert(!"Unknown parse mode");
396 }
397 }
398 }
399 } else {
400 // Malformed doctype implies quirks mode.
401 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
402 }
403 }
404
isTableRelatedTag(int id)405 static bool isTableRelatedTag(int id)
406 {
407 return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
408 id == ID_TH);
409 }
410
insertNode(NodeImpl * n,bool flat)411 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
412 {
413 int id = n->id();
414
415 // <table> is never allowed inside stray table content. Always pop out of the stray table content
416 // and close up the first table, and then start the second table as a sibling.
417 if (inStrayTableContent && id == ID_TABLE) {
418 popBlock(ID_TABLE);
419 }
420
421 // let's be stupid and just try to insert it.
422 // this should work if the document is wellformed
423 #ifdef PARSER_DEBUG
424 NodeImpl *tmp = current;
425 #endif
426 NodeImpl *newNode = current->addChild(n);
427 if (newNode) {
428 #ifdef PARSER_DEBUG
429 qCDebug(KHTML_LOG) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string();
430 #endif
431 // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case
432 if (current->id() == ID_TABLE && id == ID_FORM) {
433 flat = true;
434 static_cast<HTMLFormElementImpl *>(n)->setMalformed(true);
435 }
436
437 // don't push elements without end tag on the stack
438 if (tagPriority(id) != 0 && !flat) {
439 #if SPEED_DEBUG < 2
440 if (!n->attached() && HTMLWidget) {
441 n->attach();
442 }
443 #endif
444 if (n->isInline()) {
445 m_inline = true;
446 }
447 pushBlock(id, tagPriority(id));
448 setCurrent(newNode);
449 } else {
450 #if SPEED_DEBUG < 2
451 if (!n->attached() && HTMLWidget) {
452 n->attach();
453 }
454 if (n->maintainsState()) {
455 document->registerMaintainsState(n);
456 document->attemptRestoreState(n);
457 }
458 n->close();
459 #endif
460 if (n->isInline()) {
461 m_inline = true;
462 }
463 }
464
465 #if SPEED_DEBUG < 1
466 if (tagPriority(id) == 0 && n->renderer()) {
467 n->renderer()->calcMinMaxWidth();
468 }
469 #endif
470 return true;
471 } else {
472 #ifdef PARSER_DEBUG
473 qCDebug(KHTML_LOG) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string();
474 #endif
475 // error handling...
476 HTMLElementImpl *e;
477 bool handled = false;
478
479 // first switch on current element for elements with optional end-tag and inline-only content
480 switch (current->id()) {
481 case ID_P:
482 case ID_DT:
483 if (!n->isInline()) {
484 popBlock(current->id());
485 return insertNode(n);
486 }
487 break;
488 case ID_TITLE:
489 popBlock(current->id());
490 return insertNode(n);
491 default:
492 break;
493 }
494
495 // switch according to the element to insert
496 switch (id) {
497 case ID_TR:
498 case ID_TH:
499 case ID_TD:
500 if (inStrayTableContent && !isTableRelatedTag(current->id())) {
501 // pop out to the nearest enclosing table-related tag.
502 while (blockStack && !isTableRelatedTag(current->id())) {
503 popOneBlock();
504 }
505 return insertNode(n);
506 }
507 break;
508 case ID_HEAD:
509 // ### allow not having <HTML> in at all, as per HTML spec
510 if (!current->isDocumentNode() && current->id() != ID_HTML) {
511 return false;
512 }
513 break;
514 case ID_COMMENT:
515 if (head) {
516 break;
517 }
518 case ID_META:
519 case ID_LINK:
520 case ID_ISINDEX:
521 case ID_BASE:
522 if (!head) {
523 createHead();
524 }
525 if (head) {
526 if (head->addChild(n)) {
527 #if SPEED_DEBUG < 2
528 if (!n->attached() && HTMLWidget) {
529 n->attach();
530 }
531 #endif
532 }
533
534 return true;
535 }
536
537 break;
538 case ID_HTML:
539 if (!current->isDocumentNode()) {
540 if (doc()->documentElement()->id() == ID_HTML) {
541 // we have another <HTML> element.... apply attributes to existing one
542 // make sure we don't overwrite already existing attributes
543 NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
544 NamedAttrMapImpl *bmap = static_cast<ElementImpl *>(doc()->documentElement())->attributes(false);
545 bool changed = false;
546 for (unsigned long l = 0; map && l < map->length(); ++l) {
547 NodeImpl::Id attrId = map->idAt(l);
548 DOMStringImpl *attrValue = map->valueAt(l);
549 changed = !bmap->getValue(attrId);
550 bmap->setValue(attrId, attrValue);
551 }
552 if (changed) {
553 doc()->recalcStyle(NodeImpl::Inherit);
554 }
555 }
556 return false;
557 }
558 break;
559 case ID_TITLE:
560 case ID_STYLE:
561 if (!head) {
562 createHead();
563 }
564 if (head) {
565 DOM::NodeImpl *newNode = head->addChild(n);
566 if (newNode) {
567 pushBlock(id, tagPriority(id));
568 setCurrent(newNode);
569 #if SPEED_DEBUG < 2
570 if (!n->attached() && HTMLWidget) {
571 n->attach();
572 }
573 #endif
574 } else {
575 #ifdef PARSER_DEBUG
576 qCDebug(KHTML_LOG) << "adding style before to body failed!!!!";
577 #endif
578 discard_until = ID_STYLE + ID_CLOSE_TAG;
579 return false;
580 }
581 return true;
582 } else if (inBody) {
583 discard_until = id + ID_CLOSE_TAG;
584 return false;
585 }
586 break;
587 case ID_SCRIPT:
588 // if we failed to insert it, go into skip mode
589 discard_until = id + ID_CLOSE_TAG;
590 break;
591 case ID_BODY:
592 if (inBody && doc()->body()) {
593 // we have another <BODY> element.... apply attributes to existing one
594 // make sure we don't overwrite already existing attributes
595 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
596 NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
597 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
598 bool changed = false;
599 for (unsigned long l = 0; map && l < map->length(); ++l) {
600 NodeImpl::Id attrId = map->idAt(l);
601 DOMStringImpl *attrValue = map->valueAt(l);
602 if (!bmap->getValue(attrId)) {
603 bmap->setValue(attrId, attrValue);
604 changed = true;
605 }
606 }
607 if (changed) {
608 doc()->recalcStyle(NodeImpl::Inherit);
609 }
610 } else if (current->isDocumentNode()) {
611 break;
612 }
613 return false;
614 break;
615
616 // the following is a hack to move non rendered elements
617 // outside of tables.
618 // needed for broken constructs like <table><form ...><tr>....
619 case ID_INPUT: {
620 ElementImpl *e = static_cast<ElementImpl *>(n);
621 DOMString type = e->getAttribute(ATTR_TYPE);
622
623 if (strcasecmp(type, "hidden") != 0) {
624 break;
625 }
626 // Fall through!
627 }
628 case ID_TEXT: {
629 // Don't try to fit random white-space anywhere
630 TextImpl *t = static_cast<TextImpl *>(n);
631 if (t->containsOnlyWhitespace()) {
632 return false;
633 }
634 // ignore text inside the following elements.
635 switch (current->id()) {
636 case ID_SELECT:
637 return false;
638 default:
639 ;
640 // fall through!!
641 };
642 break;
643 }
644 case ID_DL:
645 popBlock(ID_DT);
646 if (current->id() == ID_DL) {
647 e = new HTMLGenericElementImpl(document, ID_DD);
648 insertNode(e);
649 handled = true;
650 }
651 break;
652 case ID_DT:
653 e = new HTMLDListElementImpl(document);
654 if (insertNode(e)) {
655 insertNode(n);
656 return true;
657 }
658 break;
659 case ID_AREA: {
660 if (map) {
661 map->addChild(n);
662 #if SPEED_DEBUG < 2
663 if (!n->attached() && HTMLWidget) {
664 n->attach();
665 }
666 #endif
667 handled = true;
668 return true;
669 } else {
670 return false;
671 }
672 }
673
674 case ID_THEAD:
675 case ID_TBODY:
676 case ID_TFOOT:
677 case ID_CAPTION:
678 case ID_COLGROUP: {
679 if (isTableRelatedTag(current->id())) {
680 while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id())) {
681 popOneBlock();
682 }
683 return insertNode(n);
684 }
685 }
686 default:
687 break;
688 }
689
690 // switch on the currently active element
691 switch (current->id()) {
692 case ID_HTML:
693 switch (id) {
694 case ID_SCRIPT:
695 case ID_STYLE:
696 case ID_META:
697 case ID_LINK:
698 case ID_OBJECT:
699 case ID_EMBED:
700 case ID_TITLE:
701 case ID_ISINDEX:
702 case ID_BASE:
703 if (!head) {
704 head = new HTMLHeadElementImpl(document);
705 insertNode(head.get());
706 handled = true;
707 }
708 break;
709 case ID_TEXT: {
710 TextImpl *t = static_cast<TextImpl *>(n);
711 if (t->containsOnlyWhitespace()) {
712 return false;
713 }
714 /* Fall through to default */
715 }
716 default:
717 if (haveFrameSet) {
718 break;
719 }
720 e = new HTMLBodyElementImpl(document);
721 startBody();
722 insertNode(e);
723 handled = true;
724 break;
725 }
726 break;
727 case ID_HEAD:
728 // we can get here only if the element is not allowed in head.
729 if (id == ID_HTML) {
730 return false;
731 } else {
732 // This means the body starts here...
733 if (haveFrameSet) {
734 break;
735 }
736 popBlock(ID_HEAD);
737 e = new HTMLBodyElementImpl(document);
738 startBody();
739 insertNode(e);
740 handled = true;
741 }
742 break;
743 case ID_BODY:
744 break;
745 case ID_CAPTION:
746 // Illegal content in a caption. Close the caption and try again.
747 popBlock(ID_CAPTION);
748 switch (id) {
749 case ID_THEAD:
750 case ID_TFOOT:
751 case ID_TBODY:
752 case ID_TR:
753 case ID_TD:
754 case ID_TH:
755 return insertNode(n, flat);
756 }
757 break;
758 case ID_TABLE:
759 case ID_THEAD:
760 case ID_TFOOT:
761 case ID_TBODY:
762 case ID_TR:
763 switch (id) {
764 case ID_TABLE:
765 popBlock(ID_TABLE); // end the table
766 handled = checkChild(current->id(), id, doc()->inStrictMode());
767 break;
768 default: {
769 NodeImpl *node = current;
770 NodeImpl *parent = node->parentNode();
771 // A script may have removed the current node's parent from the DOM
772 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
773 // FIXME: we should do real recovery here and re-parent with the correct node.
774 if (!parent) {
775 return false;
776 }
777 NodeImpl *parentparent = parent->parentNode();
778
779 if (n->isTextNode() ||
780 (node->id() == ID_TR &&
781 (parent->id() == ID_THEAD ||
782 parent->id() == ID_TBODY ||
783 parent->id() == ID_TFOOT) && parentparent->id() == ID_TABLE) ||
784 (!checkChild(ID_TR, id) && (node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT) &&
785 parent->id() == ID_TABLE)) {
786 node = (node->id() == ID_TABLE) ? node :
787 ((node->id() == ID_TR) ? parentparent : parent);
788 NodeImpl *parent = node->parentNode();
789 if (!parent) {
790 return false;
791 }
792 int exceptioncode = 0;
793 #ifdef PARSER_DEBUG
794 qCDebug(KHTML_LOG) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")";
795 #endif
796 parent->insertBefore(n, node, exceptioncode);
797 if (exceptioncode) {
798 #ifndef PARSER_DEBUG
799 if (!n->isTextNode())
800 #endif
801 // qCDebug(KHTML_LOG) << "adding content before table failed..";
802 break;
803 }
804 if (n->isElementNode() && tagPriority(id) != 0 &&
805 !flat && endTagRequirement(id) != DOM::FORBIDDEN) {
806
807 pushBlock(id, tagPriority(id));
808 setCurrent(n);
809 inStrayTableContent++;
810 blockStack->strayTableContent = true;
811 }
812 return true;
813 }
814
815 if (current->id() == ID_TR) {
816 e = new HTMLTableCellElementImpl(document, ID_TD);
817 } else if (current->id() == ID_TABLE) {
818 e = new HTMLTableSectionElementImpl(document, ID_TBODY, true /* implicit */);
819 } else {
820 e = new HTMLTableRowElementImpl(document);
821 }
822
823 insertNode(e);
824 handled = true;
825 break;
826 } // end default
827 } // end switch
828 break;
829 case ID_OBJECT:
830 discard_until = id + ID_CLOSE_TAG;
831 return false;
832 case ID_UL:
833 case ID_OL:
834 case ID_DIR:
835 case ID_MENU:
836 e = new HTMLLIElementImpl(document);
837 e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
838 insertNode(e);
839 handled = true;
840 break;
841 case ID_FORM:
842 popBlock(ID_FORM);
843 handled = true;
844 break;
845 case ID_SELECT:
846 if (n->isInline()) {
847 return false;
848 }
849 break;
850 case ID_P:
851 case ID_H1:
852 case ID_H2:
853 case ID_H3:
854 case ID_H4:
855 case ID_H5:
856 case ID_H6:
857 if (!n->isInline()) {
858 popBlock(current->id());
859 handled = true;
860 }
861 break;
862 case ID_OPTION:
863 case ID_OPTGROUP:
864 if (id == ID_OPTGROUP) {
865 popBlock(current->id());
866 handled = true;
867 } else if (id == ID_SELECT) {
868 // IE treats a nested select as </select>. Let's do the same
869 popBlock(ID_SELECT);
870 break;
871 }
872 break;
873 // head elements in the body should be ignored.
874
875 case ID_ADDRESS:
876 case ID_COLGROUP:
877 case ID_FONT:
878 popBlock(current->id());
879 handled = true;
880 break;
881 default:
882 if (current->isDocumentNode()) {
883 DocumentImpl *doc = static_cast<DocumentImpl *>(current);
884 if (!doc->documentElement()) {
885 e = new HTMLHtmlElementImpl(document);
886 insertNode(e);
887 handled = true;
888 }
889 } else if (current->isInline()) {
890 popInlineBlocks();
891 handled = true;
892 }
893 }
894
895 // if we couldn't handle the error, just rethrow the exception...
896 if (!handled) {
897 //qCDebug(KHTML_LOG) << "Exception handler failed in HTMLPArser::insertNode()";
898 return false;
899 }
900
901 return insertNode(n);
902 }
903 }
904
getElement(Token * t)905 NodeImpl *KHTMLParser::getElement(Token *t)
906 {
907 NodeImpl *n = nullptr;
908
909 switch (t->tid) {
910 case ID_HTML:
911 n = new HTMLHtmlElementImpl(document);
912 break;
913 case ID_HEAD:
914 if (!head && (current->id() == ID_HTML || current->isDocumentNode())) {
915 head = new HTMLHeadElementImpl(document);
916 n = head.get();
917 }
918 break;
919 case ID_BODY:
920 // body no longer allowed if we have a frameset
921 if (haveFrameSet) {
922 break;
923 }
924 popBlock(ID_HEAD);
925 n = new HTMLBodyElementImpl(document);
926 haveBody = true;
927 startBody();
928 break;
929
930 // head elements
931 case ID_BASE:
932 n = new HTMLBaseElementImpl(document);
933 break;
934 case ID_LINK:
935 n = new HTMLLinkElementImpl(document);
936 break;
937 case ID_META:
938 n = new HTMLMetaElementImpl(document);
939 break;
940 case ID_STYLE:
941 n = new HTMLStyleElementImpl(document);
942 break;
943 case ID_TITLE:
944 // only one non-empty <title> allowed
945 if (haveTitle) {
946 discard_until = ID_TITLE + ID_CLOSE_TAG;
947 break;
948 }
949 n = new HTMLTitleElementImpl(document);
950 // we'll set haveTitle when closing the tag
951 break;
952
953 // frames
954 case ID_FRAME:
955 n = new HTMLFrameElementImpl(document);
956 break;
957 case ID_FRAMESET:
958 popBlock(ID_HEAD);
959 if (inBody && !haveFrameSet && !haveContent && !haveBody) {
960 popBlock(ID_BODY);
961 // ### actually for IE document.body returns the now hidden "body" element
962 // we can't implement that behavior now because it could cause too many
963 // regressions and the headaches are not worth the work as long as there is
964 // no site actually relying on that detail (Dirk)
965 if (static_cast<HTMLDocumentImpl *>(document)->body())
966 static_cast<HTMLDocumentImpl *>(document)->body()
967 ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE);
968 inBody = false;
969 }
970 if ((haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML) {
971 break;
972 }
973 n = new HTMLFrameSetElementImpl(document);
974 haveFrameSet = true;
975 startBody();
976 break;
977 // a bit a special case, since the frame is inlined...
978 case ID_IFRAME:
979 n = new HTMLIFrameElementImpl(document);
980 break;
981
982 // form elements
983 case ID_FORM:
984 // thou shall not nest <form> - NS/IE quirk
985 if (form) {
986 break;
987 }
988 n = form = new HTMLFormElementImpl(document, false);
989 break;
990 case ID_BUTTON:
991 n = new HTMLButtonElementImpl(document, form);
992 break;
993 case ID_FIELDSET:
994 n = new HTMLFieldSetElementImpl(document, form);
995 break;
996 case ID_INPUT:
997 if (t->attrs &&
998 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
999 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
1000 !strcasecmp(t->attrs->getValue(ATTR_TYPE), "image")) {
1001 const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1002 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1003 return nullptr;
1004 }
1005 }
1006 n = new HTMLInputElementImpl(document, form);
1007 break;
1008 case ID_ISINDEX:
1009 n = handleIsindex(t);
1010 if (!inBody) {
1011 isindex = n;
1012 n = nullptr;
1013 } else {
1014 t->flat = true;
1015 }
1016 break;
1017 case ID_KEYGEN:
1018 n = new HTMLKeygenElementImpl(document, form);
1019 break;
1020 case ID_LABEL:
1021 n = new HTMLLabelElementImpl(document);
1022 break;
1023 case ID_LEGEND:
1024 n = new HTMLLegendElementImpl(document, form);
1025 break;
1026 case ID_OPTGROUP:
1027 n = new HTMLOptGroupElementImpl(document, form);
1028 break;
1029 case ID_OPTION:
1030 popOptionalBlock(ID_OPTION);
1031 n = new HTMLOptionElementImpl(document, form);
1032 break;
1033 case ID_SELECT:
1034 inSelect = true;
1035 n = new HTMLSelectElementImpl(document, form);
1036 break;
1037 case ID_TEXTAREA:
1038 n = new HTMLTextAreaElementImpl(document, form);
1039 break;
1040
1041 // lists
1042 case ID_DL:
1043 n = new HTMLDListElementImpl(document);
1044 break;
1045 case ID_DD:
1046 popOptionalBlock(ID_DT);
1047 popOptionalBlock(ID_DD);
1048 n = new HTMLGenericElementImpl(document, t->tid);
1049 break;
1050 case ID_DT:
1051 popOptionalBlock(ID_DD);
1052 popOptionalBlock(ID_DT);
1053 n = new HTMLGenericElementImpl(document, t->tid);
1054 break;
1055 case ID_UL: {
1056 n = new HTMLUListElementImpl(document);
1057 break;
1058 }
1059 case ID_OL: {
1060 n = new HTMLOListElementImpl(document);
1061 break;
1062 }
1063 case ID_DIR:
1064 n = new HTMLDirectoryElementImpl(document);
1065 break;
1066 case ID_MENU:
1067 n = new HTMLMenuElementImpl(document);
1068 break;
1069 case ID_LI:
1070 popOptionalBlock(ID_LI);
1071 n = new HTMLLIElementImpl(document);
1072 break;
1073 // formatting elements (block)
1074 case ID_BLOCKQUOTE:
1075 n = new HTMLGenericElementImpl(document, t->tid);
1076 break;
1077 case ID_LAYER:
1078 case ID_ILAYER:
1079 n = new HTMLLayerElementImpl(document, t->tid);
1080 break;
1081 case ID_P:
1082 case ID_DIV:
1083 n = new HTMLDivElementImpl(document, t->tid);
1084 break;
1085 case ID_H1:
1086 case ID_H2:
1087 case ID_H3:
1088 case ID_H4:
1089 case ID_H5:
1090 case ID_H6:
1091 n = new HTMLGenericElementImpl(document, t->tid);
1092 break;
1093 case ID_HR:
1094 n = new HTMLHRElementImpl(document);
1095 break;
1096 case ID_PRE:
1097 case ID_XMP:
1098 case ID_PLAINTEXT:
1099 case ID_LISTING:
1100 n = new HTMLPreElementImpl(document, t->tid);
1101 break;
1102
1103 // font stuff
1104 case ID_BASEFONT:
1105 n = new HTMLBaseFontElementImpl(document);
1106 break;
1107 case ID_FONT:
1108 n = new HTMLFontElementImpl(document);
1109 break;
1110
1111 // ins/del
1112 case ID_DEL:
1113 case ID_INS:
1114 n = new HTMLGenericElementImpl(document, t->tid);
1115 break;
1116
1117 // anchor
1118 case ID_A:
1119 popBlock(ID_A);
1120
1121 n = new HTMLAnchorElementImpl(document);
1122 break;
1123
1124 // images
1125 case ID_IMAGE:
1126 case ID_IMG:
1127 if (t->attrs &&
1128 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
1129 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled()) {
1130 const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1131 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1132 return nullptr;
1133 }
1134 }
1135 n = new HTMLImageElementImpl(document, form);
1136 break;
1137
1138 case ID_CANVAS:
1139 n = new HTMLCanvasElementImpl(document);
1140 break;
1141
1142 case ID_MAP:
1143 map = new HTMLMapElementImpl(document);
1144 n = map;
1145 break;
1146 case ID_AREA:
1147 n = new HTMLAreaElementImpl(document);
1148 break;
1149
1150 // objects, applets and scripts
1151 case ID_APPLET:
1152 n = new HTMLAppletElementImpl(document);
1153 break;
1154 case ID_EMBED:
1155 n = new HTMLEmbedElementImpl(document);
1156 break;
1157 case ID_OBJECT:
1158 n = new HTMLObjectElementImpl(document);
1159 break;
1160 case ID_PARAM:
1161 n = new HTMLParamElementImpl(document);
1162 break;
1163 case ID_SCRIPT: {
1164 HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document);
1165 scriptElement->setCreatedByParser(true);
1166 n = scriptElement;
1167 break;
1168 }
1169
1170 // media
1171 case ID_AUDIO:
1172 n = new HTMLAudioElement(document);
1173 break;
1174 case ID_VIDEO:
1175 n = new HTMLVideoElement(document);
1176 break;
1177 case ID_SOURCE:
1178 n = new HTMLSourceElement(document);
1179 break;
1180
1181 // tables
1182 case ID_TABLE:
1183 n = new HTMLTableElementImpl(document);
1184 break;
1185 case ID_CAPTION:
1186 n = new HTMLTableCaptionElementImpl(document);
1187 break;
1188 case ID_COLGROUP:
1189 case ID_COL:
1190 n = new HTMLTableColElementImpl(document, t->tid);
1191 break;
1192 case ID_TR:
1193 popBlock(ID_TR);
1194 n = new HTMLTableRowElementImpl(document);
1195 break;
1196 case ID_TD:
1197 case ID_TH:
1198 popBlock(ID_TH);
1199 popBlock(ID_TD);
1200 n = new HTMLTableCellElementImpl(document, t->tid);
1201 break;
1202 case ID_TBODY:
1203 case ID_THEAD:
1204 case ID_TFOOT:
1205 popBlock(ID_THEAD);
1206 popBlock(ID_TBODY);
1207 popBlock(ID_TFOOT);
1208 n = new HTMLTableSectionElementImpl(document, t->tid, false);
1209 break;
1210
1211 // inline elements
1212 case ID_BR:
1213 n = new HTMLBRElementImpl(document);
1214 break;
1215 case ID_Q:
1216 n = new HTMLGenericElementImpl(document, t->tid);
1217 break;
1218
1219 // elements with no special representation in the DOM
1220
1221 // block:
1222 case ID_ADDRESS:
1223 case ID_CENTER:
1224 n = new HTMLGenericElementImpl(document, t->tid);
1225 break;
1226 // inline
1227 // %fontstyle
1228 case ID_TT:
1229 case ID_U:
1230 case ID_B:
1231 case ID_I:
1232 case ID_S:
1233 case ID_STRIKE:
1234 case ID_BIG:
1235 case ID_SMALL:
1236
1237 // %phrase
1238 case ID_EM:
1239 case ID_STRONG:
1240 case ID_DFN:
1241 case ID_CODE:
1242 case ID_SAMP:
1243 case ID_KBD:
1244 case ID_VAR:
1245 case ID_CITE:
1246 case ID_ABBR:
1247 case ID_ACRONYM:
1248
1249 // %special
1250 case ID_SUB:
1251 case ID_SUP:
1252 case ID_SPAN:
1253 case ID_WBR:
1254 case ID_NOBR:
1255 if (t->tid == ID_NOBR || t->tid == ID_WBR) {
1256 popOptionalBlock(t->tid);
1257 }
1258 case ID_BDO:
1259 n = new HTMLGenericElementImpl(document, t->tid);
1260 break;
1261
1262 // these are special, and normally not rendered
1263 case ID_NOEMBED:
1264 if (!t->flat) {
1265 n = new HTMLGenericElementImpl(document, t->tid);
1266 discard_until = ID_NOEMBED + ID_CLOSE_TAG;
1267 }
1268 return n;
1269 case ID_NOFRAMES:
1270 if (!t->flat) {
1271 n = new HTMLGenericElementImpl(document, t->tid);
1272 discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
1273 }
1274 return n;
1275 case ID_NOSCRIPT:
1276 if (!t->flat) {
1277 n = new HTMLGenericElementImpl(document, t->tid);
1278 if (HTMLWidget && HTMLWidget->part()->jScriptEnabled()) {
1279 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
1280 }
1281 }
1282 return n;
1283 case ID_NOLAYER:
1284 // discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1285 return nullptr;
1286 break;
1287 case ID_MARQUEE:
1288 n = new HTMLMarqueeElementImpl(document);
1289 break;
1290 // text
1291 case ID_TEXT:
1292 // qCDebug(KHTML_LOG) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1293 n = new TextImpl(document, t->text);
1294 break;
1295 case ID_COMMENT:
1296 n = new CommentImpl(document, t->text);
1297 break;
1298 default:
1299 n = new HTMLGenericElementImpl(document, t->tid);
1300 break;
1301 // qCDebug(KHTML_LOG) << "Unknown tag " << t->tid << "!";
1302 }
1303 return n;
1304 }
1305
processCloseTag(Token * t)1306 void KHTMLParser::processCloseTag(Token *t)
1307 {
1308 // FIXME: the below only behaves according to "in body" insertion mode (HTML5 8.2.5.10)
1309 // - might need fixing when we have other insertion modes.
1310 switch (t->tid) {
1311 case ID_HTML+ID_CLOSE_TAG:
1312 case ID_BODY+ID_CLOSE_TAG:
1313 // we never trust those close tags, since stupid webpages close
1314 // them prematurely
1315 return;
1316 case ID_FORM+ID_CLOSE_TAG: // needs additional error checking. See spec.
1317 form = nullptr;
1318 if (!isElementInScope(ID_FORM)) {
1319 // Parse error. Ignore.
1320 return;
1321 }
1322 // this one is to get the right style on the body element
1323 break;
1324 case ID_MAP+ID_CLOSE_TAG:
1325 map = nullptr;
1326 break;
1327 case ID_SELECT+ID_CLOSE_TAG:
1328 inSelect = false;
1329 break;
1330 case ID_TITLE+ID_CLOSE_TAG:
1331 // Set haveTitle only if <title> isn't empty
1332 if (current->firstChild()) {
1333 haveTitle = true;
1334 }
1335 break;
1336 case ID_P+ID_CLOSE_TAG:
1337 if (!isElementInScope(ID_P)) {
1338 // Parse error. Handle as if <p> had been seen.
1339 t->tid = ID_P;
1340 parseToken(t);
1341 popBlock(ID_P);
1342 return;
1343 }
1344 break;
1345 case ID_ADDRESS+ID_CLOSE_TAG:
1346 // case ID_ARTICLE+ID_CLOSE_TAG:
1347 case ID_BLOCKQUOTE+ID_CLOSE_TAG:
1348 case ID_CENTER+ID_CLOSE_TAG:
1349 // case ID_DATAGRID+ID_CLOSE_TAG:
1350 // case ID_DETAILS+ID_CLOSE_TAG:
1351 // case ID_DIALOG+ID_CLOSE_TAG:
1352 case ID_DIR+ID_CLOSE_TAG:
1353 case ID_DIV+ID_CLOSE_TAG:
1354 case ID_DL+ID_CLOSE_TAG:
1355 case ID_FIELDSET+ID_CLOSE_TAG:
1356 // case ID_FIGURE+ID_CLOSE_TAG:
1357 // case ID_FOOTER+ID_CLOSE_TAG:
1358 // case ID_HEADER+ID_CLOSE_TAG:
1359 case ID_LISTING+ID_CLOSE_TAG:
1360 case ID_MENU+ID_CLOSE_TAG:
1361 // case ID_NAV+ID_CLOSE_TAG:
1362 case ID_OL+ID_CLOSE_TAG:
1363 case ID_PRE+ID_CLOSE_TAG:
1364 // case ID_SECTION+ID_CLOSE_TAG:
1365 case ID_UL+ID_CLOSE_TAG:
1366
1367 case ID_DD+ID_CLOSE_TAG:
1368 case ID_DT+ID_CLOSE_TAG:
1369 case ID_LI+ID_CLOSE_TAG:
1370
1371 case ID_APPLET+ID_CLOSE_TAG: // those four should also "Clear the list of active formatting elements
1372 case ID_BUTTON+ID_CLOSE_TAG: // up to the last marker." whenever we implement adoption agency.
1373 case ID_MARQUEE+ID_CLOSE_TAG:
1374 case ID_OBJECT+ID_CLOSE_TAG:
1375
1376 case ID_HEAD+ID_CLOSE_TAG: // ### according to HTML5, should be treated as 'Any other end tag'
1377 // We'll do that when proper 'Any other end tag' handling is implemented.
1378 // In the meantime, test scoping at least (#170694)
1379
1380 if (!isElementInScope(t->tid - ID_CLOSE_TAG)) {
1381 // Parse error. Ignore token.
1382 return;
1383 }
1384 break;
1385 case ID_H1:
1386 case ID_H2:
1387 case ID_H3:
1388 case ID_H4:
1389 case ID_H5:
1390 case ID_H6:
1391 if (!isHeadingInScope()) {
1392 // Parse error. Ignore token.
1393 return;
1394 }
1395 break;
1396 case ID_A: // Formatting elements - will need special handling - cf. HTML5 "adoption agency algorithm"
1397 case ID_B: // meant to replace the "residual style" handling we have now.
1398 case ID_BIG:
1399 case ID_CODE:
1400 case ID_EM:
1401 case ID_FONT:
1402 case ID_I:
1403 case ID_NOBR:
1404 case ID_S:
1405 case ID_SMALL:
1406 case ID_STRIKE:
1407 case ID_STRONG:
1408 case ID_TT:
1409 case ID_U:
1410 break;
1411
1412 default:
1413 // otherTag = true; // FIXME: implement 'Any other end tag' handling
1414 break;
1415 }
1416
1417 #ifdef PARSER_DEBUG
1418 qCDebug(KHTML_LOG) << "added the following children to " << current->nodeName().string();
1419 NodeImpl *child = current->firstChild();
1420 while (child != 0) {
1421 qCDebug(KHTML_LOG) << " " << child->nodeName().string();
1422 child = child->nextSibling();
1423 }
1424 #endif
1425
1426 generateImpliedEndTags(t->tid - ID_CLOSE_TAG);
1427 popBlock(t->tid - ID_CLOSE_TAG);
1428
1429 #ifdef PARSER_DEBUG
1430 qCDebug(KHTML_LOG) << "closeTag --> current = " << current->nodeName().string();
1431 #endif
1432 }
1433
isResidualStyleTag(int _id)1434 bool KHTMLParser::isResidualStyleTag(int _id)
1435 {
1436 switch (_id) {
1437 case ID_A:
1438 case ID_B:
1439 case ID_BIG:
1440 case ID_EM:
1441 case ID_FONT:
1442 case ID_I:
1443 case ID_NOBR:
1444 case ID_S:
1445 case ID_SMALL:
1446 case ID_STRIKE:
1447 case ID_STRONG:
1448 case ID_TT:
1449 case ID_U:
1450 case ID_DFN:
1451 case ID_CODE:
1452 case ID_SAMP:
1453 case ID_KBD:
1454 case ID_VAR:
1455 case ID_DEL:
1456 case ID_INS:
1457 return true;
1458 default:
1459 return false;
1460 }
1461 }
1462
isAffectedByResidualStyle(int _id)1463 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1464 {
1465 if (isResidualStyleTag(_id)) {
1466 return true;
1467 }
1468
1469 switch (_id) {
1470 case ID_P:
1471 case ID_DIV:
1472 case ID_BLOCKQUOTE:
1473 case ID_ADDRESS:
1474 case ID_H1:
1475 case ID_H2:
1476 case ID_H3:
1477 case ID_H4:
1478 case ID_H5:
1479 case ID_H6:
1480 case ID_CENTER:
1481 case ID_UL:
1482 case ID_OL:
1483 case ID_LI:
1484 case ID_DL:
1485 case ID_DT:
1486 case ID_DD:
1487 case ID_PRE:
1488 case ID_LISTING:
1489 return true;
1490 default:
1491 return false;
1492 }
1493 }
1494
// Fixes up the tree when a residual style tag is closed although blocks opened
// inside it are still open, e.g. <b><p>Foo</b>Goo</p>.
//
// popBlock() calls this with the stack entry of the tag being closed when it
// found entries of a higher level above that entry. |elem| is unlinked from
// the block stack and deleted here on the success path; on the early-return
// paths the stack and the DOM are left untouched.
void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem)
{
    // Find the element that crosses over to a higher level.
    // ### For now, if there is more than one, we will only make sure we close the residual style.
    int exceptionCode = 0;
    HTMLStackElem *curr = blockStack;
    HTMLStackElem *maxElem = nullptr;     // crossing entry closest to |elem| (last one found)
    HTMLStackElem *endElem = nullptr;     // crossing entry closest to the top of the stack (first one found)
    HTMLStackElem *prev = nullptr;        // entry visited just before |curr|
    HTMLStackElem *prevMaxElem = nullptr; // entry directly above |maxElem|, if any
    bool advancedResidual = false; // ### if set we only close the residual style
    while (curr && curr != elem) {
        if (curr->level > elem->level) {
            // A crossing element we do not know how to fix up: give up entirely.
            if (!isAffectedByResidualStyle(curr->id)) {
                return;
            }
            if (maxElem) {
                advancedResidual = true;
            } else {
                endElem = curr;
            }
            maxElem = curr;
            prevMaxElem = prev;
        }

        prev = curr;
        curr = curr->next;
    }

    // Either |elem| is not on the stack at all, or nothing crosses over it.
    if (!curr || !maxElem) {
        return;
    }

    // NOTE(review): pushBlock() stores the node that was current at push time, so the
    // entry directly above X presumably holds X's own element in ->node. Under that
    // reading |residualElem| is the element being closed, |blockElem| is the element
    // of |maxElem| and |parentElem| is the parent of the residual element - confirm.
    NodeImpl *residualElem = prev->node;
    NodeImpl *blockElem = prevMaxElem ? prevMaxElem->node : current;
    RefPtr<NodeImpl> parentElem = elem->node;

    // Check to see if the reparenting that is going to occur is allowed according to the DOM.
    // FIXME: We should either always allow it or perform an additional fixup instead of
    // just bailing here.
    // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
    if (!parentElem->childAllowed(blockElem)) {
        return;
    }

    if (maxElem->node->parentNode() != elem->node && !advancedResidual) {
        // Walk the stack and remove any elements that aren't residual style tags.  These
        // are basically just being closed up.  Example:
        // <font><span>Moo<p>Goo</font></p>.
        // In the above example, the <span> doesn't need to be reopened.  It can just close.
        HTMLStackElem *currElem = maxElem->next;
        HTMLStackElem *prevElem = maxElem;
        while (currElem != elem) {
            HTMLStackElem *nextElem = currElem->next;
            if (!isResidualStyleTag(currElem->id)) {
                // Unlink the entry; the entry above it inherits its node.
                prevElem->next = nextElem;
                prevElem->setNode(currElem->node);
                delete currElem;
            } else {
                prevElem = currElem;
            }
            currElem = nextElem;
        }

        // We have to reopen residual tags in between maxElem and elem.  An example of this case is:
        // <font><i>Moo<p>Foo</font>.
        // In this case, we need to transform the part before the <p> into:
        // <font><i>Moo</i></font><i>
        // so that the <i> will remain open.  This involves the modification of elements
        // in the block stack.
        // This will also affect how we ultimately reparent the block, since we want it to end up
        // under the reopened residual tags (e.g., the <i> in the above example.)
        RefPtr<NodeImpl> prevNode = nullptr;
        RefPtr<NodeImpl> currNode = nullptr;
        currElem = maxElem;
        while (currElem->node != residualElem) {
            if (isResidualStyleTag(currElem->node->id())) {
                // Create a clone of this element.
                currNode = currElem->node->cloneNode(false);
                currElem->node->close();
                removeForbidden(currElem->id, forbiddenTag);

                // Change the stack element's node to point to the clone.
                currElem->setNode(currNode.get());

                // Attach the previous node as a child of this new node.
                if (prevNode) {
                    currNode->appendChild(prevNode.get(), exceptionCode);
                } else { // The new parent for the block element is going to be the innermost clone.
                    parentElem = currNode;
                }

                prevNode = currNode;
            }

            currElem = currElem->next;
        }

        // Now append the chain of new residual style elements if one exists.
        if (prevNode) {
            elem->node->appendChild(prevNode.get(), exceptionCode);
        }
    }

    // We need to make a clone of |residualElem| and place it just inside |blockElem|.
    // All content of |blockElem| is reparented to be under this clone.  We then
    // reparent |blockElem| using real DOM calls so that attachment/detachment will
    // be performed to fix up the rendering tree.
    // So for this example: <b>...<p>Foo</b>Goo</p>
    // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
    //
    // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
    // The guard keeps |blockElem| alive while it has no parent.
    // NOTE(review): parentNode() is dereferenced without a null check - confirm that
    // |blockElem| always has a parent at this point.
    SharedPtr<NodeImpl> guard(blockElem);
    blockElem->parentNode()->removeChild(blockElem, exceptionCode);

    if (!advancedResidual) {
        // Step 2: Clone |residualElem|.
        RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone.  We don't pick up the same kids.

        // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|
        // before we've put |newElem| into the document.  That way we'll only do one attachment of all
        // the new content (instead of a bunch of individual attachments).
        NodeImpl *currNode = blockElem->firstChild();
        while (currNode) {
            NodeImpl *nextNode = currNode->nextSibling();
            SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving
            blockElem->removeChild(currNode, exceptionCode);
            newNode->appendChild(currNode, exceptionCode);
            currNode = nextNode;

            // TODO - To be replaced.
            // Re-register form elements with currently active form, step 1 will have removed them
            // NOTE(review): |currNode| already points at the *next* sibling here, so the
            // first child is never re-registered and each later child is registered before
            // it has been moved - confirm whether that is intended.
            if (form && currNode && currNode->isGenericFormElement()) {
                HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode);
                form->registerFormElement(e);
            }
        }

        // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no
        // attachment can occur yet.
        blockElem->appendChild(newNode.get(), exceptionCode);
    }

    // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.
    parentElem->appendChild(blockElem, exceptionCode);

    // Step 6: Elide |elem|, since it is effectively no longer open.  Also update
    // the node associated with the previous stack element so that when it gets popped,
    // it doesn't make the residual element the next current node.
    HTMLStackElem *currElem = maxElem;
    HTMLStackElem *prevElem = nullptr;
    // |maxElem| lies above |elem| on the stack, so this loop runs at least once
    // and |prevElem| is non-null afterwards.
    while (currElem != elem) {
        prevElem = currElem;
        currElem = currElem->next;
    }
    prevElem->next = elem->next;
    prevElem->setNode(elem->node);
    delete elem;

    // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
    // In the above example, Goo should stay italic.
    curr = blockStack;
    HTMLStackElem *residualStyleStack = nullptr;
    while (curr && curr != endElem) {
        // We will actually schedule this tag for reopening
        // after we complete the close of this entire block.
        NodeImpl *currNode = current;
        if (isResidualStyleTag(curr->id)) {
            // We've overloaded the use of stack elements and are just reusing the
            // struct with a slightly different meaning to the variables.  Instead of chaining
            // from innermost to outermost, we build up a list of all the tags we need to reopen
            // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
            // to the outermost tag we need to reopen.
            // We also set curr->node to be the actual element that corresponds to the ID stored in
            // curr->id rather than the node that you should pop to when the element gets pulled off
            // the stack.
            popOneBlock(false); // keep the entry alive, it is reused in the list below
            curr->setNode(currNode);
            curr->next = residualStyleStack;
            residualStyleStack = curr;
        } else {
            popOneBlock();
        }

        curr = blockStack;
    }

    reopenResidualStyleTags(residualStyleStack, nullptr); // FIXME: Deal with stray table content some day
    // if it becomes necessary to do so.
}
1685
reopenResidualStyleTags(HTMLStackElem * elem,DOM::NodeImpl * malformedTableParent)1686 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent)
1687 {
1688 // Loop for each tag that needs to be reopened.
1689 while (elem) {
1690 // Create a shallow clone of the DOM node for this element.
1691 RefPtr<NodeImpl> newNode = elem->node->cloneNode(false);
1692
1693 // Append the new node. In the malformed table case, we need to insert before the table,
1694 // which will be the last child.
1695 int exceptionCode = 0;
1696 if (malformedTableParent) {
1697 malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode);
1698 } else {
1699 current->appendChild(newNode.get(), exceptionCode);
1700 }
1701 // FIXME: Is it really OK to ignore the exceptions here?
1702
1703 // Now push a new stack element for this node we just created.
1704 pushBlock(elem->id, elem->level);
1705
1706 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1707 // that it is inside a malformed table.
1708 blockStack->strayTableContent = malformedTableParent != nullptr;
1709 if (blockStack->strayTableContent) {
1710 inStrayTableContent++;
1711 }
1712
1713 // Clear our malformed table parent variable.
1714 malformedTableParent = nullptr;
1715
1716 // Update |current| manually to point to the new node.
1717 setCurrent(newNode.get());
1718
1719 // Advance to the next tag that needs to be reopened.
1720 HTMLStackElem *next = elem->next;
1721 delete elem;
1722 elem = next;
1723 }
1724 }
1725
pushBlock(int _id,int _level)1726 void KHTMLParser::pushBlock(int _id, int _level)
1727 {
1728 HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
1729
1730 blockStack = Elem;
1731 addForbidden(_id, forbiddenTag);
1732 }
1733
generateImpliedEndTags(int _id)1734 void KHTMLParser::generateImpliedEndTags(int _id)
1735 {
1736 HTMLStackElem *Elem = blockStack;
1737
1738 int level = tagPriority(_id);
1739 while (Elem && Elem->id != _id) {
1740 HTMLStackElem *NextElem = Elem->next;
1741 if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) {
1742 popOneBlock();
1743 } else {
1744 break;
1745 }
1746 Elem = NextElem;
1747 }
1748 }
1749
popOptionalBlock(int _id)1750 void KHTMLParser::popOptionalBlock(int _id)
1751 {
1752 bool found = false;
1753 HTMLStackElem *Elem = blockStack;
1754
1755 int level = tagPriority(_id);
1756 while (Elem) {
1757 if (Elem->id == _id) {
1758 found = true;
1759 break;
1760 }
1761 if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id))) {
1762 break;
1763 }
1764 Elem = Elem->next;
1765 }
1766
1767 if (found) {
1768 generateImpliedEndTags(_id);
1769 popBlock(_id);
1770 }
1771 }
1772
isElementInScope(int _id)1773 bool KHTMLParser::isElementInScope(int _id)
1774 {
1775 // HTML5 8.2.3.2
1776 HTMLStackElem *Elem = blockStack;
1777 while (Elem && Elem->id != _id) {
1778 if (DOM::checkIsScopeBoundary(Elem->id)) {
1779 return false;
1780 }
1781 Elem = Elem->next;
1782 }
1783 return Elem;
1784 }
1785
isHeadingInScope()1786 bool KHTMLParser::isHeadingInScope()
1787 {
1788 HTMLStackElem *Elem = blockStack;
1789 while (Elem && (Elem->id < ID_H1 || Elem->id > ID_H6)) {
1790 if (DOM::checkIsScopeBoundary(Elem->id)) {
1791 return false;
1792 }
1793 Elem = Elem->next;
1794 }
1795 return Elem;
1796 }
1797
// Closes the nearest open block for tag |_id| together with everything that
// was opened on top of it.
//
// Nothing happens if no such block is on the stack. If entries of a higher
// level sit above it, the close is either dropped (non residual style tag) or
// delegated to handleResidualStyleCloseTagAcrossBlocks(). Otherwise all
// entries down to and including the match are popped, and residual style tags
// that got closed along the way are reopened afterwards when the closed
// element is affected by residual style.
void KHTMLParser::popBlock(int _id)
{
    HTMLStackElem *Elem = blockStack;
    int maxLevel = 0;

#ifdef PARSER_DEBUG
    qCDebug(KHTML_LOG) << "popBlock(" << getParserPrintableName(_id) << ")";
    while (Elem) {
        qCDebug(KHTML_LOG) << "   > " << getParserPrintableName(Elem->id);
        Elem = Elem->next;
    }
    Elem = blockStack;
#endif

    // Locate the entry to close, tracking the highest level seen above it.
    while (Elem && (Elem->id != _id)) {
        if (maxLevel < Elem->level) {
            maxLevel = Elem->level;
        }
        Elem = Elem->next;
    }
    // No open block with this id: nothing to do.
    if (!Elem) {
        return;
    }

    if (maxLevel > Elem->level) {
        // We didn't match because the tag is in a different scope, e.g.,
        // <b><p>Foo</b>.  Try to correct the problem.
        if (!isResidualStyleTag(_id)) {
            return;
        }
        return handleResidualStyleCloseTagAcrossBlocks(Elem);
    }

    bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
    HTMLStackElem *residualStyleStack = nullptr; // tags to reopen, outermost first
    NodeImpl *malformedTableParent = nullptr;

    Elem = blockStack;

    // Pop from the top of the stack until the matching entry has been popped.
    while (Elem) {
        if (Elem->id == _id) {
            // Remember the counter so we can tell whether this pop decreased it.
            int strayTable = inStrayTableContent;
            popOneBlock();
            Elem = nullptr; // terminates the loop

            // This element was the root of some malformed content just inside an implicit or
            // explicit <tbody> or <tr>.
            // If we end up needing to reopen residual style tags, the root of the reopened chain
            // must also know that it is the root of malformed content inside a <tbody>/<tr>.
            if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
                // Walk up to the enclosing table; its parent receives the reopened chain.
                NodeImpl *curr = current;
                while (curr && curr->id() != ID_TABLE) {
                    curr = curr->parentNode();
                }
                malformedTableParent = curr ? curr->parentNode() : nullptr;
            }
        } else {
            // Schedule this tag for reopening
            // after we complete the close of this entire block.
            NodeImpl *currNode = current;
            if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
                // We've overloaded the use of stack elements and are just reusing the
                // struct with a slightly different meaning to the variables.  Instead of chaining
                // from innermost to outermost, we build up a list of all the tags we need to reopen
                // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
                // to the outermost tag we need to reopen.
                // We also set Elem->node to be the actual element that corresponds to the ID stored in
                // Elem->id rather than the node that you should pop to when the element gets pulled off
                // the stack.
                popOneBlock(false); // keep the entry alive, it is reused in the list below
                Elem->next = residualStyleStack;
                Elem->setNode(currNode);
                residualStyleStack = Elem;
            } else {
                popOneBlock();
            }
            Elem = blockStack;
        }
    }

    reopenResidualStyleTags(residualStyleStack, malformedTableParent);
}
1880
popOneBlock(bool delBlock)1881 void KHTMLParser::popOneBlock(bool delBlock)
1882 {
1883 HTMLStackElem *Elem = blockStack;
1884
1885 // we should never get here, but some bad html might cause it.
1886 #ifndef PARSER_DEBUG
1887 if (!Elem) {
1888 return;
1889 }
1890 #else
1891 qCDebug(KHTML_LOG) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")";
1892 #endif
1893
1894 #if SPEED_DEBUG < 1
1895 if ((Elem->node != current)) {
1896 if (current->maintainsState() && document) {
1897 document->registerMaintainsState(current);
1898 document->attemptRestoreState(current);
1899 }
1900 current->close();
1901 }
1902 #endif
1903
1904 removeForbidden(Elem->id, forbiddenTag);
1905
1906 blockStack = Elem->next;
1907 // we only set inline to false, if the element we close is a block level element.
1908 // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1909
1910 m_inline = Elem->m_inline;
1911
1912 if (current->id() == ID_FORM && form && inStrayTableContent) {
1913 form->setMalformed(true);
1914 }
1915
1916 setCurrent(Elem->node);
1917
1918 if (Elem->strayTableContent) {
1919 inStrayTableContent--;
1920 }
1921
1922 if (delBlock) {
1923 delete Elem;
1924 }
1925 }
1926
popInlineBlocks()1927 void KHTMLParser::popInlineBlocks()
1928 {
1929 while (blockStack && current->isInline() && current->id() != ID_FONT) {
1930 popOneBlock();
1931 }
1932 }
1933
freeBlock()1934 void KHTMLParser::freeBlock()
1935 {
1936 while (blockStack) {
1937 popOneBlock();
1938 }
1939 blockStack = nullptr;
1940 }
1941
createHead()1942 void KHTMLParser::createHead()
1943 {
1944 if (head || !doc()->documentElement()) {
1945 return;
1946 }
1947
1948 head = new HTMLHeadElementImpl(document);
1949 HTMLElementImpl *body = doc()->body();
1950 int exceptioncode = 0;
1951 doc()->documentElement()->insertBefore(head.get(), body, exceptioncode);
1952 if (exceptioncode) {
1953 #ifdef PARSER_DEBUG
1954 qCDebug(KHTML_LOG) << "creation of head failed!!!!:" << exceptioncode;
1955 #endif
1956 delete head.get();
1957 head = nullptr;
1958 }
1959
1960 // If the body does not exist yet, then the <head> should be pushed as the current block.
1961 if (head && !body) {
1962 pushBlock(head->id(), tagPriority(head->id()));
1963 setCurrent(head.get());
1964 }
1965 }
1966
handleIsindex(Token * t)1967 NodeImpl *KHTMLParser::handleIsindex(Token *t)
1968 {
1969 NodeImpl *n;
1970 HTMLFormElementImpl *myform = form;
1971 if (!myform) {
1972 myform = new HTMLFormElementImpl(document, true);
1973 n = myform;
1974 } else {
1975 n = new HTMLDivElementImpl(document, ID_DIV);
1976 }
1977 NodeImpl *child = new HTMLHRElementImpl(document);
1978 n->addChild(child);
1979 DOMStringImpl *a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : nullptr;
1980 DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1981 if (a) {
1982 text = a;
1983 }
1984 child = new TextImpl(document, text.implementation());
1985 n->addChild(child);
1986 child = new HTMLIsIndexElementImpl(document, myform);
1987 static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1988 n->addChild(child);
1989 child = new HTMLHRElementImpl(document);
1990 n->addChild(child);
1991
1992 return n;
1993 }
1994
startBody()1995 void KHTMLParser::startBody()
1996 {
1997 if (inBody) {
1998 return;
1999 }
2000
2001 inBody = true;
2002
2003 if (isindex) {
2004 insertNode(isindex, true /* don't decend into this node */);
2005 isindex = nullptr;
2006 }
2007 }
2008