1 /*
2     This file is part of the KDE libraries
3 
4     Copyright (C) 1997 Martin Jones (mjones@kde.org)
5               (C) 1997 Torben Weis (weis@kde.org)
6               (C) 1998 Waldo Bastian (bastian@kde.org)
7               (C) 1999 Lars Knoll (knoll@kde.org)
8               (C) 2003 Apple Computer, Inc.
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Library General Public
12     License as published by the Free Software Foundation; either
13     version 2 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Library General Public License for more details.
19 
20     You should have received a copy of the GNU Library General Public License
21     along with this library; see the file COPYING.LIB.  If not, write to
22     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23     Boston, MA 02110-1301, USA.
24 */
25 //----------------------------------------------------------------------------
26 //
27 // KDE HTML Widget -- HTML Parser
28 
29 #ifndef HTMLPARSER_H
30 #define HTMLPARSER_H
31 
// Selects how much of the pipeline runs when profiling the parser:
//   0  all
//   1  domtree + rendertree + styleForElement, no layouting
//   2  domtree only
#define SPEED_DEBUG 0

// SPEED_DEBUG is always defined (possibly as 0), so testing it with #ifdef
// would pull the header in unconditionally. The timer member declared in
// KHTMLParser is guarded by "#if SPEED_DEBUG > 0"; use the same condition here.
#if SPEED_DEBUG > 0
#include <QDateTime>
#endif
40 
41 #include "dom/dom_string.h"
42 #include "xml/dom_nodeimpl.h"
43 #include "html/html_documentimpl.h"
44 #include "html/html_headimpl.h"
45 
46 class KHTMLView;
47 class HTMLStackElem;
48 
49 namespace DOM
50 {
51 class HTMLDocumentImpl;
52 class DocumentImpl;
53 class NodeImpl;
54 class HTMLFormElementImpl;
55 class HTMLMapElementImpl;
56 class HTMLHeadElementImpl;
57 class DocumentFragmentImpl;
58 }
59 
60 namespace khtml
61 {
62 
63 class Token;
64 class DoctypeToken;
65 
66 /**
67  * The parser for html. It receives a stream of tokens from the HTMLTokenizer, and
68  * builds up the Document structure form it.
69  */
70 class KHTMLParser
71 {
72 public:
73     KHTMLParser(KHTMLView *w, DOM::DocumentImpl *i);
74     KHTMLParser(DOM::DocumentFragmentImpl *frag, DOM::DocumentImpl *doc);
75     virtual ~KHTMLParser();
76 
77     /**
78      * parses one token delivered by the tokenizer
79      */
80     void parseToken(Token *_t);
81 
82     /**
83      * parses a doctype token delivered by the tokenizer
84      */
85     void parseDoctypeToken(DoctypeToken *_t);
86 
87     /**
88      * resets the parser
89      */
90     void reset();
91 
skipMode()92     bool skipMode() const
93     {
94         return (discard_until != 0);
95     }
noSpaces()96     bool noSpaces() const
97     {
98         return (inSelect || !m_inline  || !inBody);
99     }
selectMode()100     bool selectMode() const
101     {
102         return inSelect;
103     }
104 
doc()105     DOM::HTMLDocumentImpl *doc() const
106     {
107         return static_cast<DOM::HTMLDocumentImpl *>(document);
108     }
docPtr()109     DOM::DocumentImpl *docPtr() const
110     {
111         return document;
112     }
113 
currentScriptElement()114     DOM::HTMLScriptElementImpl *currentScriptElement() const
115     {
116         return (current && current->id() == ID_SCRIPT) ? static_cast<DOM::HTMLScriptElementImpl *>(current) : nullptr;
117     }
118 
119 protected:
120 
121     KHTMLView *HTMLWidget;
122     DOM::DocumentImpl *document;
123 
124     /*
125      * generate an element from the token
126      */
127     DOM::NodeImpl *getElement(Token *);
128 
129     void processCloseTag(Token *);
130 
131     bool insertNode(DOM::NodeImpl *n, bool flat = false);
132 
133     /*
134      * The currently active element (the one new elements will be added to)
135      */
setCurrent(DOM::NodeImpl * newNode)136     void setCurrent(DOM::NodeImpl *newNode)
137     {
138         if (newNode) {
139             newNode->ref();
140         }
141         if (current) {
142             current->deref();
143         }
144         current = newNode;
145     }
146 
147 private:
148     DOM::NodeImpl *current;
149 
150     HTMLStackElem *blockStack;
151 
152     void pushBlock(int _id, int _level);
153 
154     void generateImpliedEndTags(int _id);
155     void popOptionalBlock(int _id);
156     void popBlock(int _id);
157     void popOneBlock(bool delBlock = true);
158     void popInlineBlocks();
159     bool isElementInScope(int _id);
160     bool isHeadingInScope();
161 
162     void freeBlock(void);
163 
164     void createHead();
165 
166     bool isResidualStyleTag(int _id);
167     bool isAffectedByResidualStyle(int _id);
168     void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem);
169     void reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent);
170 
171     ushort *forbiddenTag;
172 
173     /*
174      * currently active form
175      */
176     DOM::HTMLFormElementImpl *form;
177 
178     /*
179      * current map
180      */
181     DOM::HTMLMapElementImpl *map;
182 
183     /*
184      * the head element. Needed for crappy html which defines <base> after </head>
185      */
186     RefPtr<DOM::HTMLHeadElementImpl> head;
187 
188     /*
189      * a possible <isindex> element in the head. Compatibility hack for
190      * html from the stone age
191      */
192     DOM::NodeImpl *isindex;
193     DOM::NodeImpl *handleIsindex(Token *t);
194 
195     /*
196      * inserts the stupid isIndex element.
197      */
198     void startBody();
199 
200     bool inBody;
201     bool haveContent;
202     bool haveBody;
203     bool haveFrameSet;
204     bool haveTitle;
205     bool m_inline;
206     bool end;
207     bool inSelect;
208 
209     /*
210      * tells the parser to discard all tags, until it reaches the one specified
211      */
212     int discard_until;
213 
214     bool headLoaded;
215     int inStrayTableContent;
216 
217 #if SPEED_DEBUG > 0
218     QTime qt;
219 #endif
220 };
221 
222 } // namespace khtml
223 
224 #endif // HTMLPARSER_H
225 
226