1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_TREE_BUILDER_H_ 28 #define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_TREE_BUILDER_H_ 29 30 #include "base/macros.h" 31 #include "base/memory/scoped_refptr.h" 32 #include "third_party/blink/renderer/core/html/parser/html_construction_site.h" 33 #include "third_party/blink/renderer/core/html/parser/html_element_stack.h" 34 #include "third_party/blink/renderer/core/html/parser/html_parser_options.h" 35 #include "third_party/blink/renderer/platform/heap/handle.h" 36 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h" 37 #include "third_party/blink/renderer/platform/wtf/text/text_position.h" 38 #include "third_party/blink/renderer/platform/wtf/vector.h" 39 40 namespace blink { 41 42 class AtomicHTMLToken; 43 class DocumentFragment; 44 class Element; 45 class HTMLDocument; 46 class HTMLDocumentParser; 47 48 class HTMLTreeBuilder final : public GarbageCollected<HTMLTreeBuilder> { 49 public: 50 // HTMLTreeBuilder can be created for non-HTMLDocument (XHTMLDocument) from 51 // editing code. 52 // TODO(kouhei): Fix editing code to always invoke HTML parser on 53 // HTMLDocument. 54 HTMLTreeBuilder(HTMLDocumentParser*, 55 Document&, 56 ParserContentPolicy, 57 const HTMLParserOptions&, 58 bool allow_shadow_root); 59 HTMLTreeBuilder(HTMLDocumentParser*, 60 DocumentFragment*, 61 Element* context_element, 62 ParserContentPolicy, 63 const HTMLParserOptions&, 64 bool allow_shadow_root); 65 ~HTMLTreeBuilder(); 66 void Trace(Visitor*) const; 67 OpenElements()68 const HTMLElementStack* OpenElements() const { return tree_.OpenElements(); } 69 IsParsingFragment()70 bool IsParsingFragment() const { return !!fragment_context_.Fragment(); } IsParsingTemplateContents()71 bool IsParsingTemplateContents() const { 72 return tree_.OpenElements()->HasTemplateInHTMLScope(); 73 } IsParsingFragmentOrTemplateContents()74 bool IsParsingFragmentOrTemplateContents() const { 75 return IsParsingFragment() || IsParsingTemplateContents(); 76 } 77 78 void Detach(); 79 80 void ConstructTree(AtomicHTMLToken*); 81 HasParserBlockingScript()82 bool HasParserBlockingScript() const { return !!script_to_process_; } 83 // Must be called to take the parser-blocking script before calling the parser 84 // again. 85 Element* TakeScriptToProcess(TextPosition& script_start_position); 86 87 // Done, close any open tags, etc. 88 void Finished(); 89 90 // Synchronously flush pending text and queued tasks, possibly creating more 91 // DOM nodes. Flushing pending text depends on |mode|. Flush(FlushMode mode)92 void Flush(FlushMode mode) { tree_.Flush(mode); } 93 SetShouldSkipLeadingNewline(bool should_skip)94 void SetShouldSkipLeadingNewline(bool should_skip) { 95 should_skip_leading_newline_ = should_skip; 96 } 97 98 private: 99 class CharacterTokenBuffer; 100 // Represents HTML5 "insertion mode" 101 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 102 enum InsertionMode { 103 kInitialMode, 104 kBeforeHTMLMode, 105 kBeforeHeadMode, 106 kInHeadMode, 107 kInHeadNoscriptMode, 108 kAfterHeadMode, 109 kTemplateContentsMode, 110 kInBodyMode, 111 kTextMode, 112 kInTableMode, 113 kInTableTextMode, 114 kInCaptionMode, 115 kInColumnGroupMode, 116 kInTableBodyMode, 117 kInRowMode, 118 kInCellMode, 119 kInSelectMode, 120 kInSelectInTableMode, 121 kAfterBodyMode, 122 kInFramesetMode, 123 kAfterFramesetMode, 124 kAfterAfterBodyMode, 125 kAfterAfterFramesetMode, 126 }; 127 #ifndef DEBUG 128 static const char* ToString(InsertionMode); 129 #endif 130 131 void ProcessToken(AtomicHTMLToken*); 132 133 void ProcessDoctypeToken(AtomicHTMLToken*); 134 void ProcessStartTag(AtomicHTMLToken*); 135 void ProcessEndTag(AtomicHTMLToken*); 136 void ProcessComment(AtomicHTMLToken*); 137 void ProcessCharacter(AtomicHTMLToken*); 138 void ProcessEndOfFile(AtomicHTMLToken*); 139 140 bool ProcessStartTagForInHead(AtomicHTMLToken*); 141 void ProcessStartTagForInBody(AtomicHTMLToken*); 142 void ProcessStartTagForInTable(AtomicHTMLToken*); 143 void ProcessEndTagForInBody(AtomicHTMLToken*); 144 void ProcessEndTagForInTable(AtomicHTMLToken*); 145 void ProcessEndTagForInTableBody(AtomicHTMLToken*); 146 void ProcessEndTagForInRow(AtomicHTMLToken*); 147 void ProcessEndTagForInCell(AtomicHTMLToken*); 148 149 void ProcessHtmlStartTagForInBody(AtomicHTMLToken*); 150 bool ProcessBodyEndTagForInBody(AtomicHTMLToken*); 151 bool ProcessTableEndTagForInTable(); 152 bool ProcessCaptionEndTagForInCaption(); 153 bool ProcessColgroupEndTagForInColumnGroup(); 154 bool ProcessTrEndTagForInRow(); 155 // FIXME: This function should be inlined into its one call site or it 156 // needs to assert which tokens it can be called with. 157 void ProcessAnyOtherEndTagForInBody(AtomicHTMLToken*); 158 159 void ProcessCharacterBuffer(CharacterTokenBuffer&); 160 inline void ProcessCharacterBufferForInBody(CharacterTokenBuffer&); 161 162 void ProcessFakeStartTag( 163 const QualifiedName&, 164 const Vector<Attribute>& attributes = Vector<Attribute>()); 165 void ProcessFakeEndTag(const QualifiedName&); 166 void ProcessFakeEndTag(const AtomicString&); 167 void ProcessFakePEndTagIfPInButtonScope(); 168 169 void ProcessGenericRCDATAStartTag(AtomicHTMLToken*); 170 void ProcessGenericRawTextStartTag(AtomicHTMLToken*); 171 void ProcessScriptStartTag(AtomicHTMLToken*); 172 173 // Default processing for the different insertion modes. 174 void DefaultForInitial(); 175 void DefaultForBeforeHTML(); 176 void DefaultForBeforeHead(); 177 void DefaultForInHead(); 178 void DefaultForInHeadNoscript(); 179 void DefaultForAfterHead(); 180 void DefaultForInTableText(); 181 182 inline HTMLStackItem* AdjustedCurrentStackItem() const; 183 inline bool ShouldProcessTokenInForeignContent(AtomicHTMLToken*); 184 void ProcessTokenInForeignContent(AtomicHTMLToken*); 185 186 void CallTheAdoptionAgency(AtomicHTMLToken*); 187 188 void CloseTheCell(); 189 190 template <bool shouldClose(const HTMLStackItem*)> 191 void ProcessCloseWhenNestedTag(AtomicHTMLToken*); 192 193 void ParseError(AtomicHTMLToken*); 194 GetInsertionMode()195 InsertionMode GetInsertionMode() const { return insertion_mode_; } SetInsertionMode(InsertionMode mode)196 void SetInsertionMode(InsertionMode mode) { insertion_mode_ = mode; } 197 198 void ResetInsertionModeAppropriately(); 199 200 void ProcessTemplateStartTag(AtomicHTMLToken*); 201 bool ProcessTemplateEndTag(AtomicHTMLToken*); 202 bool ProcessEndOfFileForInTemplateContents(AtomicHTMLToken*); 203 204 class FragmentParsingContext { 205 DISALLOW_NEW(); 206 207 public: 208 FragmentParsingContext() = default; 209 void Init(DocumentFragment*, Element* context_element); 210 Fragment()211 DocumentFragment* Fragment() const { return fragment_; } ContextElement()212 Element* ContextElement() const { 213 DCHECK(fragment_); 214 return context_element_stack_item_->GetElement(); 215 } ContextElementStackItem()216 HTMLStackItem* ContextElementStackItem() const { 217 DCHECK(fragment_); 218 return context_element_stack_item_.Get(); 219 } 220 221 void Trace(Visitor*) const; 222 223 private: 224 Member<DocumentFragment> fragment_; 225 Member<HTMLStackItem> context_element_stack_item_; 226 227 DISALLOW_COPY_AND_ASSIGN(FragmentParsingContext); 228 }; 229 230 // https://html.spec.whatwg.org/C/#frameset-ok-flag 231 bool frameset_ok_; 232 #if DCHECK_IS_ON() 233 bool is_attached_ = true; 234 #endif 235 FragmentParsingContext fragment_context_; 236 HTMLConstructionSite tree_; 237 238 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 239 InsertionMode insertion_mode_; 240 241 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode 242 InsertionMode original_insertion_mode_; 243 244 Vector<InsertionMode> template_insertion_modes_; 245 246 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 247 StringBuilder pending_table_characters_; 248 249 bool should_skip_leading_newline_; 250 251 const bool allow_shadow_root_; 252 253 // We access parser because HTML5 spec requires that we be able to change the 254 // state of the tokenizer from within parser actions. We also need it to track 255 // the current position. 256 Member<HTMLDocumentParser> parser_; 257 258 // <script> tag which needs processing before resuming the parser. 259 Member<Element> script_to_process_; 260 261 // Starting line number of the script tag needing processing. 262 TextPosition script_to_process_start_position_; 263 264 HTMLParserOptions options_; 265 266 DISALLOW_COPY_AND_ASSIGN(HTMLTreeBuilder); 267 }; 268 269 } // namespace blink 270 271 #endif 272