1 /* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_BACKGROUND_HTML_PARSER_H_ 27 #define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_BACKGROUND_HTML_PARSER_H_ 28 29 #include <memory> 30 31 #include "base/macros.h" 32 #include "base/memory/weak_ptr.h" 33 #include "base/optional.h" 34 #include "base/single_thread_task_runner.h" 35 #include "third_party/blink/renderer/core/dom/document_encoding_data.h" 36 #include "third_party/blink/renderer/core/html/parser/background_html_input_stream.h" 37 #include "third_party/blink/renderer/core/html/parser/compact_html_token.h" 38 #include "third_party/blink/renderer/core/html/parser/html_parser_options.h" 39 #include "third_party/blink/renderer/core/html/parser/html_preload_scanner.h" 40 #include "third_party/blink/renderer/core/html/parser/html_source_tracker.h" 41 #include "third_party/blink/renderer/core/html/parser/html_tree_builder_simulator.h" 42 #include "third_party/blink/renderer/core/html/parser/text_resource_decoder.h" 43 #include "third_party/blink/renderer/core/page/viewport_description.h" 44 45 namespace blink { 46 47 class HTMLDocumentParser; 48 49 class BackgroundHTMLParser { 50 USING_FAST_MALLOC(BackgroundHTMLParser); 51 52 public: 53 struct Configuration { 54 USING_FAST_MALLOC(Configuration); 55 56 public: 57 Configuration(); 58 HTMLParserOptions options; 59 WeakPersistent<HTMLDocumentParser> parser; 60 std::unique_ptr<TextResourceDecoder> decoder; 61 }; 62 63 // The returned BackgroundHTMLParser must first be initialized by calling 64 // init(), and free by calling stop(). 65 static base::WeakPtr<BackgroundHTMLParser> Create( 66 std::unique_ptr<Configuration>, 67 scoped_refptr<base::SingleThreadTaskRunner>); 68 void Init(const KURL& document_url, 69 std::unique_ptr<CachedDocumentParameters>, 70 const MediaValuesCached::MediaValuesCachedData&, 71 bool priority_hints_origin_trial_enabled); 72 73 struct Checkpoint { 74 USING_FAST_MALLOC(Checkpoint); 75 76 public: 77 WeakPersistent<HTMLDocumentParser> parser; 78 std::unique_ptr<HTMLToken> token; 79 std::unique_ptr<HTMLTokenizer> tokenizer; 80 HTMLTreeBuilderSimulator::State tree_builder_state; 81 HTMLInputCheckpoint input_checkpoint; 82 TokenPreloadScannerCheckpoint preload_scanner_checkpoint; 83 String unparsed_input; 84 }; 85 86 void AppendRawBytesFromMainThread(std::unique_ptr<Vector<char>>); 87 void SetDecoder(std::unique_ptr<TextResourceDecoder>); 88 void Flush(); 89 void ResumeFrom(std::unique_ptr<Checkpoint>); 90 void StartedChunkWithCheckpoint(HTMLInputCheckpoint); 91 void Finish(); 92 void Stop(); 93 94 void ForcePlaintextForTextDocument(); 95 96 void ClearParser(); 97 98 private: 99 BackgroundHTMLParser(std::unique_ptr<Configuration>, 100 scoped_refptr<base::SingleThreadTaskRunner>); 101 ~BackgroundHTMLParser(); 102 103 void AppendDecodedBytes(const String&); 104 void MarkEndOfFile(); 105 void PumpTokenizer(); 106 107 void EnqueueTokenizedChunk(); 108 void UpdateDocument(const String& decoded_data); 109 110 BackgroundHTMLInputStream input_; 111 HTMLSourceTracker source_tracker_; 112 std::unique_ptr<HTMLToken> token_; 113 std::unique_ptr<HTMLTokenizer> tokenizer_; 114 HTMLTreeBuilderSimulator tree_builder_simulator_; 115 HTMLParserOptions options_; 116 WeakPersistent<HTMLDocumentParser> parser_; 117 118 CompactHTMLTokenStream pending_tokens_; 119 PreloadRequestStream pending_preloads_; 120 base::Optional<ViewportDescription> viewport_description_; 121 std::unique_ptr<TokenPreloadScanner> preload_scanner_; 122 std::unique_ptr<TextResourceDecoder> decoder_; 123 DocumentEncodingData last_seen_encoding_data_; 124 scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner_; 125 126 // Index into |pending_tokens_| of the last <meta> csp token found. Will be 127 // |TokenizedChunk::kNoPendingToken| if none have been found. 128 int pending_csp_meta_token_index_; 129 130 bool starting_script_; 131 132 base::WeakPtrFactory<BackgroundHTMLParser> weak_factory_{this}; 133 134 DISALLOW_COPY_AND_ASSIGN(BackgroundHTMLParser); 135 }; 136 137 } // namespace blink 138 139 #endif 140