1 /*
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
27
28 #include <memory>
29 #include <utility>
30
31 #include "base/single_thread_task_runner.h"
32 #include "third_party/blink/public/platform/platform.h"
33 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
34 #include "third_party/blink/renderer/core/html/parser/text_resource_decoder.h"
35 #include "third_party/blink/renderer/core/html_names.h"
36 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
37 #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
38 #include "third_party/blink/renderer/platform/wtf/functional.h"
39 #include "third_party/blink/renderer/platform/wtf/text/text_position.h"
40
41 namespace blink {
42
// On a network with high latency and high bandwidth, using a device with a fast
// CPU, we could end up speculatively tokenizing the whole document, well ahead
// of when the main thread actually needs it. This is a waste of memory (and
// potentially time if the speculation fails). So we limit our outstanding
// tokens arbitrarily to 10,000. Our maximal memory spent speculating will be
// approximately:
// (kOutstandingTokenLimit + kPendingTokenLimit) * sizeof(CompactToken)
//
// We use a separate low and high water mark to avoid
// constantly topping off the main thread's token buffer. At time of writing,
// this is (10000 + 1000) * 28 bytes = ~308kb of memory. These numbers have not
// been tuned.
static constexpr size_t kOutstandingTokenLimit = 10000;

// We limit our chunks to 1000 tokens, to make sure the main thread is never
// waiting on the parser thread for tokens. This was tuned in
// https://bugs.webkit.org/show_bug.cgi?id=110408.
static constexpr size_t kPendingTokenLimit = 1000;

// PumpTokenizer() flushes a chunk once kPendingTokenLimit tokens are pending
// and only then compares the outstanding count against
// kOutstandingTokenLimit, so the latter must be the larger of the two.
static_assert(kOutstandingTokenLimit > kPendingTokenLimit,
              "Outstanding token limit is applied after pending token limit.");
64
Create(std::unique_ptr<Configuration> config,scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner)65 base::WeakPtr<BackgroundHTMLParser> BackgroundHTMLParser::Create(
66 std::unique_ptr<Configuration> config,
67 scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner) {
68 auto* background_parser = new BackgroundHTMLParser(
69 std::move(config), std::move(loading_task_runner));
70 return background_parser->weak_factory_.GetWeakPtr();
71 }
72
Init(const KURL & document_url,std::unique_ptr<CachedDocumentParameters> cached_document_parameters,const MediaValuesCached::MediaValuesCachedData & media_values_cached_data,bool priority_hints_origin_trial_enabled)73 void BackgroundHTMLParser::Init(
74 const KURL& document_url,
75 std::unique_ptr<CachedDocumentParameters> cached_document_parameters,
76 const MediaValuesCached::MediaValuesCachedData& media_values_cached_data,
77 bool priority_hints_origin_trial_enabled) {
78 TRACE_EVENT1("loading", "BackgroundHTMLParser::Init", "url",
79 document_url.GetString().Utf8());
80 preload_scanner_.reset(new TokenPreloadScanner(
81 document_url, std::move(cached_document_parameters),
82 media_values_cached_data, TokenPreloadScanner::ScannerType::kMainDocument,
83 priority_hints_origin_trial_enabled));
84 }
85
Configuration()86 BackgroundHTMLParser::Configuration::Configuration() {}
87
BackgroundHTMLParser(std::unique_ptr<Configuration> config,scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner)88 BackgroundHTMLParser::BackgroundHTMLParser(
89 std::unique_ptr<Configuration> config,
90 scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner)
91 : token_(std::make_unique<HTMLToken>()),
92 tokenizer_(std::make_unique<HTMLTokenizer>(config->options)),
93 tree_builder_simulator_(config->options),
94 options_(config->options),
95 parser_(config->parser),
96 decoder_(std::move(config->decoder)),
97 loading_task_runner_(std::move(loading_task_runner)),
98 pending_csp_meta_token_index_(
99 HTMLDocumentParser::TokenizedChunk::kNoPendingToken),
100 starting_script_(false) {}
101
102 BackgroundHTMLParser::~BackgroundHTMLParser() = default;
103
AppendRawBytesFromMainThread(std::unique_ptr<Vector<char>> buffer)104 void BackgroundHTMLParser::AppendRawBytesFromMainThread(
105 std::unique_ptr<Vector<char>> buffer) {
106 TRACE_EVENT0("loading", "BackgroundHTMLParser::AppendRawBytesFromMainThread");
107 DCHECK(decoder_);
108 UpdateDocument(decoder_->Decode(buffer->data(), buffer->size()));
109 }
110
AppendDecodedBytes(const String & input)111 void BackgroundHTMLParser::AppendDecodedBytes(const String& input) {
112 DCHECK(!input_.Current().IsClosed());
113 input_.Append(input);
114 PumpTokenizer();
115 }
116
SetDecoder(std::unique_ptr<TextResourceDecoder> decoder)117 void BackgroundHTMLParser::SetDecoder(
118 std::unique_ptr<TextResourceDecoder> decoder) {
119 DCHECK(decoder);
120 decoder_ = std::move(decoder);
121 }
122
Flush()123 void BackgroundHTMLParser::Flush() {
124 DCHECK(decoder_);
125 UpdateDocument(decoder_->Flush());
126 }
127
UpdateDocument(const String & decoded_data)128 void BackgroundHTMLParser::UpdateDocument(const String& decoded_data) {
129 DocumentEncodingData encoding_data(*decoder_.get());
130 if (encoding_data != last_seen_encoding_data_) {
131 last_seen_encoding_data_ = encoding_data;
132 if (parser_)
133 parser_->DidReceiveEncodingDataFromBackgroundParser(encoding_data);
134 }
135 if (decoded_data.IsEmpty())
136 return;
137
138 AppendDecodedBytes(decoded_data);
139 }
140
ResumeFrom(std::unique_ptr<Checkpoint> checkpoint)141 void BackgroundHTMLParser::ResumeFrom(std::unique_ptr<Checkpoint> checkpoint) {
142 parser_ = checkpoint->parser;
143 token_ = std::move(checkpoint->token);
144 tokenizer_ = std::move(checkpoint->tokenizer);
145 tree_builder_simulator_.SetState(checkpoint->tree_builder_state);
146 input_.RewindTo(checkpoint->input_checkpoint, checkpoint->unparsed_input);
147 preload_scanner_->RewindTo(checkpoint->preload_scanner_checkpoint);
148 starting_script_ = false;
149 PumpTokenizer();
150 }
151
StartedChunkWithCheckpoint(HTMLInputCheckpoint input_checkpoint)152 void BackgroundHTMLParser::StartedChunkWithCheckpoint(
153 HTMLInputCheckpoint input_checkpoint) {
154 // Note, we should not have to worry about the index being invalid as messages
155 // from the main thread will be processed in FIFO order.
156 input_.InvalidateCheckpointsBefore(input_checkpoint);
157 PumpTokenizer();
158 }
159
Finish()160 void BackgroundHTMLParser::Finish() {
161 MarkEndOfFile();
162 PumpTokenizer();
163 }
164
Stop()165 void BackgroundHTMLParser::Stop() {
166 ClearParser();
167 delete this;
168 }
169
ForcePlaintextForTextDocument()170 void BackgroundHTMLParser::ForcePlaintextForTextDocument() {
171 // This is only used by the TextDocumentParser (a subclass of
172 // HTMLDocumentParser) to force us into the PLAINTEXT state w/o using a
173 // <plaintext> tag. The TextDocumentParser uses a <pre> tag for historical /
174 // compatibility reasons.
175 tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
176 }
177
ClearParser()178 void BackgroundHTMLParser::ClearParser() {
179 parser_.Clear();
180 }
181
MarkEndOfFile()182 void BackgroundHTMLParser::MarkEndOfFile() {
183 DCHECK(!input_.Current().IsClosed());
184 input_.Append(String(&kEndOfFileMarker, 1));
185 input_.Close();
186 }
187
PumpTokenizer()188 void BackgroundHTMLParser::PumpTokenizer() {
189 TRACE_EVENT0("loading", "BackgroundHTMLParser::pumpTokenizer");
190 HTMLTreeBuilderSimulator::SimulatedToken simulated_token =
191 HTMLTreeBuilderSimulator::kOtherToken;
192
193 // No need to start speculating until the main thread has almost caught up.
194 if (input_.TotalCheckpointTokenCount() > kOutstandingTokenLimit)
195 return;
196
197 while (tokenizer_->NextToken(input_.Current(), *token_)) {
198 {
199 TextPosition position = TextPosition(input_.Current().CurrentLine(),
200 input_.Current().CurrentColumn());
201
202 CompactHTMLToken token(token_.get(), position);
203 bool is_csp_meta_tag = false;
204 preload_scanner_->Scan(token, input_.Current(), pending_preloads_,
205 &viewport_description_, &is_csp_meta_tag);
206
207 simulated_token =
208 tree_builder_simulator_.Simulate(token, tokenizer_.get());
209
210 // Break chunks before a script tag is inserted and flag the chunk as
211 // starting a script so the main parser can decide if it should yield
212 // before processing the chunk.
213 if (simulated_token == HTMLTreeBuilderSimulator::kValidScriptStart) {
214 EnqueueTokenizedChunk();
215 starting_script_ = true;
216 }
217
218 pending_tokens_.push_back(token);
219 if (is_csp_meta_tag) {
220 pending_csp_meta_token_index_ = pending_tokens_.size() - 1;
221 }
222 }
223
224 token_->Clear();
225
226 if (simulated_token == HTMLTreeBuilderSimulator::kScriptEnd ||
227 simulated_token == HTMLTreeBuilderSimulator::kStyleEnd ||
228 simulated_token == HTMLTreeBuilderSimulator::kLink ||
229 simulated_token == HTMLTreeBuilderSimulator::kCustomElementBegin ||
230 pending_tokens_.size() >= kPendingTokenLimit) {
231 EnqueueTokenizedChunk();
232
233 // If we're far ahead of the main thread, yield for a bit to avoid
234 // consuming too much memory.
235 if (input_.TotalCheckpointTokenCount() > kOutstandingTokenLimit)
236 break;
237 }
238 }
239
240 EnqueueTokenizedChunk();
241 }
242
EnqueueTokenizedChunk()243 void BackgroundHTMLParser::EnqueueTokenizedChunk() {
244 if (pending_tokens_.IsEmpty())
245 return;
246
247 auto chunk = std::make_unique<HTMLDocumentParser::TokenizedChunk>();
248 TRACE_EVENT_WITH_FLOW0("blink,loading",
249 "BackgroundHTMLParser::sendTokensToMainThread",
250 chunk.get(), TRACE_EVENT_FLAG_FLOW_OUT);
251
252 chunk->preloads.swap(pending_preloads_);
253 if (viewport_description_.has_value())
254 chunk->viewport = viewport_description_;
255 chunk->tokenizer_state = tokenizer_->GetState();
256 chunk->tree_builder_state = tree_builder_simulator_.GetState();
257 chunk->input_checkpoint = input_.CreateCheckpoint(pending_tokens_.size());
258 chunk->preload_scanner_checkpoint = preload_scanner_->CreateCheckpoint();
259 chunk->tokens.swap(pending_tokens_);
260 chunk->starting_script = starting_script_;
261 chunk->pending_csp_meta_token_index = pending_csp_meta_token_index_;
262 starting_script_ = false;
263 pending_csp_meta_token_index_ =
264 HTMLDocumentParser::TokenizedChunk::kNoPendingToken;
265
266 if (parser_)
267 parser_->EnqueueTokenizedChunk(std::move(chunk));
268 }
269
270 } // namespace blink
271