1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
27
28 #include <memory>
29 #include <utility>
30
31 #include "base/auto_reset.h"
32 #include "base/numerics/safe_conversions.h"
33 #include "third_party/blink/public/common/features.h"
34 #include "third_party/blink/public/common/loader/loading_behavior_flag.h"
35 #include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
36 #include "third_party/blink/public/platform/platform.h"
37 #include "third_party/blink/public/platform/task_type.h"
38 #include "third_party/blink/renderer/core/css/media_values_cached.h"
39 #include "third_party/blink/renderer/core/css/style_engine.h"
40 #include "third_party/blink/renderer/core/dom/document_fragment.h"
41 #include "third_party/blink/renderer/core/dom/element.h"
42 #include "third_party/blink/renderer/core/frame/local_frame.h"
43 #include "third_party/blink/renderer/core/html/html_document.h"
44 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
45 #include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
46 #include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
47 #include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
48 #include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
49 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
50 #include "third_party/blink/renderer/core/html/parser/pump_session.h"
51 #include "third_party/blink/renderer/core/html_names.h"
52 #include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
53 #include "third_party/blink/renderer/core/loader/document_loader.h"
54 #include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
55 #include "third_party/blink/renderer/core/loader/preload_helper.h"
56 #include "third_party/blink/renderer/core/probe/core_probes.h"
57 #include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
58 #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
59 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
60 #include "third_party/blink/renderer/platform/heap/handle.h"
61 #include "third_party/blink/renderer/platform/heap/heap.h"
62 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
63 #include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
64 #include "third_party/blink/renderer/platform/runtime_enabled_features.h"
65 #include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
66 #include "third_party/blink/renderer/platform/scheduler/public/thread.h"
67 #include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
68 #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
69 #include "third_party/blink/renderer/platform/wtf/shared_buffer.h"
70
71 namespace blink {
72
// Running total of speculative tokens that were thrown away; only read and
// reset through the two *ForTesting accessors below.
static size_t g_discarded_token_count_for_testing{0};

// Sets the discarded-token counter back to zero.
void ResetDiscardedTokenCountForTesting() {
  g_discarded_token_count_for_testing = size_t{0};
}

// Returns the current value of the discarded-token counter.
size_t GetDiscardedTokenCountForTesting() {
  const size_t discarded = g_discarded_token_count_for_testing;
  return discarded;
}
82
// Default upper bound on the number of tokens the foreground HTML parser
// tries to process in a single go. A smaller budget generally gives a faster
// first paint; a larger budget delays first paint but brings it closer to the
// final page. Used when no Finch-provided value exists.
constexpr int kDefaultMaxTokenizationBudget{250};
89
90 class EndIfDelayedForbiddenScope;
91 class ShouldCompleteScope;
92
93 // This class encapsulates the internal state needed for synchronous foreground
94 // HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
95 // tracks what should be done after the pump completes.)
96 class HTMLDocumentParserState
97 : public GarbageCollected<HTMLDocumentParserState> {
98 friend EndIfDelayedForbiddenScope;
99 friend ShouldCompleteScope;
100
101 public:
102 // Keeps track of whether the parser needs to complete tokenization work,
103 // optionally followed by EndIfDelayed.
104 enum class DeferredParserState {
105 // Indicates that a tokenizer pump has either completed or hasn't been
106 // scheduled.
107 kNotScheduled = 0, // Enforce ordering in this enum.
108 // Indicates that a tokenizer pump is scheduled and hasn't completed yet.
109 kScheduled = 1,
110 // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled.
111 kScheduledWithEndIfDelayed = 2
112 };
113
114 enum class MetaCSPTokenState {
115 // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to
116 // defer any preloads until we've added the CSP token to the document and
117 // applied the Content Security Policy.
118 kSeen = 0,
119 // Indicates that there is no meta CSP token in the upcoming chunk.
120 kNotSeen = 1,
121 // Indicates that we've added the CSP token to the document and we can now
122 // fetch preloads.
123 kProcessed = 2,
124 // Indicates that it's too late to apply a Content-Security policy (because
125 // we've exited the header section.)
126 kUnenforceable = 3,
127 };
128
HTMLDocumentParserState(ParserSynchronizationPolicy mode)129 explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
130 : state_(DeferredParserState::kNotScheduled),
131 meta_csp_state_(MetaCSPTokenState::kNotSeen),
132 mode_(mode),
133 end_if_delayed_forbidden_(0),
134 should_complete_(0) {}
135
Trace(Visitor * v) const136 void Trace(Visitor* v) const {}
137
SetState(DeferredParserState state)138 void SetState(DeferredParserState state) {
139 DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
140 state_ = state;
141 }
GetState() const142 DeferredParserState GetState() const { return state_; }
143
IsScheduled() const144 bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }
GetStateAsString() const145 const char* GetStateAsString() const {
146 switch (state_) {
147 case DeferredParserState::kNotScheduled:
148 return "not_scheduled";
149 case DeferredParserState::kScheduled:
150 return "scheduled";
151 case DeferredParserState::kScheduledWithEndIfDelayed:
152 return "scheduled_with_end_if_delayed";
153 }
154 }
155
ShouldEndIfDelayed() const156 bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; }
ShouldComplete() const157 bool ShouldComplete() const {
158 return should_complete_ || GetMode() != kAllowDeferredParsing;
159 }
IsSynchronous() const160 bool IsSynchronous() const {
161 return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing;
162 }
GetMode() const163 ParserSynchronizationPolicy GetMode() const { return mode_; }
164
SetSeenCSPMetaTag(const bool seen)165 void SetSeenCSPMetaTag(const bool seen) {
166 if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable)
167 return;
168 if (seen)
169 meta_csp_state_ = MetaCSPTokenState::kSeen;
170 else
171 meta_csp_state_ = MetaCSPTokenState::kNotSeen;
172 }
173
SetExitedHeader()174 void SetExitedHeader() {
175 meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
176 }
HaveExitedHeader() const177 bool HaveExitedHeader() const {
178 return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
179 }
180
181 private:
EnterEndIfDelayedForbidden()182 void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; }
ExitEndIfDelayedForbidden()183 void ExitEndIfDelayedForbidden() {
184 end_if_delayed_forbidden_--;
185 DCHECK_GE(end_if_delayed_forbidden_, 0);
186 }
187
EnterShouldComplete()188 void EnterShouldComplete() { should_complete_++; }
ExitShouldComplete()189 void ExitShouldComplete() {
190 should_complete_--;
191 DCHECK_GE(should_complete_, 0);
192 }
193
194 DeferredParserState state_;
195 MetaCSPTokenState meta_csp_state_;
196 ParserSynchronizationPolicy mode_;
197 int end_if_delayed_forbidden_;
198 int should_complete_;
199 };
200
201 class EndIfDelayedForbiddenScope {
202 STACK_ALLOCATED();
203
204 public:
EndIfDelayedForbiddenScope(HTMLDocumentParserState * state)205 explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
206 : state_(state) {
207 state_->EnterEndIfDelayedForbidden();
208 }
~EndIfDelayedForbiddenScope()209 ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); }
210
211 private:
212 HTMLDocumentParserState* state_;
213 };
214
215 class ShouldCompleteScope {
216 STACK_ALLOCATED();
217
218 public:
ShouldCompleteScope(HTMLDocumentParserState * state)219 explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) {
220 state_->EnterShouldComplete();
221 }
~ShouldCompleteScope()222 ~ShouldCompleteScope() { state_->ExitShouldComplete(); }
223
224 private:
225 HTMLDocumentParserState* state_;
226 };
227
228 // This is a direct transcription of step 4 from:
229 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
TokenizerStateForContextElement(Element * context_element,bool report_errors,const HTMLParserOptions & options)230 static HTMLTokenizer::State TokenizerStateForContextElement(
231 Element* context_element,
232 bool report_errors,
233 const HTMLParserOptions& options) {
234 if (!context_element)
235 return HTMLTokenizer::kDataState;
236
237 const QualifiedName& context_tag = context_element->TagQName();
238
239 if (context_tag.Matches(html_names::kTitleTag) ||
240 context_tag.Matches(html_names::kTextareaTag))
241 return HTMLTokenizer::kRCDATAState;
242 if (context_tag.Matches(html_names::kStyleTag) ||
243 context_tag.Matches(html_names::kXmpTag) ||
244 context_tag.Matches(html_names::kIFrameTag) ||
245 context_tag.Matches(html_names::kNoembedTag) ||
246 (context_tag.Matches(html_names::kNoscriptTag) &&
247 options.scripting_flag) ||
248 context_tag.Matches(html_names::kNoframesTag))
249 return report_errors ? HTMLTokenizer::kRAWTEXTState
250 : HTMLTokenizer::kPLAINTEXTState;
251 if (context_tag.Matches(html_names::kScriptTag))
252 return report_errors ? HTMLTokenizer::kScriptDataState
253 : HTMLTokenizer::kPLAINTEXTState;
254 if (context_tag.Matches(html_names::kPlaintextTag))
255 return HTMLTokenizer::kPLAINTEXTState;
256 return HTMLTokenizer::kDataState;
257 }
258
259 class ScopedYieldTimer {
260 public:
261 // This object is created at the start of a block of parsing, and will
262 // report the time since the last block yielded if known.
ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer> * timer,HTMLParserMetrics * metrics_reporter)263 ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
264 HTMLParserMetrics* metrics_reporter)
265 : timer_(timer), reporting_metrics_(metrics_reporter) {
266 if (!reporting_metrics_ || !(*timer_))
267 return;
268
269 metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
270 timer_->reset();
271 }
272
273 // The destructor creates a new timer, which will keep track of time until
274 // the next block starts.
~ScopedYieldTimer()275 ~ScopedYieldTimer() {
276 if (reporting_metrics_)
277 *timer_ = std::make_unique<base::ElapsedTimer>();
278 }
279
280 private:
281 std::unique_ptr<base::ElapsedTimer>* timer_;
282 bool reporting_metrics_;
283 };
284
HTMLDocumentParser(HTMLDocument & document,ParserSynchronizationPolicy sync_policy)285 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
286 ParserSynchronizationPolicy sync_policy)
287 : HTMLDocumentParser(document, kAllowScriptingContent, sync_policy) {
288 script_runner_ =
289 HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);
290
291 // Allow declarative shadow DOM for the document parser, if not explicitly
292 // disabled.
293 bool allow_shadow_root = document.GetDeclarativeShadowRootAllowState() !=
294 Document::DeclarativeShadowRootAllowState::kDeny;
295 tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
296 this, document, kAllowScriptingContent, options_, allow_shadow_root);
297 }
298
HTMLDocumentParser(DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy)299 HTMLDocumentParser::HTMLDocumentParser(
300 DocumentFragment* fragment,
301 Element* context_element,
302 ParserContentPolicy parser_content_policy)
303 : HTMLDocumentParser(fragment->GetDocument(),
304 parser_content_policy,
305 kForceSynchronousParsing) {
306 // Allow declarative shadow DOM for the fragment parser only if explicitly
307 // enabled.
308 bool allow_shadow_root =
309 fragment->GetDocument().GetDeclarativeShadowRootAllowState() ==
310 Document::DeclarativeShadowRootAllowState::kAllow;
311
312 // No script_runner_ in fragment parser.
313 tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
314 this, fragment, context_element, parser_content_policy, options_,
315 allow_shadow_root);
316
317 // For now document fragment parsing never reports errors.
318 bool report_errors = false;
319 tokenizer_->SetState(TokenizerStateForContextElement(
320 context_element, report_errors, options_));
321 }
322
323 namespace {
GetMaxTokenizationBudget()324 int GetMaxTokenizationBudget() {
325 static int max = base::GetFieldTrialParamByFeatureAsInt(
326 features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget",
327 kDefaultMaxTokenizationBudget);
328 return max;
329 }
330 } // namespace
331
HTMLDocumentParser(Document & document,ParserContentPolicy content_policy,ParserSynchronizationPolicy sync_policy)332 HTMLDocumentParser::HTMLDocumentParser(Document& document,
333 ParserContentPolicy content_policy,
334 ParserSynchronizationPolicy sync_policy)
335 : ScriptableDocumentParser(document, content_policy),
336 options_(&document),
337 reentry_permit_(HTMLParserReentryPermit::Create()),
338 token_(sync_policy != kAllowAsynchronousParsing
339 ? std::make_unique<HTMLToken>()
340 : nullptr),
341 tokenizer_(sync_policy != kAllowAsynchronousParsing
342 ? std::make_unique<HTMLTokenizer>(options_)
343 : nullptr),
344 loading_task_runner_(sync_policy == kForceSynchronousParsing
345 ? nullptr
346 : document.GetTaskRunner(TaskType::kNetworking)),
347 parser_scheduler_(sync_policy == kAllowAsynchronousParsing
348 ? MakeGarbageCollected<HTMLParserScheduler>(
349 this,
350 loading_task_runner_.get())
351 : nullptr),
352 task_runner_state_(
353 MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
354 pending_csp_meta_token_(nullptr),
355 can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing),
356 end_was_delayed_(false),
357 have_background_parser_(false),
358 pump_session_nesting_level_(0),
359 pump_speculations_session_nesting_level_(0),
360 is_parsing_at_line_number_(false),
361 tried_loading_link_headers_(false),
362 added_pending_parser_blocking_stylesheet_(false),
363 is_waiting_for_stylesheets_(false),
364 scheduler_(sync_policy == kAllowDeferredParsing
365 ? Thread::Current()->Scheduler()
366 : nullptr) {
367 DCHECK(CanParseAsynchronously() || (token_ && tokenizer_));
368 // Asynchronous parsing is not allowed in prefetch mode.
369 DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously());
370
371 // It is permissible to request the background HTML parser whilst also using
372 // --enable-blink-features=ForceSynchronousHTMLParsing, but it's usually
373 // unintentional. To help flush out these cases, trigger a DCHECK.
374 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() ||
375 !CanParseAsynchronously());
376
377 // Report metrics for async document parsing only. The document
378 // must be main frame to meet UKM requirements, and must have a high
379 // resolution clock for high quality data.
380 if (sync_policy == kAllowAsynchronousParsing && document.GetFrame() &&
381 document.GetFrame()->IsMainFrame() &&
382 base::TimeTicks::IsHighResolution()) {
383 metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
384 document.UkmSourceID(), document.UkmRecorder());
385 }
386
387 max_tokenization_budget_ = GetMaxTokenizationBudget();
388
389 // Don't create preloader for parsing clipboard content.
390 if (content_policy == kDisallowScriptingAndPluginContent)
391 return;
392
393 // Create preloader only when the document is:
394 // - attached to a frame (likely the prefetched resources will be loaded
395 // soon),
396 // - a HTML import document (blocks rendering and also resources will be
397 // loaded soon), or
398 // - is for no-state prefetch (made specifically for running preloader).
399 if (!document.GetFrame() && !document.IsHTMLImport() &&
400 !document.IsPrefetchOnly())
401 return;
402
403 preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
404 }
405
406 HTMLDocumentParser::~HTMLDocumentParser() = default;
407
Dispose()408 void HTMLDocumentParser::Dispose() {
409 // In Oilpan, HTMLDocumentParser can die together with Document, and detach()
410 // is not called in this case.
411 if (have_background_parser_)
412 StopBackgroundParser();
413 }
414
Trace(Visitor * visitor) const415 void HTMLDocumentParser::Trace(Visitor* visitor) const {
416 visitor->Trace(tree_builder_);
417 visitor->Trace(parser_scheduler_);
418 visitor->Trace(script_runner_);
419 visitor->Trace(preloader_);
420 visitor->Trace(task_runner_state_);
421 ScriptableDocumentParser::Trace(visitor);
422 HTMLParserScriptRunnerHost::Trace(visitor);
423 }
424
HasPendingWorkScheduledForTesting() const425 bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
426 return task_runner_state_->IsScheduled();
427 }
428
Detach()429 void HTMLDocumentParser::Detach() {
430 if (have_background_parser_)
431 StopBackgroundParser();
432 // Deschedule any pending tokenizer pumps.
433 task_runner_state_->SetState(
434 HTMLDocumentParserState::DeferredParserState::kNotScheduled);
435 DocumentParser::Detach();
436 if (script_runner_)
437 script_runner_->Detach();
438 tree_builder_->Detach();
439 // FIXME: It seems wrong that we would have a preload scanner here. Yet during
440 // fast/dom/HTMLScriptElement/script-load-events.html we do.
441 preload_scanner_.reset();
442 insertion_preload_scanner_.reset();
443 if (parser_scheduler_) {
444 parser_scheduler_->Detach();
445 parser_scheduler_.Clear();
446 }
447 // Oilpan: It is important to clear token_ to deallocate backing memory of
448 // HTMLToken::data_ and let the allocator reuse the memory for
449 // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
450 // tokenizer_ first because tokenizer_ has a raw pointer to token_.
451 tokenizer_.reset();
452 token_.reset();
453 }
454
StopParsing()455 void HTMLDocumentParser::StopParsing() {
456 DocumentParser::StopParsing();
457 if (parser_scheduler_) {
458 parser_scheduler_->Detach();
459 parser_scheduler_.Clear();
460 }
461 task_runner_state_->SetState(
462 HTMLDocumentParserState::DeferredParserState::kNotScheduled);
463 if (have_background_parser_)
464 StopBackgroundParser();
465 }
466
467 // This kicks off "Once the user agent stops parsing" as described by:
468 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
PrepareToStopParsing()469 void HTMLDocumentParser::PrepareToStopParsing() {
470 TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
471 (void*)this);
472 // FIXME: It may not be correct to disable this for the background parser.
473 // That means hasInsertionPoint() may not be correct in some cases.
474 DCHECK(!HasInsertionPoint() || have_background_parser_);
475
476 // NOTE: This pump should only ever emit buffered character tokens.
477 if (tokenizer_ && !GetDocument()->IsPrefetchOnly()) {
478 DCHECK(!have_background_parser_);
479 ShouldCompleteScope should_complete(task_runner_state_);
480 EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
481 PumpTokenizerIfPossible();
482 }
483
484 if (IsStopped())
485 return;
486
487 DocumentParser::PrepareToStopParsing();
488
489 // We will not have a scriptRunner when parsing a DocumentFragment.
490 if (script_runner_)
491 GetDocument()->SetReadyState(Document::kInteractive);
492
493 // Setting the ready state above can fire mutation event and detach us from
494 // underneath. In that case, just bail out.
495 if (IsDetached())
496 return;
497
498 if (script_runner_)
499 script_runner_->RecordMetricsAtParseEnd();
500
501 AttemptToRunDeferredScriptsAndEnd();
502 }
503
IsParsingFragment() const504 bool HTMLDocumentParser::IsParsingFragment() const {
505 return tree_builder_->IsParsingFragment();
506 }
507
DeferredPumpTokenizerIfPossible()508 void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
509 // This method is called asynchronously, continues building the HTML document.
510 // This function should only be called when
511 // --enable-blink-features=ForceSynchronousHTMLParsing is available.
512 DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
513 // If we're scheduled for a tokenizer pump, then document should be attached
514 // and the parser should not be stopped, but sometimes a script completes
515 // loading (so we schedule a pump) but the Document is stopped in the meantime
516 // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
517 DCHECK(task_runner_state_->GetState() ==
518 HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
519 !IsDetached());
520 TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
521 "parser", (void*)this, "state",
522 task_runner_state_->GetStateAsString());
523 bool should_call_delay_end =
524 task_runner_state_->GetState() ==
525 HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
526 if (task_runner_state_->IsScheduled()) {
527 task_runner_state_->SetState(
528 HTMLDocumentParserState::DeferredParserState::kNotScheduled);
529 if (should_call_delay_end) {
530 EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
531 PumpTokenizerIfPossible();
532 EndIfDelayed();
533 } else {
534 PumpTokenizerIfPossible();
535 }
536 }
537 }
538
PumpTokenizerIfPossible()539 void HTMLDocumentParser::PumpTokenizerIfPossible() {
540 // This method is called synchronously, builds the HTML document up to
541 // the current budget, and optionally completes.
542 TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
543 (void*)this);
544
545 bool yielded = false;
546 CheckIfBlockingStylesheetAdded();
547 if (!IsStopped() &&
548 (!IsPaused() || task_runner_state_->ShouldEndIfDelayed())) {
549 yielded = PumpTokenizer();
550 }
551
552 if (yielded) {
553 DCHECK(!task_runner_state_->ShouldComplete());
554 SchedulePumpTokenizer();
555 } else if (task_runner_state_->ShouldEndIfDelayed()) {
556 // If we did not exceed the budget or parsed everything there was to
557 // parse, check if we should complete the document.
558 if (task_runner_state_->ShouldComplete() || IsStopped() || IsStopping()) {
559 EndIfDelayed();
560 } else {
561 ScheduleEndIfDelayed();
562 }
563 }
564 }
565
IsScheduledForUnpause() const566 bool HTMLDocumentParser::IsScheduledForUnpause() const {
567 return parser_scheduler_ && parser_scheduler_->IsScheduledForUnpause();
568 }
569
570 // Used by HTMLParserScheduler
ResumeParsingAfterYield()571 void HTMLDocumentParser::ResumeParsingAfterYield() {
572 DCHECK(CanParseAsynchronously());
573 DCHECK(have_background_parser_);
574 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
575
576 ScopedYieldTimer timer(&yield_timer_, metrics_reporter_.get());
577
578 CheckIfBlockingStylesheetAdded();
579 if (IsStopped() || IsPaused())
580 return;
581
582 PumpPendingSpeculations();
583 }
584
RunScriptsForPausedTreeBuilder()585 void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
586 TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
587 "parser", (void*)this);
588 DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));
589
590 TextPosition script_start_position = TextPosition::BelowRangePosition();
591 Element* script_element =
592 tree_builder_->TakeScriptToProcess(script_start_position);
593 // We will not have a scriptRunner when parsing a DocumentFragment.
594 if (script_runner_)
595 script_runner_->ProcessScriptElement(script_element, script_start_position);
596 CheckIfBlockingStylesheetAdded();
597 }
598
CanTakeNextToken()599 bool HTMLDocumentParser::CanTakeNextToken() {
600 if (IsStopped())
601 return false;
602
603 // If we're paused waiting for a script, we try to execute scripts before
604 // continuing.
605 if (tree_builder_->HasParserBlockingScript())
606 RunScriptsForPausedTreeBuilder();
607 if (IsStopped() || IsPaused())
608 return false;
609 return true;
610 }
611
EnqueueTokenizedChunk(std::unique_ptr<TokenizedChunk> chunk)612 void HTMLDocumentParser::EnqueueTokenizedChunk(
613 std::unique_ptr<TokenizedChunk> chunk) {
614 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
615 TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk");
616
617 DCHECK(chunk);
618 DCHECK(GetDocument());
619
620 if (!IsParsing())
621 return;
622
623 // ApplicationCache needs to be initialized before issuing preloads. We
624 // suspend preload until HTMLHTMLElement is inserted and ApplicationCache is
625 // initialized. Note: link rel preloads don't follow this policy per the spec.
626 // These directives should initiate a fetch as fast as possible.
627 if (!tried_loading_link_headers_ && GetDocument()->Loader()) {
628 // Note that on commit, the loader dispatched preloads for all the non-media
629 // links.
630 GetDocument()->Loader()->DispatchLinkHeaderPreloads(
631 base::OptionalOrNullptr(chunk->viewport),
632 PreloadHelper::kOnlyLoadMedia);
633 tried_loading_link_headers_ = true;
634 if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
635 // Link header preloads for prefetched signed exchanges won't be started
636 // until StartPrefetchedLinkHeaderPreloads() is called. See the header
637 // comment of PrefetchedSignedExchangeManager.
638 GetDocument()
639 ->Loader()
640 ->GetPrefetchedSignedExchangeManager()
641 ->StartPrefetchedLinkHeaderPreloads();
642 }
643 }
644
645 // Defer preloads if any of the chunks contains a <meta> csp tag.
646 if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) {
647 pending_csp_meta_token_ =
648 &chunk->tokens.at(chunk->pending_csp_meta_token_index);
649 }
650
651 if (preloader_) {
652 bool appcache_fetched = false;
653 if (GetDocument()->Loader()) {
654 appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() !=
655 mojom::blink::kAppCacheNoCacheId);
656 }
657 bool appcache_initialized = GetDocument()->documentElement();
658 // Delay sending some requests if meta tag based CSP is present or
659 // if AppCache was used to fetch the HTML but was not yet initialized for
660 // this document.
661 if (pending_csp_meta_token_ ||
662 ((!base::FeatureList::IsEnabled(
663 blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) ||
664 appcache_fetched) &&
665 !appcache_initialized)) {
666 PreloadRequestStream link_rel_preloads;
667 for (auto& request : chunk->preloads) {
668 // Link rel preloads don't need to wait for AppCache but they
669 // should probably wait for CSP.
670 if (!pending_csp_meta_token_ && request->IsLinkRelPreload())
671 link_rel_preloads.push_back(std::move(request));
672 else
673 queued_preloads_.push_back(std::move(request));
674 }
675 preloader_->TakeAndPreload(link_rel_preloads);
676 } else {
677 // We can safely assume that there are no queued preloads request after
678 // the document element is available, as we empty the queue immediately
679 // after the document element is created in documentElementAvailable().
680 DCHECK(queued_preloads_.IsEmpty());
681 preloader_->TakeAndPreload(chunk->preloads);
682 }
683 }
684
685 speculations_.push_back(std::move(chunk));
686
687 if (!IsPaused() && !IsScheduledForUnpause())
688 parser_scheduler_->ScheduleForUnpause();
689 }
690
DidReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData & data)691 void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser(
692 const DocumentEncodingData& data) {
693 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
694 GetDocument()->SetEncodingData(data);
695 }
696
ValidateSpeculations(std::unique_ptr<TokenizedChunk> chunk)697 void HTMLDocumentParser::ValidateSpeculations(
698 std::unique_ptr<TokenizedChunk> chunk) {
699 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
700 DCHECK(chunk);
701 // TODO(kouhei): We should simplify codepath here by disallowing
702 // ValidateSpeculations
703 // while IsPaused, and last_chunk_before_pause_ can simply be
704 // pushed to speculations_.
705 if (IsPaused()) {
706 // We're waiting on a network script or stylesheet, just save the chunk,
707 // we'll get a second ValidateSpeculations call after the script or
708 // stylesheet completes. This call should have been made immediately after
709 // RunScriptsForPausedTreeBuilder in the script case which may have started
710 // a network load and left us waiting.
711 DCHECK(!last_chunk_before_pause_);
712 last_chunk_before_pause_ = std::move(chunk);
713 return;
714 }
715
716 DCHECK(!last_chunk_before_pause_);
717 std::unique_ptr<HTMLTokenizer> tokenizer = std::move(tokenizer_);
718 std::unique_ptr<HTMLToken> token = std::move(token_);
719
720 if (!tokenizer) {
721 // There must not have been any changes to the HTMLTokenizer state on the
722 // main thread, which means the speculation buffer is correct.
723 return;
724 }
725
726 // Currently we're only smart enough to reuse the speculation buffer if the
727 // tokenizer both starts and ends in the DataState. That state is simplest
728 // because the HTMLToken is always in the Uninitialized state. We should
729 // consider whether we can reuse the speculation buffer in other states, but
730 // we'd likely need to do something more sophisticated with the HTMLToken.
731 if (chunk->tokenizer_state == HTMLTokenizer::kDataState &&
732 tokenizer->GetState() == HTMLTokenizer::kDataState &&
733 input_.Current().IsEmpty() &&
734 chunk->tree_builder_state ==
735 HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get())) {
736 DCHECK(token->IsUninitialized());
737 return;
738 }
739
740 DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token),
741 std::move(tokenizer));
742 }
743
DiscardSpeculationsAndResumeFrom(std::unique_ptr<TokenizedChunk> last_chunk_before_script,std::unique_ptr<HTMLToken> token,std::unique_ptr<HTMLTokenizer> tokenizer)744 void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom(
745 std::unique_ptr<TokenizedChunk> last_chunk_before_script,
746 std::unique_ptr<HTMLToken> token,
747 std::unique_ptr<HTMLTokenizer> tokenizer) {
748 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
749 // Clear back ref.
750 background_parser_->ClearParser();
751
752 size_t discarded_token_count = 0;
753 for (const auto& speculation : speculations_) {
754 discarded_token_count += speculation->tokens.size();
755 }
756 g_discarded_token_count_for_testing += discarded_token_count;
757
758 speculations_.clear();
759 pending_csp_meta_token_ = nullptr;
760 queued_preloads_.clear();
761
762 std::unique_ptr<BackgroundHTMLParser::Checkpoint> checkpoint =
763 std::make_unique<BackgroundHTMLParser::Checkpoint>();
764 checkpoint->parser = this;
765 checkpoint->token = std::move(token);
766 checkpoint->tokenizer = std::move(tokenizer);
767 checkpoint->tree_builder_state =
768 HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());
769 checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint;
770 checkpoint->preload_scanner_checkpoint =
771 last_chunk_before_script->preload_scanner_checkpoint;
772 checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy();
773 // FIXME: This should be passed in instead of cleared.
774 input_.Current().Clear();
775
776 DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread());
777 loading_task_runner_->PostTask(
778 FROM_HERE,
779 WTF::Bind(&BackgroundHTMLParser::ResumeFrom, background_parser_,
780 WTF::Passed(std::move(checkpoint))));
781 }
782
ProcessTokenizedChunkFromBackgroundParser(std::unique_ptr<TokenizedChunk> pop_chunk,bool * reached_end_of_file)783 size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser(
784 std::unique_ptr<TokenizedChunk> pop_chunk,
785 bool* reached_end_of_file) {
786 TRACE_EVENT_WITH_FLOW0(
787 "blink,loading",
788 "HTMLDocumentParser::processTokenizedChunkFromBackgroundParser",
789 pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN);
790 base::AutoReset<bool> has_line_number(&is_parsing_at_line_number_, true);
791
792 SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1);
793 SECURITY_DCHECK(!InPumpSession());
794 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
795 DCHECK(!IsParsingFragment());
796 DCHECK(!IsPaused());
797 DCHECK(!IsStopped());
798 DCHECK(CanParseAsynchronously());
799 DCHECK(!tokenizer_);
800 DCHECK(!token_);
801 DCHECK(!last_chunk_before_pause_);
802
803 std::unique_ptr<TokenizedChunk> chunk(std::move(pop_chunk));
804 const CompactHTMLTokenStream& tokens = chunk->tokens;
805 size_t element_token_count = 0;
806
807 loading_task_runner_->PostTask(
808 FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint,
809 background_parser_, chunk->input_checkpoint));
810
811 for (const auto& token : tokens) {
812 DCHECK(!IsWaitingForScripts());
813
814 if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag ||
815 token.GetType() == HTMLToken::kEndTag))
816 element_token_count++;
817
818 text_position_ = token.GetTextPosition();
819
820 ConstructTreeFromCompactHTMLToken(token);
821
822 if (IsStopped())
823 break;
824
825 // Preloads were queued if there was a <meta> csp token in a tokenized
826 // chunk.
827 if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) {
828 pending_csp_meta_token_ = nullptr;
829 FetchQueuedPreloads();
830 }
831
832 if (IsPaused()) {
833 // The script or stylesheet should be the last token of this bunch.
834 DCHECK_EQ(&token, &tokens.back());
835 if (IsWaitingForScripts())
836 RunScriptsForPausedTreeBuilder();
837 ValidateSpeculations(std::move(chunk));
838 break;
839 }
840
841 if (token.GetType() == HTMLToken::kEndOfFile) {
842 // The EOF is assumed to be the last token of this bunch.
843 DCHECK_EQ(&token, &tokens.back());
844 // There should never be any chunks after the EOF.
845 DCHECK(speculations_.IsEmpty());
846 PrepareToStopParsing();
847 *reached_end_of_file = true;
848 break;
849 }
850
851 DCHECK(!tokenizer_);
852 DCHECK(!token_);
853 }
854
855 // Make sure all required pending text nodes are emitted before returning.
856 // This leaves "script", "style" and "svg" nodes text nodes intact.
857 if (!IsStopped())
858 tree_builder_->Flush(kFlushIfAtTextLimit);
859
860 is_parsing_at_line_number_ = false;
861
862 return element_token_count;
863 }
864
PumpPendingSpeculations()865 void HTMLDocumentParser::PumpPendingSpeculations() {
866 // If this assert fails, you need to call ValidateSpeculations to make sure
867 // tokenizer_ and token_ don't have state that invalidates speculations_.
868 DCHECK(!tokenizer_);
869 DCHECK(!token_);
870 DCHECK(!last_chunk_before_pause_);
871 DCHECK(!IsPaused());
872 DCHECK(!IsStopped());
873 DCHECK(!IsScheduledForUnpause());
874 DCHECK(!InPumpSession());
875 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
876
877 // FIXME: Here should never be reached when there is a blocking script,
878 // but it happens in unknown scenarios. See https://crbug.com/440901
879 if (IsWaitingForScripts()) {
880 parser_scheduler_->ScheduleForUnpause();
881 return;
882 }
883
884 // Do not allow pumping speculations in nested event loops.
885 if (pump_speculations_session_nesting_level_) {
886 parser_scheduler_->ScheduleForUnpause();
887 return;
888 }
889
890 probe::ParseHTML probe(GetDocument(), this);
891
892 SpeculationsPumpSession session(pump_speculations_session_nesting_level_);
893 bool reached_end_of_file = false;
894 while (!speculations_.IsEmpty()) {
895 DCHECK(!IsScheduledForUnpause());
896 size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser(
897 speculations_.TakeFirst(), &reached_end_of_file);
898 session.AddedElementTokens(element_token_count);
899
900 // Always check IsParsing first as document_ may be null. Surprisingly,
901 // IsScheduledForUnpause() may be set here as a result of
902 // ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript
903 // which invokes nested event loops. (e.g. inspector breakpoints)
904 CheckIfBlockingStylesheetAdded();
905 if (!IsParsing() || IsPaused() || IsScheduledForUnpause())
906 break;
907
908 if (speculations_.IsEmpty() ||
909 parser_scheduler_->YieldIfNeeded(
910 session, speculations_.front()->starting_script))
911 break;
912 }
913
914 if (metrics_reporter_) {
915 metrics_reporter_->AddChunk(session.ElapsedTime(),
916 session.ProcessedElementTokens());
917 if (reached_end_of_file)
918 metrics_reporter_->ReportMetricsAtParseEnd();
919 }
920 }
921
ForcePlaintextForTextDocument()922 void HTMLDocumentParser::ForcePlaintextForTextDocument() {
923 if (CanParseAsynchronously()) {
924 // This method is called before any data is appended, so we have to start
925 // the background parser ourselves.
926 if (!have_background_parser_)
927 StartBackgroundParser();
928
929 // This task should be synchronous, because otherwise synchronous
930 // tokenizing can happen before plaintext is forced.
931 background_parser_->ForcePlaintextForTextDocument();
932 } else
933 tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
934 }
935
PumpTokenizer()936 bool HTMLDocumentParser::PumpTokenizer() {
937 DCHECK(!GetDocument()->IsPrefetchOnly());
938 DCHECK(!IsStopped());
939 DCHECK(tokenizer_);
940 DCHECK(token_);
941
942 PumpSession session(pump_session_nesting_level_);
943
944 // If we're in kForceSynchronousParsing, always run until all available input
945 // is consumed.
946 bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
947 task_runner_state_->IsSynchronous() ||
948 pump_session_nesting_level_ > 1;
949 TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
950 should_run_until_completion, "parser", (void*)this);
951
952 // We tell the InspectorInstrumentation about every pump, even if we end up
953 // pumping nothing. It can filter out empty pumps itself.
954 // FIXME: input_.Current().length() is only accurate if we end up parsing the
955 // whole buffer in this pump. We should pass how much we parsed as part of
956 // DidWriteHTML instead of WillWriteHTML.
957 probe::ParseHTML probe(GetDocument(), this);
958
959 bool should_yield = false;
960 int budget = max_tokenization_budget_;
961
962 while (CanTakeNextToken() && !should_yield) {
963 {
964 RUNTIME_CALL_TIMER_SCOPE(
965 V8PerIsolateData::MainThreadIsolate(),
966 RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
967 if (!tokenizer_->NextToken(input_.Current(), Token()))
968 break;
969 budget--;
970 }
971 ConstructTreeFromHTMLToken();
972 if (!should_run_until_completion && !IsPaused()) {
973 DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
974 should_yield = budget <= 0;
975 should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
976 should_yield &= task_runner_state_->HaveExitedHeader();
977 } else {
978 should_yield = false;
979 }
980 DCHECK(IsStopped() || Token().IsUninitialized());
981 }
982
983 if (IsStopped())
984 return false;
985
986 // There should only be PendingText left since the tree-builder always flushes
987 // the task queue before returning. In case that ever changes, crash.
988 tree_builder_->Flush(kFlushAlways);
989 CHECK(!IsStopped());
990
991 if (IsPaused()) {
992 DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);
993
994 if (preloader_) {
995 if (!preload_scanner_) {
996 preload_scanner_ = CreatePreloadScanner(
997 TokenPreloadScanner::ScannerType::kMainDocument);
998 preload_scanner_->AppendToEnd(input_.Current());
999 }
1000 ScanAndPreload(preload_scanner_.get());
1001 }
1002 }
1003
1004 // should_run_until_completion implies that we should not yield
1005 CHECK(!should_run_until_completion || !should_yield);
1006 return should_yield;
1007 }
1008
SchedulePumpTokenizer()1009 void HTMLDocumentParser::SchedulePumpTokenizer() {
1010 TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
1011 DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1012 DCHECK(!IsStopped());
1013 DCHECK(!InPumpSession());
1014 DCHECK(!task_runner_state_->ShouldComplete());
1015 if (task_runner_state_->IsScheduled()) {
1016 // If the parser is already scheduled, there's no need to do anything.
1017 return;
1018 }
1019 loading_task_runner_->PostTask(
1020 FROM_HERE, WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
1021 WrapPersistent(this)));
1022 task_runner_state_->SetState(
1023 HTMLDocumentParserState::DeferredParserState::kScheduled);
1024 }
1025
ScheduleEndIfDelayed()1026 void HTMLDocumentParser::ScheduleEndIfDelayed() {
1027 TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
1028 DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1029 DCHECK(!IsStopped());
1030 DCHECK(!InPumpSession());
1031 DCHECK(!task_runner_state_->ShouldComplete());
1032
1033 // Schedule a pump callback if needed.
1034 if (!task_runner_state_->IsScheduled()) {
1035 loading_task_runner_->PostTask(
1036 FROM_HERE,
1037 WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
1038 WrapPersistent(this)));
1039 }
1040 // If a pump is already scheduled, it's OK to just upgrade it to one
1041 // which calls EndIfDelayed afterwards.
1042 task_runner_state_->SetState(
1043 HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
1044 }
1045
ConstructTreeFromHTMLToken()1046 void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
1047 DCHECK(!GetDocument()->IsPrefetchOnly());
1048
1049 AtomicHTMLToken atomic_token(Token());
1050
1051 // Check whether we've exited the header.
1052 if (!task_runner_state_->HaveExitedHeader()) {
1053 if (GetDocument()->body()) {
1054 task_runner_state_->SetExitedHeader();
1055 }
1056 }
1057
1058 // We clear the token_ in case ConstructTreeFromAtomicToken
1059 // synchronously re-enters the parser. We don't clear the token immedately
1060 // for kCharacter tokens because the AtomicHTMLToken avoids copying the
1061 // characters by keeping a pointer to the underlying buffer in the
1062 // HTMLToken. Fortunately, kCharacter tokens can't cause us to re-enter
1063 // the parser.
1064 //
1065 // FIXME: Stop clearing the token_ once we start running the parser off
1066 // the main thread or once we stop allowing synchronous JavaScript
1067 // execution from ParseAttribute.
1068 if (Token().GetType() != HTMLToken::kCharacter)
1069 Token().Clear();
1070
1071 tree_builder_->ConstructTree(&atomic_token);
1072 CheckIfBlockingStylesheetAdded();
1073
1074 // FIXME: ConstructTree may synchronously cause Document to be detached.
1075 if (!token_)
1076 return;
1077
1078 if (!Token().IsUninitialized()) {
1079 DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter);
1080 Token().Clear();
1081 }
1082 }
1083
ConstructTreeFromCompactHTMLToken(const CompactHTMLToken & compact_token)1084 void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken(
1085 const CompactHTMLToken& compact_token) {
1086 DCHECK(!GetDocument()->IsPrefetchOnly());
1087 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1088 AtomicHTMLToken token(compact_token);
1089 tree_builder_->ConstructTree(&token);
1090 CheckIfBlockingStylesheetAdded();
1091 }
1092
HasInsertionPoint()1093 bool HTMLDocumentParser::HasInsertionPoint() {
1094 // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our
1095 // model of the EOF character differs slightly from the one in the spec
1096 // because our treatment is uniform between network-sourced and script-sourced
1097 // input streams whereas the spec treats them differently.
1098 return input_.HasInsertionPoint() ||
1099 (WasCreatedByScript() && !input_.HaveSeenEndOfFile());
1100 }
1101
insert(const String & source)1102 void HTMLDocumentParser::insert(const String& source) {
1103 if (IsStopped())
1104 return;
1105
1106 TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
1107 source.length(), "parser", (void*)this);
1108
1109 if (!tokenizer_) {
1110 DCHECK(!InPumpSession());
1111 DCHECK(have_background_parser_ || WasCreatedByScript());
1112 token_ = std::make_unique<HTMLToken>();
1113 tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
1114 }
1115
1116 SegmentedString excluded_line_number_source(source);
1117 excluded_line_number_source.SetExcludeLineNumbers();
1118 input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);
1119
1120 // Pump the the tokenizer to build the document from the given insert point.
1121 // Should process everything available and not defer anything.
1122 ShouldCompleteScope should_complete(task_runner_state_);
1123 EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
1124 // Call EndIfDelayed manually at the end to maintain preload behaviour.
1125 PumpTokenizerIfPossible();
1126
1127 if (IsPaused()) {
1128 // Check the document.write() output with a separate preload scanner as
1129 // the main scanner can't deal with insertions.
1130 if (!insertion_preload_scanner_) {
1131 insertion_preload_scanner_ =
1132 CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
1133 }
1134 insertion_preload_scanner_->AppendToEnd(source);
1135 if (preloader_) {
1136 ScanAndPreload(insertion_preload_scanner_.get());
1137 }
1138 }
1139 EndIfDelayed();
1140 }
1141
StartBackgroundParser()1142 void HTMLDocumentParser::StartBackgroundParser() {
1143 TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
1144 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1145 DCHECK(!IsStopped());
1146 DCHECK(CanParseAsynchronously());
1147 DCHECK(!have_background_parser_);
1148 DCHECK(GetDocument());
1149 have_background_parser_ = true;
1150
1151 // Make sure that the viewport is up-to-date, so that the correct viewport
1152 // dimensions will be fed to the background parser and preload scanner.
1153 if (GetDocument()->Loader())
1154 GetDocument()->GetStyleEngine().UpdateViewport();
1155
1156 std::unique_ptr<BackgroundHTMLParser::Configuration> config =
1157 std::make_unique<BackgroundHTMLParser::Configuration>();
1158 config->options = options_;
1159 config->parser = this;
1160 config->decoder = TakeDecoder();
1161
1162 // The background parser is created on the main thread, but may otherwise
1163 // only be used from the parser thread.
1164 background_parser_ =
1165 BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
1166 // TODO(csharrison): This is a hack to initialize MediaValuesCached on the
1167 // correct thread. We should get rid of it.
1168
1169 // TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
1170 // Trial. This currently exists because the TokenPreloadScanner needs to know
1171 // the status of the Priority Hints Origin Trial, and has no way of figuring
1172 // this out on its own. See https://crbug.com/821464.
1173 bool priority_hints_origin_trial_enabled =
1174 RuntimeEnabledFeatures::PriorityHintsEnabled(
1175 GetDocument()->GetExecutionContext());
1176
1177 background_parser_->Init(
1178 GetDocument()->Url(),
1179 std::make_unique<CachedDocumentParameters>(GetDocument()),
1180 MediaValuesCached::MediaValuesCachedData(*GetDocument()),
1181 priority_hints_origin_trial_enabled);
1182 }
1183
StopBackgroundParser()1184 void HTMLDocumentParser::StopBackgroundParser() {
1185 DCHECK(CanParseAsynchronously());
1186 DCHECK(have_background_parser_);
1187 DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1188
1189 have_background_parser_ = false;
1190
1191 // Make this sync, as lsan triggers on some unittests if the task runner is
1192 // used.
1193 background_parser_->Stop();
1194 }
1195
Append(const String & input_source)1196 void HTMLDocumentParser::Append(const String& input_source) {
1197 TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
1198 input_source.length(), "parser", (void*)this);
1199
1200 if (IsStopped())
1201 return;
1202
1203 // We should never reach this point if we're using a parser thread, as
1204 // appendBytes() will directly ship the data to the thread.
1205 DCHECK(!CanParseAsynchronously());
1206
1207 const SegmentedString source(input_source);
1208
1209 if (!preload_scanner_ && GetDocument()->Url().IsValid() &&
1210 (!task_runner_state_->IsSynchronous() ||
1211 GetDocument()->IsPrefetchOnly() || IsPaused())) {
1212 // If we're operating with synchronous, budgeted foreground HTML parsing
1213 // or using the background parser, need to create a preload scanner to
1214 // make sure that parser-blocking Javascript requests are dispatched in
1215 // plenty of time, which prevents unnecessary delays.
1216 // When parsing without a budget (e.g. for HTML fragment parsing), it's
1217 // additional overhead to scan the string unless the parser's already
1218 // paused whilst executing a script.
1219 preload_scanner_ =
1220 CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
1221 }
1222
1223 if (GetDocument()->IsPrefetchOnly()) {
1224 // Do not prefetch if there is an appcache.
1225 if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0)
1226 return;
1227
1228 preload_scanner_->AppendToEnd(source);
1229 ScanAndPreload(preload_scanner_.get());
1230
1231 // Return after the preload scanner, do not actually parse the document.
1232 return;
1233 }
1234 if (preload_scanner_) {
1235 if (input_.Current().IsEmpty() && !IsPaused()) {
1236 // We have parsed until the end of the current input and so are now
1237 // moving ahead of the preload scanner. Clear the scanner so we know to
1238 // scan starting from the current input point if we block again.
1239 preload_scanner_.reset();
1240 } else {
1241 preload_scanner_->AppendToEnd(source);
1242 if (preloader_) {
1243 if (!task_runner_state_->IsSynchronous() || IsPaused()) {
1244 // Should scan and preload if the parser's paused and operating
1245 // synchronously, or if the parser's operating in an asynchronous
1246 // mode.
1247 ScanAndPreload(preload_scanner_.get());
1248 }
1249 }
1250 }
1251 }
1252
1253 input_.AppendToEnd(source);
1254
1255 if (InPumpSession()) {
1256 // We've gotten data off the network in a nested write. We don't want to
1257 // consume any more of the input stream now. Do not worry. We'll consume
1258 // this data in a less-nested write().
1259 return;
1260 }
1261
1262 // Schedule a tokenizer pump to process this new data.
1263 if (task_runner_state_->GetMode() ==
1264 ParserSynchronizationPolicy::kAllowDeferredParsing &&
1265 !task_runner_state_->ShouldComplete()) {
1266 SchedulePumpTokenizer();
1267 } else {
1268 PumpTokenizerIfPossible();
1269 }
1270 }
1271
end()1272 void HTMLDocumentParser::end() {
1273 DCHECK(!IsDetached());
1274 DCHECK(!IsScheduledForUnpause());
1275
1276 if (have_background_parser_)
1277 StopBackgroundParser();
1278
1279 // Informs the the rest of WebCore that parsing is really finished (and
1280 // deletes this).
1281 tree_builder_->Finished();
1282
1283 // All preloads should be done.
1284 preloader_ = nullptr;
1285
1286 DocumentParser::StopParsing();
1287 }
1288
AttemptToRunDeferredScriptsAndEnd()1289 void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
1290 DCHECK(IsStopping());
1291 // FIXME: It may not be correct to disable this for the background parser.
1292 // That means hasInsertionPoint() may not be correct in some cases.
1293 DCHECK(!HasInsertionPoint() || have_background_parser_);
1294 if (script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing())
1295 return;
1296 end();
1297 }
1298
ShouldDelayEnd() const1299 bool HTMLDocumentParser::ShouldDelayEnd() const {
1300 return InPumpSession() || IsPaused() || IsExecutingScript() ||
1301 task_runner_state_->IsScheduled();
1302 }
1303
AttemptToEnd()1304 void HTMLDocumentParser::AttemptToEnd() {
1305 // finish() indicates we will not receive any more data. If we are waiting on
1306 // an external script to load, we can't finish parsing quite yet.
1307 TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
1308 (void*)this);
1309
1310 if (ShouldDelayEnd()) {
1311 end_was_delayed_ = true;
1312 return;
1313 }
1314 PrepareToStopParsing();
1315 }
1316
EndIfDelayed()1317 void HTMLDocumentParser::EndIfDelayed() {
1318 TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
1319 (void*)this);
1320 ShouldCompleteScope should_complete(task_runner_state_);
1321 EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
1322 // If we've already been detached, don't bother ending.
1323 if (IsDetached())
1324 return;
1325
1326 if (!end_was_delayed_ || ShouldDelayEnd())
1327 return;
1328
1329 end_was_delayed_ = false;
1330 PrepareToStopParsing();
1331 }
1332
Finish()1333 void HTMLDocumentParser::Finish() {
1334 // FIXME: We should DCHECK(!parser_stopped_) here, since it does not makes
1335 // sense to call any methods on DocumentParser once it's been stopped.
1336 // However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.
1337
1338 ShouldCompleteScope should_complete(task_runner_state_);
1339 EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
1340 Flush();
1341 if (IsDetached())
1342 return;
1343
1344 // Empty documents never got an append() call, and thus have never started a
1345 // background parser. In those cases, we ignore CanParseAsynchronously() and
1346 // fall through to the synchronous case.
1347 if (have_background_parser_) {
1348 if (!input_.HaveSeenEndOfFile())
1349 input_.CloseWithoutMarkingEndOfFile();
1350 loading_task_runner_->PostTask(
1351 FROM_HERE,
1352 WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
1353 return;
1354 }
1355
1356 if (!tokenizer_) {
1357 DCHECK(!token_);
1358 // We're finishing before receiving any data. Rather than booting up the
1359 // background parser just to spin it down, we finish parsing synchronously.
1360 token_ = std::make_unique<HTMLToken>();
1361 tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
1362 }
1363
1364 // We're not going to get any more data off the network, so we tell the input
1365 // stream we've reached the end of file. finish() can be called more than
1366 // once, if the first time does not call end().
1367 if (!input_.HaveSeenEndOfFile())
1368 input_.MarkEndOfFile();
1369
1370 if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) {
1371 // If there's any deferred work remaining, synchronously pump the tokenizer
1372 // one last time to make sure that everything's added to the document.
1373 PumpTokenizerIfPossible();
1374 }
1375
1376 AttemptToEnd();
1377 }
1378
IsExecutingScript() const1379 bool HTMLDocumentParser::IsExecutingScript() const {
1380 if (!script_runner_)
1381 return false;
1382 return script_runner_->IsExecutingScript();
1383 }
1384
IsParsingAtLineNumber() const1385 bool HTMLDocumentParser::IsParsingAtLineNumber() const {
1386 if (CanParseAsynchronously()) {
1387 return is_parsing_at_line_number_ &&
1388 ScriptableDocumentParser::IsParsingAtLineNumber();
1389 }
1390 return ScriptableDocumentParser::IsParsingAtLineNumber();
1391 }
1392
LineNumber() const1393 OrdinalNumber HTMLDocumentParser::LineNumber() const {
1394 if (have_background_parser_)
1395 return text_position_.line_;
1396
1397 return input_.Current().CurrentLine();
1398 }
1399
GetTextPosition() const1400 TextPosition HTMLDocumentParser::GetTextPosition() const {
1401 if (have_background_parser_)
1402 return text_position_;
1403
1404 const SegmentedString& current_string = input_.Current();
1405 OrdinalNumber line = current_string.CurrentLine();
1406 OrdinalNumber column = current_string.CurrentColumn();
1407
1408 return TextPosition(line, column);
1409 }
1410
IsWaitingForScripts() const1411 bool HTMLDocumentParser::IsWaitingForScripts() const {
1412 // When the TreeBuilder encounters a </script> tag, it returns to the
1413 // HTMLDocumentParser where the script is transfered from the treebuilder to
1414 // the script runner. The script runner will hold the script until its loaded
1415 // and run. During any of this time, we want to count ourselves as "waiting
1416 // for a script" and thus run the preload scanner, as well as delay completion
1417 // of parsing.
1418 bool tree_builder_has_blocking_script =
1419 tree_builder_->HasParserBlockingScript();
1420 bool script_runner_has_blocking_script =
1421 script_runner_ && script_runner_->HasParserBlockingScript();
1422 // Since the parser is paused while a script runner has a blocking script, it
1423 // should never be possible to end up with both objects holding a blocking
1424 // script.
1425 DCHECK(
1426 !(tree_builder_has_blocking_script && script_runner_has_blocking_script));
1427 // If either object has a blocking script, the parser should be paused.
1428 return tree_builder_has_blocking_script ||
1429 script_runner_has_blocking_script ||
1430 reentry_permit_->ParserPauseFlag();
1431 }
1432
ResumeParsingAfterPause()1433 void HTMLDocumentParser::ResumeParsingAfterPause() {
1434 // This function runs after a parser-blocking script has completed. There are
1435 // four possible cases:
1436 // 1) Parsing with kForceSynchronousParsing, where there is no background
1437 // parser and a tokenizer_'s defined.
1438 // 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
1439 // this case, the document is usually being completed or parsing has
1440 // otherwise stopped.
1441 // 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
1442 // case, need to add any pending speculations to the document.
1443 // 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
1444 TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
1445 (void*)this);
1446 DCHECK(!IsExecutingScript());
1447 DCHECK(!IsPaused());
1448
1449 CheckIfBlockingStylesheetAdded();
1450 if (IsStopped() || IsPaused())
1451 return;
1452
1453 if (have_background_parser_) { // Case 3)
1454 // If we paused in the middle of processing a token chunk,
1455 // deal with that before starting to pump.
1456 if (last_chunk_before_pause_) {
1457 ValidateSpeculations(std::move(last_chunk_before_pause_));
1458 DCHECK(!last_chunk_before_pause_);
1459 PumpPendingSpeculations();
1460 } else if (!IsScheduledForUnpause()) {
1461 // Otherwise, start pumping if we're not already scheduled to unpause
1462 // already.
1463 PumpPendingSpeculations();
1464 }
1465 return;
1466 }
1467
1468 insertion_preload_scanner_.reset();
1469 if (tokenizer_) {
1470 // Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
1471 // kForceSynchronousParsing must pump the tokenizer synchronously,
1472 // otherwise it can be deferred.
1473 if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
1474 !task_runner_state_->ShouldComplete() && !InPumpSession()) {
1475 SchedulePumpTokenizer();
1476 } else {
1477 ShouldCompleteScope should_complete(task_runner_state_);
1478 PumpTokenizerIfPossible();
1479 }
1480 } else {
1481 // Case 2): kAllowAsynchronousParsing, no background parser available
1482 // (indicating possible Document shutdown).
1483 EndIfDelayed();
1484 }
1485 }
1486
AppendCurrentInputStreamToPreloadScannerAndScan()1487 void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
1488 TRACE_EVENT1(
1489 "blink",
1490 "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
1491 "parser", (void*)this);
1492 DCHECK(preload_scanner_);
1493 DCHECK(preloader_);
1494 preload_scanner_->AppendToEnd(input_.Current());
1495 ScanAndPreload(preload_scanner_.get());
1496 }
1497
NotifyScriptLoaded()1498 void HTMLDocumentParser::NotifyScriptLoaded() {
1499 TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
1500 (void*)this);
1501 DCHECK(script_runner_);
1502 DCHECK(!IsExecutingScript());
1503
1504 scheduler::CooperativeSchedulingManager::AllowedStackScope
1505 allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance());
1506
1507 if (IsStopped()) {
1508 return;
1509 }
1510
1511 if (IsStopping()) {
1512 AttemptToRunDeferredScriptsAndEnd();
1513 return;
1514 }
1515
1516 script_runner_->ExecuteScriptsWaitingForLoad();
1517 if (!IsPaused())
1518 ResumeParsingAfterPause();
1519 }
1520
ExecuteScriptsWaitingForResources()1521 void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
1522 TRACE_EVENT0("blink",
1523 "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
1524 if (IsStopped())
1525 return;
1526
1527 DCHECK(GetDocument()->IsScriptExecutionReady());
1528
1529 if (is_waiting_for_stylesheets_)
1530 is_waiting_for_stylesheets_ = false;
1531
1532 // Document only calls this when the Document owns the DocumentParser so this
1533 // will not be called in the DocumentFragment case.
1534 DCHECK(script_runner_);
1535 script_runner_->ExecuteScriptsWaitingForResources();
1536 if (!IsPaused())
1537 ResumeParsingAfterPause();
1538 }
1539
DidAddPendingParserBlockingStylesheet()1540 void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
1541 // In-body CSS doesn't block painting. The parser needs to pause so that
1542 // the DOM doesn't include any elements that may depend on the CSS for style.
1543 // The stylesheet can be added and removed during the parsing of a single
1544 // token so don't actually set the bit to block parsing here, just track
1545 // the state of the added sheet in case it does persist beyond a single
1546 // token.
1547 added_pending_parser_blocking_stylesheet_ = true;
1548 }
1549
DidLoadAllPendingParserBlockingStylesheets()1550 void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
1551 // Just toggle the stylesheet flag here (mostly for synchronous sheets).
1552 // The document will also call into executeScriptsWaitingForResources
1553 // which is when the parser will re-start, otherwise it will attempt to
1554 // resume twice which could cause state machine issues.
1555 added_pending_parser_blocking_stylesheet_ = false;
1556 }
1557
CheckIfBlockingStylesheetAdded()1558 void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
1559 if (added_pending_parser_blocking_stylesheet_) {
1560 added_pending_parser_blocking_stylesheet_ = false;
1561 is_waiting_for_stylesheets_ = true;
1562 }
1563 }
1564
ParseDocumentFragment(const String & source,DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy)1565 void HTMLDocumentParser::ParseDocumentFragment(
1566 const String& source,
1567 DocumentFragment* fragment,
1568 Element* context_element,
1569 ParserContentPolicy parser_content_policy) {
1570 auto* parser = MakeGarbageCollected<HTMLDocumentParser>(
1571 fragment, context_element, parser_content_policy);
1572 parser->Append(source);
1573 parser->Finish();
1574 // Allows ~DocumentParser to assert it was detached before destruction.
1575 parser->Detach();
1576 }
1577
AppendBytes(const char * data,size_t length)1578 void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
1579 TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
1580 (unsigned)length, "parser", (void*)this);
1581
1582 DCHECK(Thread::MainThread()->IsCurrentThread());
1583
1584 if (!length || IsStopped())
1585 return;
1586
1587 if (CanParseAsynchronously()) {
1588 if (!have_background_parser_)
1589 StartBackgroundParser();
1590
1591 std::unique_ptr<Vector<char>> buffer =
1592 std::make_unique<Vector<char>>(length);
1593 memcpy(buffer->data(), data, length);
1594
1595 loading_task_runner_->PostTask(
1596 FROM_HERE,
1597 WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
1598 background_parser_, WTF::Passed(std::move(buffer))));
1599 return;
1600 }
1601
1602 DecodedDataDocumentParser::AppendBytes(data, length);
1603 }
1604
Flush()1605 void HTMLDocumentParser::Flush() {
1606 TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser", (void*)this);
1607 // If we've got no decoder, we never received any data.
1608 if (IsDetached() || NeedsDecoder())
1609 return;
1610
1611 if (CanParseAsynchronously()) {
1612 // In some cases, flush() is called without any invocation of appendBytes.
1613 // Fallback to synchronous parsing in that case.
1614 if (!have_background_parser_) {
1615 can_parse_asynchronously_ = false;
1616 token_ = std::make_unique<HTMLToken>();
1617 tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
1618 DecodedDataDocumentParser::Flush();
1619 return;
1620 }
1621
1622 loading_task_runner_->PostTask(
1623 FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
1624 } else {
1625 DecodedDataDocumentParser::Flush();
1626 }
1627 }
1628
SetDecoder(std::unique_ptr<TextResourceDecoder> decoder)1629 void HTMLDocumentParser::SetDecoder(
1630 std::unique_ptr<TextResourceDecoder> decoder) {
1631 DCHECK(decoder);
1632 DecodedDataDocumentParser::SetDecoder(std::move(decoder));
1633
1634 if (have_background_parser_) {
1635 loading_task_runner_->PostTask(
1636 FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
1637 background_parser_, WTF::Passed(TakeDecoder())));
1638 }
1639 }
1640
DocumentElementAvailable()1641 void HTMLDocumentParser::DocumentElementAvailable() {
1642 TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
1643 Document* document = GetDocument();
1644 DCHECK(document);
1645 DCHECK(document->documentElement());
1646 Element* documentElement = GetDocument()->documentElement();
1647 if (documentElement->hasAttribute(u"\u26A1") ||
1648 documentElement->hasAttribute("amp") ||
1649 documentElement->hasAttribute("i-amphtml-layout")) {
1650 // The DocumentLoader fetches a main resource and handles the result.
1651 // But it may not be available if JavaScript appends HTML to the page later
1652 // in the page's lifetime. This can happen both from in-page JavaScript and
1653 // from extensions. See example callstacks linked from crbug.com/931330.
1654 if (document->Loader()) {
1655 document->Loader()->DidObserveLoadingBehavior(
1656 kLoadingBehaviorAmpDocumentLoaded);
1657 }
1658 }
1659 if (preloader_)
1660 FetchQueuedPreloads();
1661 }
1662
CreatePreloadScanner(TokenPreloadScanner::ScannerType scanner_type)1663 std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
1664 TokenPreloadScanner::ScannerType scanner_type) {
1665 return std::make_unique<HTMLPreloadScanner>(
1666 options_, GetDocument()->Url(),
1667 std::make_unique<CachedDocumentParameters>(GetDocument()),
1668 MediaValuesCached::MediaValuesCachedData(*GetDocument()), scanner_type);
1669 }
1670
ScanAndPreload(HTMLPreloadScanner * scanner)1671 void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
1672 TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
1673 DCHECK(preloader_);
1674 bool seen_csp_meta_tag = false;
1675 PreloadRequestStream requests = scanner->Scan(
1676 GetDocument()->ValidBaseElementURL(), nullptr, seen_csp_meta_tag);
1677 task_runner_state_->SetSeenCSPMetaTag(seen_csp_meta_tag);
1678 for (auto& request : requests) {
1679 queued_preloads_.push_back(std::move(request));
1680 }
1681 FetchQueuedPreloads();
1682 }
1683
FetchQueuedPreloads()1684 void HTMLDocumentParser::FetchQueuedPreloads() {
1685 DCHECK(preloader_);
1686 TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");
1687
1688 if (CanParseAsynchronously()) {
1689 if (pending_csp_meta_token_ || !GetDocument()->documentElement())
1690 return;
1691 }
1692
1693 if (!queued_preloads_.IsEmpty())
1694 preloader_->TakeAndPreload(queued_preloads_);
1695 }
1696
1697 } // namespace blink
1698