1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
27 
28 #include <memory>
29 #include <utility>
30 
31 #include "base/auto_reset.h"
32 #include "base/numerics/safe_conversions.h"
33 #include "third_party/blink/public/common/features.h"
34 #include "third_party/blink/public/common/loader/loading_behavior_flag.h"
35 #include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
36 #include "third_party/blink/public/platform/platform.h"
37 #include "third_party/blink/public/platform/task_type.h"
38 #include "third_party/blink/renderer/core/css/media_values_cached.h"
39 #include "third_party/blink/renderer/core/css/style_engine.h"
40 #include "third_party/blink/renderer/core/dom/document_fragment.h"
41 #include "third_party/blink/renderer/core/dom/element.h"
42 #include "third_party/blink/renderer/core/frame/local_frame.h"
43 #include "third_party/blink/renderer/core/html/html_document.h"
44 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
45 #include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
46 #include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
47 #include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
48 #include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
49 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
50 #include "third_party/blink/renderer/core/html/parser/pump_session.h"
51 #include "third_party/blink/renderer/core/html_names.h"
52 #include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
53 #include "third_party/blink/renderer/core/loader/document_loader.h"
54 #include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
55 #include "third_party/blink/renderer/core/loader/preload_helper.h"
56 #include "third_party/blink/renderer/core/probe/core_probes.h"
57 #include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
58 #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
59 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
60 #include "third_party/blink/renderer/platform/heap/handle.h"
61 #include "third_party/blink/renderer/platform/heap/heap.h"
62 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
63 #include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
64 #include "third_party/blink/renderer/platform/runtime_enabled_features.h"
65 #include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
66 #include "third_party/blink/renderer/platform/scheduler/public/thread.h"
67 #include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
68 #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
69 #include "third_party/blink/renderer/platform/wtf/shared_buffer.h"
70 
71 namespace blink {
72 
// File-local counter exposed to tests through the two accessors below.
static size_t g_discarded_token_count_for_testing{0};
74 
ResetDiscardedTokenCountForTesting()75 void ResetDiscardedTokenCountForTesting() {
76   g_discarded_token_count_for_testing = 0;
77 }
78 
GetDiscardedTokenCountForTesting()79 size_t GetDiscardedTokenCountForTesting() {
80   return g_discarded_token_count_for_testing;
81 }
82 
// Default upper bound on the number of tokens the foreground HTML parser
// tries to process in a single go. Smaller budgets generally give a faster
// first paint; larger budgets delay first paint but bring it closer to the
// final page. Used only when no Finch-provided value exists.
constexpr int kDefaultMaxTokenizationBudget{250};
89 
90 class EndIfDelayedForbiddenScope;
91 class ShouldCompleteScope;
92 
93 // This class encapsulates the internal state needed for synchronous foreground
94 // HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
95 // tracks what should be done after the pump completes.)
96 class HTMLDocumentParserState
97     : public GarbageCollected<HTMLDocumentParserState> {
98   friend EndIfDelayedForbiddenScope;
99   friend ShouldCompleteScope;
100 
101  public:
102   // Keeps track of whether the parser needs to complete tokenization work,
103   // optionally followed by EndIfDelayed.
104   enum class DeferredParserState {
105     // Indicates that a tokenizer pump has either completed or hasn't been
106     // scheduled.
107     kNotScheduled = 0,  // Enforce ordering in this enum.
108     // Indicates that a tokenizer pump is scheduled and hasn't completed yet.
109     kScheduled = 1,
110     // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled.
111     kScheduledWithEndIfDelayed = 2
112   };
113 
114   enum class MetaCSPTokenState {
115     // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to
116     // defer any preloads until we've added the CSP token to the document and
117     // applied the Content Security Policy.
118     kSeen = 0,
119     // Indicates that there is no meta CSP token in the upcoming chunk.
120     kNotSeen = 1,
121     // Indicates that we've added the CSP token to the document and we can now
122     // fetch preloads.
123     kProcessed = 2,
124     // Indicates that it's too late to apply a Content-Security policy (because
125     // we've exited the header section.)
126     kUnenforceable = 3,
127   };
128 
HTMLDocumentParserState(ParserSynchronizationPolicy mode)129   explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
130       : state_(DeferredParserState::kNotScheduled),
131         meta_csp_state_(MetaCSPTokenState::kNotSeen),
132         mode_(mode),
133         end_if_delayed_forbidden_(0),
134         should_complete_(0) {}
135 
Trace(Visitor * v) const136   void Trace(Visitor* v) const {}
137 
SetState(DeferredParserState state)138   void SetState(DeferredParserState state) {
139     DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
140     state_ = state;
141   }
GetState() const142   DeferredParserState GetState() const { return state_; }
143 
IsScheduled() const144   bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }
GetStateAsString() const145   const char* GetStateAsString() const {
146     switch (state_) {
147       case DeferredParserState::kNotScheduled:
148         return "not_scheduled";
149       case DeferredParserState::kScheduled:
150         return "scheduled";
151       case DeferredParserState::kScheduledWithEndIfDelayed:
152         return "scheduled_with_end_if_delayed";
153     }
154   }
155 
ShouldEndIfDelayed() const156   bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; }
ShouldComplete() const157   bool ShouldComplete() const {
158     return should_complete_ || GetMode() != kAllowDeferredParsing;
159   }
IsSynchronous() const160   bool IsSynchronous() const {
161     return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing;
162   }
GetMode() const163   ParserSynchronizationPolicy GetMode() const { return mode_; }
164 
SetSeenCSPMetaTag(const bool seen)165   void SetSeenCSPMetaTag(const bool seen) {
166     if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable)
167       return;
168     if (seen)
169       meta_csp_state_ = MetaCSPTokenState::kSeen;
170     else
171       meta_csp_state_ = MetaCSPTokenState::kNotSeen;
172   }
173 
SetExitedHeader()174   void SetExitedHeader() {
175     meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
176   }
HaveExitedHeader() const177   bool HaveExitedHeader() const {
178     return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
179   }
180 
181  private:
EnterEndIfDelayedForbidden()182   void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; }
ExitEndIfDelayedForbidden()183   void ExitEndIfDelayedForbidden() {
184     end_if_delayed_forbidden_--;
185     DCHECK_GE(end_if_delayed_forbidden_, 0);
186   }
187 
EnterShouldComplete()188   void EnterShouldComplete() { should_complete_++; }
ExitShouldComplete()189   void ExitShouldComplete() {
190     should_complete_--;
191     DCHECK_GE(should_complete_, 0);
192   }
193 
194   DeferredParserState state_;
195   MetaCSPTokenState meta_csp_state_;
196   ParserSynchronizationPolicy mode_;
197   int end_if_delayed_forbidden_;
198   int should_complete_;
199 };
200 
201 class EndIfDelayedForbiddenScope {
202   STACK_ALLOCATED();
203 
204  public:
EndIfDelayedForbiddenScope(HTMLDocumentParserState * state)205   explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
206       : state_(state) {
207     state_->EnterEndIfDelayedForbidden();
208   }
~EndIfDelayedForbiddenScope()209   ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); }
210 
211  private:
212   HTMLDocumentParserState* state_;
213 };
214 
215 class ShouldCompleteScope {
216   STACK_ALLOCATED();
217 
218  public:
ShouldCompleteScope(HTMLDocumentParserState * state)219   explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) {
220     state_->EnterShouldComplete();
221   }
~ShouldCompleteScope()222   ~ShouldCompleteScope() { state_->ExitShouldComplete(); }
223 
224  private:
225   HTMLDocumentParserState* state_;
226 };
227 
228 // This is a direct transcription of step 4 from:
229 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
TokenizerStateForContextElement(Element * context_element,bool report_errors,const HTMLParserOptions & options)230 static HTMLTokenizer::State TokenizerStateForContextElement(
231     Element* context_element,
232     bool report_errors,
233     const HTMLParserOptions& options) {
234   if (!context_element)
235     return HTMLTokenizer::kDataState;
236 
237   const QualifiedName& context_tag = context_element->TagQName();
238 
239   if (context_tag.Matches(html_names::kTitleTag) ||
240       context_tag.Matches(html_names::kTextareaTag))
241     return HTMLTokenizer::kRCDATAState;
242   if (context_tag.Matches(html_names::kStyleTag) ||
243       context_tag.Matches(html_names::kXmpTag) ||
244       context_tag.Matches(html_names::kIFrameTag) ||
245       context_tag.Matches(html_names::kNoembedTag) ||
246       (context_tag.Matches(html_names::kNoscriptTag) &&
247        options.scripting_flag) ||
248       context_tag.Matches(html_names::kNoframesTag))
249     return report_errors ? HTMLTokenizer::kRAWTEXTState
250                          : HTMLTokenizer::kPLAINTEXTState;
251   if (context_tag.Matches(html_names::kScriptTag))
252     return report_errors ? HTMLTokenizer::kScriptDataState
253                          : HTMLTokenizer::kPLAINTEXTState;
254   if (context_tag.Matches(html_names::kPlaintextTag))
255     return HTMLTokenizer::kPLAINTEXTState;
256   return HTMLTokenizer::kDataState;
257 }
258 
259 class ScopedYieldTimer {
260  public:
261   // This object is created at the start of a block of parsing, and will
262   // report the time since the last block yielded if known.
ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer> * timer,HTMLParserMetrics * metrics_reporter)263   ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
264                    HTMLParserMetrics* metrics_reporter)
265       : timer_(timer), reporting_metrics_(metrics_reporter) {
266     if (!reporting_metrics_ || !(*timer_))
267       return;
268 
269     metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
270     timer_->reset();
271   }
272 
273   // The destructor creates a new timer, which will keep track of time until
274   // the next block starts.
~ScopedYieldTimer()275   ~ScopedYieldTimer() {
276     if (reporting_metrics_)
277       *timer_ = std::make_unique<base::ElapsedTimer>();
278   }
279 
280  private:
281   std::unique_ptr<base::ElapsedTimer>* timer_;
282   bool reporting_metrics_;
283 };
284 
HTMLDocumentParser(HTMLDocument & document,ParserSynchronizationPolicy sync_policy)285 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
286                                        ParserSynchronizationPolicy sync_policy)
287     : HTMLDocumentParser(document, kAllowScriptingContent, sync_policy) {
288   script_runner_ =
289       HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);
290 
291   // Allow declarative shadow DOM for the document parser, if not explicitly
292   // disabled.
293   bool allow_shadow_root = document.GetDeclarativeShadowRootAllowState() !=
294                            Document::DeclarativeShadowRootAllowState::kDeny;
295   tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
296       this, document, kAllowScriptingContent, options_, allow_shadow_root);
297 }
298 
HTMLDocumentParser(DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy)299 HTMLDocumentParser::HTMLDocumentParser(
300     DocumentFragment* fragment,
301     Element* context_element,
302     ParserContentPolicy parser_content_policy)
303     : HTMLDocumentParser(fragment->GetDocument(),
304                          parser_content_policy,
305                          kForceSynchronousParsing) {
306   // Allow declarative shadow DOM for the fragment parser only if explicitly
307   // enabled.
308   bool allow_shadow_root =
309       fragment->GetDocument().GetDeclarativeShadowRootAllowState() ==
310       Document::DeclarativeShadowRootAllowState::kAllow;
311 
312   // No script_runner_ in fragment parser.
313   tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
314       this, fragment, context_element, parser_content_policy, options_,
315       allow_shadow_root);
316 
317   // For now document fragment parsing never reports errors.
318   bool report_errors = false;
319   tokenizer_->SetState(TokenizerStateForContextElement(
320       context_element, report_errors, options_));
321 }
322 
323 namespace {
GetMaxTokenizationBudget()324 int GetMaxTokenizationBudget() {
325   static int max = base::GetFieldTrialParamByFeatureAsInt(
326       features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget",
327       kDefaultMaxTokenizationBudget);
328   return max;
329 }
330 }  // namespace
331 
HTMLDocumentParser(Document & document,ParserContentPolicy content_policy,ParserSynchronizationPolicy sync_policy)332 HTMLDocumentParser::HTMLDocumentParser(Document& document,
333                                        ParserContentPolicy content_policy,
334                                        ParserSynchronizationPolicy sync_policy)
335     : ScriptableDocumentParser(document, content_policy),
336       options_(&document),
337       reentry_permit_(HTMLParserReentryPermit::Create()),
338       token_(sync_policy != kAllowAsynchronousParsing
339                  ? std::make_unique<HTMLToken>()
340                  : nullptr),
341       tokenizer_(sync_policy != kAllowAsynchronousParsing
342                      ? std::make_unique<HTMLTokenizer>(options_)
343                      : nullptr),
344       loading_task_runner_(sync_policy == kForceSynchronousParsing
345                                ? nullptr
346                                : document.GetTaskRunner(TaskType::kNetworking)),
347       parser_scheduler_(sync_policy == kAllowAsynchronousParsing
348                             ? MakeGarbageCollected<HTMLParserScheduler>(
349                                   this,
350                                   loading_task_runner_.get())
351                             : nullptr),
352       task_runner_state_(
353           MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
354       pending_csp_meta_token_(nullptr),
355       can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing),
356       end_was_delayed_(false),
357       have_background_parser_(false),
358       pump_session_nesting_level_(0),
359       pump_speculations_session_nesting_level_(0),
360       is_parsing_at_line_number_(false),
361       tried_loading_link_headers_(false),
362       added_pending_parser_blocking_stylesheet_(false),
363       is_waiting_for_stylesheets_(false),
364       scheduler_(sync_policy == kAllowDeferredParsing
365                      ? Thread::Current()->Scheduler()
366                      : nullptr) {
367   DCHECK(CanParseAsynchronously() || (token_ && tokenizer_));
368   // Asynchronous parsing is not allowed in prefetch mode.
369   DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously());
370 
371   // It is permissible to request the background HTML parser whilst also using
372   // --enable-blink-features=ForceSynchronousHTMLParsing, but it's usually
373   // unintentional. To help flush out these cases, trigger a DCHECK.
374   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() ||
375          !CanParseAsynchronously());
376 
377   // Report metrics for async document parsing only. The document
378   // must be main frame to meet UKM requirements, and must have a high
379   // resolution clock for high quality data.
380   if (sync_policy == kAllowAsynchronousParsing && document.GetFrame() &&
381       document.GetFrame()->IsMainFrame() &&
382       base::TimeTicks::IsHighResolution()) {
383     metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
384         document.UkmSourceID(), document.UkmRecorder());
385   }
386 
387   max_tokenization_budget_ = GetMaxTokenizationBudget();
388 
389   // Don't create preloader for parsing clipboard content.
390   if (content_policy == kDisallowScriptingAndPluginContent)
391     return;
392 
393   // Create preloader only when the document is:
394   // - attached to a frame (likely the prefetched resources will be loaded
395   // soon),
396   // - a HTML import document (blocks rendering and also resources will be
397   // loaded soon), or
398   // - is for no-state prefetch (made specifically for running preloader).
399   if (!document.GetFrame() && !document.IsHTMLImport() &&
400       !document.IsPrefetchOnly())
401     return;
402 
403   preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
404 }
405 
// Defaulted; stopping the background parser is done in Dispose() and Detach()
// rather than here.
HTMLDocumentParser::~HTMLDocumentParser() = default;
407 
Dispose()408 void HTMLDocumentParser::Dispose() {
409   // In Oilpan, HTMLDocumentParser can die together with Document, and detach()
410   // is not called in this case.
411   if (have_background_parser_)
412     StopBackgroundParser();
413 }
414 
// Reports the garbage-collected members of this parser to |visitor|, then
// forwards to both base classes.
void HTMLDocumentParser::Trace(Visitor* visitor) const {
  visitor->Trace(tree_builder_);
  visitor->Trace(parser_scheduler_);
  visitor->Trace(script_runner_);
  visitor->Trace(preloader_);
  visitor->Trace(task_runner_state_);
  ScriptableDocumentParser::Trace(visitor);
  HTMLParserScriptRunnerHost::Trace(visitor);
}
424 
HasPendingWorkScheduledForTesting() const425 bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
426   return task_runner_state_->IsScheduled();
427 }
428 
// Tears the parser down: stops the background parser, cancels any scheduled
// tokenizer pump, detaches the base class, the script runner (if any) and the
// tree builder, and releases the preload scanners, the parser scheduler, the
// tokenizer and the token.
void HTMLDocumentParser::Detach() {
  if (have_background_parser_)
    StopBackgroundParser();
  // Deschedule any pending tokenizer pumps.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kNotScheduled);
  DocumentParser::Detach();
  // script_runner_ is absent when parsing a DocumentFragment.
  if (script_runner_)
    script_runner_->Detach();
  tree_builder_->Detach();
  // FIXME: It seems wrong that we would have a preload scanner here. Yet during
  // fast/dom/HTMLScriptElement/script-load-events.html we do.
  preload_scanner_.reset();
  insertion_preload_scanner_.reset();
  // parser_scheduler_ only exists for asynchronous parsing and may already
  // have been cleared by StopParsing().
  if (parser_scheduler_) {
    parser_scheduler_->Detach();
    parser_scheduler_.Clear();
  }
  // Oilpan: It is important to clear token_ to deallocate backing memory of
  // HTMLToken::data_ and let the allocator reuse the memory for
  // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
  // tokenizer_ first because tokenizer_ has a raw pointer to token_.
  tokenizer_.reset();
  token_.reset();
}
454 
StopParsing()455 void HTMLDocumentParser::StopParsing() {
456   DocumentParser::StopParsing();
457   if (parser_scheduler_) {
458     parser_scheduler_->Detach();
459     parser_scheduler_.Clear();
460   }
461   task_runner_state_->SetState(
462       HTMLDocumentParserState::DeferredParserState::kNotScheduled);
463   if (have_background_parser_)
464     StopBackgroundParser();
465 }
466 
467 // This kicks off "Once the user agent stops parsing" as described by:
468 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
PrepareToStopParsing()469 void HTMLDocumentParser::PrepareToStopParsing() {
470   TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
471                (void*)this);
472   // FIXME: It may not be correct to disable this for the background parser.
473   // That means hasInsertionPoint() may not be correct in some cases.
474   DCHECK(!HasInsertionPoint() || have_background_parser_);
475 
476   // NOTE: This pump should only ever emit buffered character tokens.
477   if (tokenizer_ && !GetDocument()->IsPrefetchOnly()) {
478     DCHECK(!have_background_parser_);
479     ShouldCompleteScope should_complete(task_runner_state_);
480     EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
481     PumpTokenizerIfPossible();
482   }
483 
484   if (IsStopped())
485     return;
486 
487   DocumentParser::PrepareToStopParsing();
488 
489   // We will not have a scriptRunner when parsing a DocumentFragment.
490   if (script_runner_)
491     GetDocument()->SetReadyState(Document::kInteractive);
492 
493   // Setting the ready state above can fire mutation event and detach us from
494   // underneath. In that case, just bail out.
495   if (IsDetached())
496     return;
497 
498   if (script_runner_)
499     script_runner_->RecordMetricsAtParseEnd();
500 
501   AttemptToRunDeferredScriptsAndEnd();
502 }
503 
IsParsingFragment() const504 bool HTMLDocumentParser::IsParsingFragment() const {
505   return tree_builder_->IsParsingFragment();
506 }
507 
DeferredPumpTokenizerIfPossible()508 void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
509   // This method is called asynchronously, continues building the HTML document.
510   // This function should only be called when
511   // --enable-blink-features=ForceSynchronousHTMLParsing is available.
512   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
513   // If we're scheduled for a tokenizer pump, then document should be attached
514   // and the parser should not be stopped, but sometimes a script completes
515   // loading (so we schedule a pump) but the Document is stopped in the meantime
516   // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
517   DCHECK(task_runner_state_->GetState() ==
518              HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
519          !IsDetached());
520   TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
521                "parser", (void*)this, "state",
522                task_runner_state_->GetStateAsString());
523   bool should_call_delay_end =
524       task_runner_state_->GetState() ==
525       HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
526   if (task_runner_state_->IsScheduled()) {
527     task_runner_state_->SetState(
528         HTMLDocumentParserState::DeferredParserState::kNotScheduled);
529     if (should_call_delay_end) {
530       EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
531       PumpTokenizerIfPossible();
532       EndIfDelayed();
533     } else {
534       PumpTokenizerIfPossible();
535     }
536   }
537 }
538 
PumpTokenizerIfPossible()539 void HTMLDocumentParser::PumpTokenizerIfPossible() {
540   // This method is called synchronously, builds the HTML document up to
541   // the current budget, and optionally completes.
542   TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
543                (void*)this);
544 
545   bool yielded = false;
546   CheckIfBlockingStylesheetAdded();
547   if (!IsStopped() &&
548       (!IsPaused() || task_runner_state_->ShouldEndIfDelayed())) {
549     yielded = PumpTokenizer();
550   }
551 
552   if (yielded) {
553     DCHECK(!task_runner_state_->ShouldComplete());
554     SchedulePumpTokenizer();
555   } else if (task_runner_state_->ShouldEndIfDelayed()) {
556     // If we did not exceed the budget or parsed everything there was to
557     // parse, check if we should complete the document.
558     if (task_runner_state_->ShouldComplete() || IsStopped() || IsStopping()) {
559       EndIfDelayed();
560     } else {
561       ScheduleEndIfDelayed();
562     }
563   }
564 }
565 
IsScheduledForUnpause() const566 bool HTMLDocumentParser::IsScheduledForUnpause() const {
567   return parser_scheduler_ && parser_scheduler_->IsScheduledForUnpause();
568 }
569 
570 // Used by HTMLParserScheduler
ResumeParsingAfterYield()571 void HTMLDocumentParser::ResumeParsingAfterYield() {
572   DCHECK(CanParseAsynchronously());
573   DCHECK(have_background_parser_);
574   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
575 
576   ScopedYieldTimer timer(&yield_timer_, metrics_reporter_.get());
577 
578   CheckIfBlockingStylesheetAdded();
579   if (IsStopped() || IsPaused())
580     return;
581 
582   PumpPendingSpeculations();
583 }
584 
RunScriptsForPausedTreeBuilder()585 void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
586   TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
587                "parser", (void*)this);
588   DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));
589 
590   TextPosition script_start_position = TextPosition::BelowRangePosition();
591   Element* script_element =
592       tree_builder_->TakeScriptToProcess(script_start_position);
593   // We will not have a scriptRunner when parsing a DocumentFragment.
594   if (script_runner_)
595     script_runner_->ProcessScriptElement(script_element, script_start_position);
596   CheckIfBlockingStylesheetAdded();
597 }
598 
CanTakeNextToken()599 bool HTMLDocumentParser::CanTakeNextToken() {
600   if (IsStopped())
601     return false;
602 
603   // If we're paused waiting for a script, we try to execute scripts before
604   // continuing.
605   if (tree_builder_->HasParserBlockingScript())
606     RunScriptsForPausedTreeBuilder();
607   if (IsStopped() || IsPaused())
608     return false;
609   return true;
610 }
611 
// Accepts a tokenized chunk and queues it in speculations_. Before queuing:
// - dispatches media link-header preloads once (first chunk that arrives
//   while the document has a loader),
// - remembers a pending <meta> CSP token if the chunk contains one,
// - either issues the chunk's preloads right away or parks them in
//   queued_preloads_, depending on CSP / AppCache state.
// Finally schedules the parser scheduler for unpause when the parser is
// neither paused nor already scheduled. Does nothing if the parser is no
// longer parsing.
// NOTE(review): presumably invoked with chunks produced by the background
// parser -- confirm against the caller.
void HTMLDocumentParser::EnqueueTokenizedChunk(
    std::unique_ptr<TokenizedChunk> chunk) {
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk");

  DCHECK(chunk);
  DCHECK(GetDocument());

  if (!IsParsing())
    return;

  // ApplicationCache needs to be initialized before issuing preloads. We
  // suspend preload until HTMLHTMLElement is inserted and ApplicationCache is
  // initialized. Note: link rel preloads don't follow this policy per the spec.
  // These directives should initiate a fetch as fast as possible.
  if (!tried_loading_link_headers_ && GetDocument()->Loader()) {
    // Note that on commit, the loader dispatched preloads for all the non-media
    // links.
    GetDocument()->Loader()->DispatchLinkHeaderPreloads(
        base::OptionalOrNullptr(chunk->viewport),
        PreloadHelper::kOnlyLoadMedia);
    tried_loading_link_headers_ = true;
    if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
      // Link header preloads for prefetched signed exchanges won't be started
      // until StartPrefetchedLinkHeaderPreloads() is called. See the header
      // comment of PrefetchedSignedExchangeManager.
      GetDocument()
          ->Loader()
          ->GetPrefetchedSignedExchangeManager()
          ->StartPrefetchedLinkHeaderPreloads();
    }
  }

  // Defer preloads if any of the chunks contains a <meta> csp tag.
  // The stored pointer refers into chunk->tokens; the chunk itself is moved
  // into speculations_ at the end of this function.
  if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) {
    pending_csp_meta_token_ =
        &chunk->tokens.at(chunk->pending_csp_meta_token_index);
  }

  if (preloader_) {
    // True when the response's AppCache id differs from kAppCacheNoCacheId.
    bool appcache_fetched = false;
    if (GetDocument()->Loader()) {
      appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() !=
                          mojom::blink::kAppCacheNoCacheId);
    }
    // The presence of a document element is used as the "AppCache is
    // initialized" signal.
    bool appcache_initialized = GetDocument()->documentElement();
    // Delay sending some requests if meta tag based CSP is present or
    // if AppCache was used to fetch the HTML but was not yet initialized for
    // this document.
    if (pending_csp_meta_token_ ||
        ((!base::FeatureList::IsEnabled(
              blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) ||
          appcache_fetched) &&
         !appcache_initialized)) {
      PreloadRequestStream link_rel_preloads;
      for (auto& request : chunk->preloads) {
        // Link rel preloads don't need to wait for AppCache but they
        // should probably wait for CSP.
        if (!pending_csp_meta_token_ && request->IsLinkRelPreload())
          link_rel_preloads.push_back(std::move(request));
        else
          queued_preloads_.push_back(std::move(request));
      }
      preloader_->TakeAndPreload(link_rel_preloads);
    } else {
      // We can safely assume that there are no queued preloads request after
      // the document element is available, as we empty the queue immediately
      // after the document element is created in documentElementAvailable().
      DCHECK(queued_preloads_.IsEmpty());
      preloader_->TakeAndPreload(chunk->preloads);
    }
  }

  speculations_.push_back(std::move(chunk));

  // Make sure the queued chunk gets processed, unless the parser is paused or
  // a resume is already scheduled.
  if (!IsPaused() && !IsScheduledForUnpause())
    parser_scheduler_->ScheduleForUnpause();
}
690 
DidReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData & data)691 void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser(
692     const DocumentEncodingData& data) {
693   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
694   GetDocument()->SetEncodingData(data);
695 }
696 
ValidateSpeculations(std::unique_ptr<TokenizedChunk> chunk)697 void HTMLDocumentParser::ValidateSpeculations(
698     std::unique_ptr<TokenizedChunk> chunk) {
699   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
700   DCHECK(chunk);
701   // TODO(kouhei): We should simplify codepath here by disallowing
702   // ValidateSpeculations
703   // while IsPaused, and last_chunk_before_pause_ can simply be
704   // pushed to speculations_.
705   if (IsPaused()) {
706     // We're waiting on a network script or stylesheet, just save the chunk,
707     // we'll get a second ValidateSpeculations call after the script or
708     // stylesheet completes. This call should have been made immediately after
709     // RunScriptsForPausedTreeBuilder in the script case which may have started
710     // a network load and left us waiting.
711     DCHECK(!last_chunk_before_pause_);
712     last_chunk_before_pause_ = std::move(chunk);
713     return;
714   }
715 
716   DCHECK(!last_chunk_before_pause_);
717   std::unique_ptr<HTMLTokenizer> tokenizer = std::move(tokenizer_);
718   std::unique_ptr<HTMLToken> token = std::move(token_);
719 
720   if (!tokenizer) {
721     // There must not have been any changes to the HTMLTokenizer state on the
722     // main thread, which means the speculation buffer is correct.
723     return;
724   }
725 
726   // Currently we're only smart enough to reuse the speculation buffer if the
727   // tokenizer both starts and ends in the DataState. That state is simplest
728   // because the HTMLToken is always in the Uninitialized state. We should
729   // consider whether we can reuse the speculation buffer in other states, but
730   // we'd likely need to do something more sophisticated with the HTMLToken.
731   if (chunk->tokenizer_state == HTMLTokenizer::kDataState &&
732       tokenizer->GetState() == HTMLTokenizer::kDataState &&
733       input_.Current().IsEmpty() &&
734       chunk->tree_builder_state ==
735           HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get())) {
736     DCHECK(token->IsUninitialized());
737     return;
738   }
739 
740   DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token),
741                                    std::move(tokenizer));
742 }
743 
DiscardSpeculationsAndResumeFrom(std::unique_ptr<TokenizedChunk> last_chunk_before_script,std::unique_ptr<HTMLToken> token,std::unique_ptr<HTMLTokenizer> tokenizer)744 void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom(
745     std::unique_ptr<TokenizedChunk> last_chunk_before_script,
746     std::unique_ptr<HTMLToken> token,
747     std::unique_ptr<HTMLTokenizer> tokenizer) {
748   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
749   // Clear back ref.
750   background_parser_->ClearParser();
751 
752   size_t discarded_token_count = 0;
753   for (const auto& speculation : speculations_) {
754     discarded_token_count += speculation->tokens.size();
755   }
756   g_discarded_token_count_for_testing += discarded_token_count;
757 
758   speculations_.clear();
759   pending_csp_meta_token_ = nullptr;
760   queued_preloads_.clear();
761 
762   std::unique_ptr<BackgroundHTMLParser::Checkpoint> checkpoint =
763       std::make_unique<BackgroundHTMLParser::Checkpoint>();
764   checkpoint->parser = this;
765   checkpoint->token = std::move(token);
766   checkpoint->tokenizer = std::move(tokenizer);
767   checkpoint->tree_builder_state =
768       HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());
769   checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint;
770   checkpoint->preload_scanner_checkpoint =
771       last_chunk_before_script->preload_scanner_checkpoint;
772   checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy();
773   // FIXME: This should be passed in instead of cleared.
774   input_.Current().Clear();
775 
776   DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread());
777   loading_task_runner_->PostTask(
778       FROM_HERE,
779       WTF::Bind(&BackgroundHTMLParser::ResumeFrom, background_parser_,
780                 WTF::Passed(std::move(checkpoint))));
781 }
782 
// Feeds every token of |pop_chunk| to the tree builder on the main thread.
//
// |reached_end_of_file| is set to true when an EndOfFile token is processed.
// It is never written otherwise, so the caller must initialize it.
//
// Returns the number of start-tag and end-tag tokens that were processed;
// tokens of a chunk flagged |starting_script| are not counted.
size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser(
    std::unique_ptr<TokenizedChunk> pop_chunk,
    bool* reached_end_of_file) {
  TRACE_EVENT_WITH_FLOW0(
      "blink,loading",
      "HTMLDocumentParser::processTokenizedChunkFromBackgroundParser",
      pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN);
  // Flag that a meaningful line number is available while this chunk is being
  // processed.
  base::AutoReset<bool> has_line_number(&is_parsing_at_line_number_, true);

  SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1);
  SECURITY_DCHECK(!InPumpSession());
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsParsingFragment());
  DCHECK(!IsPaused());
  DCHECK(!IsStopped());
  DCHECK(CanParseAsynchronously());
  DCHECK(!tokenizer_);
  DCHECK(!token_);
  DCHECK(!last_chunk_before_pause_);

  std::unique_ptr<TokenizedChunk> chunk(std::move(pop_chunk));
  // |tokens| refers into |chunk| and is only valid while |chunk| is owned
  // here.
  const CompactHTMLTokenStream& tokens = chunk->tokens;
  size_t element_token_count = 0;

  // Notify the background parser, via the loading task runner, that this
  // chunk has been started, passing along its input checkpoint.
  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint,
                           background_parser_, chunk->input_checkpoint));

  for (const auto& token : tokens) {
    DCHECK(!IsWaitingForScripts());

    if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag ||
                                    token.GetType() == HTMLToken::kEndTag))
      element_token_count++;

    // Keep the reported text position in sync with the token being built.
    text_position_ = token.GetTextPosition();

    ConstructTreeFromCompactHTMLToken(token);

    if (IsStopped())
      break;

    // Preloads were queued if there was a <meta> csp token in a tokenized
    // chunk.
    if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) {
      pending_csp_meta_token_ = nullptr;
      FetchQueuedPreloads();
    }

    if (IsPaused()) {
      // The script or stylesheet should be the last token of this bunch.
      DCHECK_EQ(&token, &tokens.back());
      if (IsWaitingForScripts())
        RunScriptsForPausedTreeBuilder();
      // Ownership of |chunk| is handed off here. |tokens| and |token| refer
      // into it, so nothing after this call may touch them.
      ValidateSpeculations(std::move(chunk));
      break;
    }

    if (token.GetType() == HTMLToken::kEndOfFile) {
      // The EOF is assumed to be the last token of this bunch.
      DCHECK_EQ(&token, &tokens.back());
      // There should never be any chunks after the EOF.
      DCHECK(speculations_.IsEmpty());
      PrepareToStopParsing();
      *reached_end_of_file = true;
      break;
    }

    DCHECK(!tokenizer_);
    DCHECK(!token_);
  }

  // Make sure all required pending text nodes are emitted before returning.
  // This leaves "script", "style" and "svg" nodes text nodes intact.
  if (!IsStopped())
    tree_builder_->Flush(kFlushIfAtTextLimit);

  is_parsing_at_line_number_ = false;

  return element_token_count;
}
864 
// Drains speculations_ by processing queued chunks one at a time. The loop
// ends when the queue is empty, when the parser is no longer parsing, is
// paused or has been scheduled for unpause, or when the scheduler asks to
// yield. If a metrics reporter is present, the work done in this session is
// reported to it afterwards.
void HTMLDocumentParser::PumpPendingSpeculations() {
  // If this assert fails, you need to call ValidateSpeculations to make sure
  // tokenizer_ and token_ don't have state that invalidates speculations_.
  DCHECK(!tokenizer_);
  DCHECK(!token_);
  DCHECK(!last_chunk_before_pause_);
  DCHECK(!IsPaused());
  DCHECK(!IsStopped());
  DCHECK(!IsScheduledForUnpause());
  DCHECK(!InPumpSession());
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  // FIXME: Here should never be reached when there is a blocking script,
  // but it happens in unknown scenarios. See https://crbug.com/440901
  if (IsWaitingForScripts()) {
    parser_scheduler_->ScheduleForUnpause();
    return;
  }

  // Do not allow pumping speculations in nested event loops.
  if (pump_speculations_session_nesting_level_) {
    parser_scheduler_->ScheduleForUnpause();
    return;
  }

  probe::ParseHTML probe(GetDocument(), this);

  SpeculationsPumpSession session(pump_speculations_session_nesting_level_);
  // Only ever set to true by ProcessTokenizedChunkFromBackgroundParser.
  bool reached_end_of_file = false;
  while (!speculations_.IsEmpty()) {
    DCHECK(!IsScheduledForUnpause());
    size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser(
        speculations_.TakeFirst(), &reached_end_of_file);
    session.AddedElementTokens(element_token_count);

    // Always check IsParsing first as document_ may be null. Surprisingly,
    // IsScheduledForUnpause() may be set here as a result of
    // ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript
    // which invokes nested event loops. (e.g. inspector breakpoints)
    CheckIfBlockingStylesheetAdded();
    if (!IsParsing() || IsPaused() || IsScheduledForUnpause())
      break;

    // Stop when nothing is left, or when the scheduler wants to yield before
    // the next chunk (it is told whether that chunk starts a script).
    if (speculations_.IsEmpty() ||
        parser_scheduler_->YieldIfNeeded(
            session, speculations_.front()->starting_script))
      break;
  }

  if (metrics_reporter_) {
    metrics_reporter_->AddChunk(session.ElapsedTime(),
                                session.ProcessedElementTokens());
    if (reached_end_of_file)
      metrics_reporter_->ReportMetricsAtParseEnd();
  }
}
921 
ForcePlaintextForTextDocument()922 void HTMLDocumentParser::ForcePlaintextForTextDocument() {
923   if (CanParseAsynchronously()) {
924     // This method is called before any data is appended, so we have to start
925     // the background parser ourselves.
926     if (!have_background_parser_)
927       StartBackgroundParser();
928 
929     // This task should be synchronous, because otherwise synchronous
930     // tokenizing can happen before plaintext is forced.
931     background_parser_->ForcePlaintextForTextDocument();
932   } else
933     tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
934 }
935 
// Runs the main-thread tokenizer over input_.Current(), handing each token to
// the tree builder.
//
// Returns true when the loop stopped in order to yield (token budget used up
// or the scheduler reported high priority work, and only once the header has
// been exited). Returns false otherwise, including when the parser was stopped
// during the pump.
bool HTMLDocumentParser::PumpTokenizer() {
  DCHECK(!GetDocument()->IsPrefetchOnly());
  DCHECK(!IsStopped());
  DCHECK(tokenizer_);
  DCHECK(token_);

  PumpSession session(pump_session_nesting_level_);

  // If we're in kForceSynchronousParsing, always run until all available input
  // is consumed.
  bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
                                     task_runner_state_->IsSynchronous() ||
                                     pump_session_nesting_level_ > 1;
  TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
               should_run_until_completion, "parser", (void*)this);

  // We tell the InspectorInstrumentation about every pump, even if we end up
  // pumping nothing.  It can filter out empty pumps itself.
  // FIXME: input_.Current().length() is only accurate if we end up parsing the
  // whole buffer in this pump.  We should pass how much we parsed as part of
  // DidWriteHTML instead of WillWriteHTML.
  probe::ParseHTML probe(GetDocument(), this);

  bool should_yield = false;
  // Number of tokens that may still be produced before a yield is considered.
  int budget = max_tokenization_budget_;

  while (CanTakeNextToken() && !should_yield) {
    {
      RUNTIME_CALL_TIMER_SCOPE(
          V8PerIsolateData::MainThreadIsolate(),
          RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
      // Stop pumping when the tokenizer cannot produce another token from the
      // current input.
      if (!tokenizer_->NextToken(input_.Current(), Token()))
        break;
      budget--;
    }
    ConstructTreeFromHTMLToken();
    if (!should_run_until_completion && !IsPaused()) {
      DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
      // Yield when out of budget or when the scheduler has higher priority
      // work, but never before the header has been exited.
      should_yield = budget <= 0;
      should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
      should_yield &= task_runner_state_->HaveExitedHeader();
    } else {
      should_yield = false;
    }
    DCHECK(IsStopped() || Token().IsUninitialized());
  }

  if (IsStopped())
    return false;

  // There should only be PendingText left since the tree-builder always flushes
  // the task queue before returning. In case that ever changes, crash.
  tree_builder_->Flush(kFlushAlways);
  CHECK(!IsStopped());

  if (IsPaused()) {
    DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);

    // While paused, scan the remaining input for resources to preload. A
    // newly created scanner is seeded with the current input first.
    if (preloader_) {
      if (!preload_scanner_) {
        preload_scanner_ = CreatePreloadScanner(
            TokenPreloadScanner::ScannerType::kMainDocument);
        preload_scanner_->AppendToEnd(input_.Current());
      }
      ScanAndPreload(preload_scanner_.get());
    }
  }

  // should_run_until_completion implies that we should not yield
  CHECK(!should_run_until_completion || !should_yield);
  return should_yield;
}
1008 
SchedulePumpTokenizer()1009 void HTMLDocumentParser::SchedulePumpTokenizer() {
1010   TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
1011   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1012   DCHECK(!IsStopped());
1013   DCHECK(!InPumpSession());
1014   DCHECK(!task_runner_state_->ShouldComplete());
1015   if (task_runner_state_->IsScheduled()) {
1016     // If the parser is already scheduled, there's no need to do anything.
1017     return;
1018   }
1019   loading_task_runner_->PostTask(
1020       FROM_HERE, WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
1021                            WrapPersistent(this)));
1022   task_runner_state_->SetState(
1023       HTMLDocumentParserState::DeferredParserState::kScheduled);
1024 }
1025 
ScheduleEndIfDelayed()1026 void HTMLDocumentParser::ScheduleEndIfDelayed() {
1027   TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
1028   DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1029   DCHECK(!IsStopped());
1030   DCHECK(!InPumpSession());
1031   DCHECK(!task_runner_state_->ShouldComplete());
1032 
1033   // Schedule a pump callback if needed.
1034   if (!task_runner_state_->IsScheduled()) {
1035     loading_task_runner_->PostTask(
1036         FROM_HERE,
1037         WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
1038                   WrapPersistent(this)));
1039   }
1040   // If a pump is already scheduled, it's OK to just upgrade it to one
1041   // which calls EndIfDelayed afterwards.
1042   task_runner_state_->SetState(
1043       HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
1044 }
1045 
ConstructTreeFromHTMLToken()1046 void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
1047   DCHECK(!GetDocument()->IsPrefetchOnly());
1048 
1049   AtomicHTMLToken atomic_token(Token());
1050 
1051   // Check whether we've exited the header.
1052   if (!task_runner_state_->HaveExitedHeader()) {
1053     if (GetDocument()->body()) {
1054       task_runner_state_->SetExitedHeader();
1055     }
1056   }
1057 
1058   // We clear the token_ in case ConstructTreeFromAtomicToken
1059   // synchronously re-enters the parser. We don't clear the token immedately
1060   // for kCharacter tokens because the AtomicHTMLToken avoids copying the
1061   // characters by keeping a pointer to the underlying buffer in the
1062   // HTMLToken. Fortunately, kCharacter tokens can't cause us to re-enter
1063   // the parser.
1064   //
1065   // FIXME: Stop clearing the token_ once we start running the parser off
1066   // the main thread or once we stop allowing synchronous JavaScript
1067   // execution from ParseAttribute.
1068   if (Token().GetType() != HTMLToken::kCharacter)
1069     Token().Clear();
1070 
1071   tree_builder_->ConstructTree(&atomic_token);
1072   CheckIfBlockingStylesheetAdded();
1073 
1074   // FIXME: ConstructTree may synchronously cause Document to be detached.
1075   if (!token_)
1076     return;
1077 
1078   if (!Token().IsUninitialized()) {
1079     DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter);
1080     Token().Clear();
1081   }
1082 }
1083 
ConstructTreeFromCompactHTMLToken(const CompactHTMLToken & compact_token)1084 void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken(
1085     const CompactHTMLToken& compact_token) {
1086   DCHECK(!GetDocument()->IsPrefetchOnly());
1087   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1088   AtomicHTMLToken token(compact_token);
1089   tree_builder_->ConstructTree(&token);
1090   CheckIfBlockingStylesheetAdded();
1091 }
1092 
HasInsertionPoint()1093 bool HTMLDocumentParser::HasInsertionPoint() {
1094   // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our
1095   // model of the EOF character differs slightly from the one in the spec
1096   // because our treatment is uniform between network-sourced and script-sourced
1097   // input streams whereas the spec treats them differently.
1098   return input_.HasInsertionPoint() ||
1099          (WasCreatedByScript() && !input_.HaveSeenEndOfFile());
1100 }
1101 
insert(const String & source)1102 void HTMLDocumentParser::insert(const String& source) {
1103   if (IsStopped())
1104     return;
1105 
1106   TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
1107                source.length(), "parser", (void*)this);
1108 
1109   if (!tokenizer_) {
1110     DCHECK(!InPumpSession());
1111     DCHECK(have_background_parser_ || WasCreatedByScript());
1112     token_ = std::make_unique<HTMLToken>();
1113     tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
1114   }
1115 
1116   SegmentedString excluded_line_number_source(source);
1117   excluded_line_number_source.SetExcludeLineNumbers();
1118   input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);
1119 
1120   // Pump the the tokenizer to build the document from the given insert point.
1121   // Should process everything available and not defer anything.
1122   ShouldCompleteScope should_complete(task_runner_state_);
1123   EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
1124   // Call EndIfDelayed manually at the end to maintain preload behaviour.
1125   PumpTokenizerIfPossible();
1126 
1127   if (IsPaused()) {
1128     // Check the document.write() output with a separate preload scanner as
1129     // the main scanner can't deal with insertions.
1130     if (!insertion_preload_scanner_) {
1131       insertion_preload_scanner_ =
1132           CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
1133     }
1134     insertion_preload_scanner_->AppendToEnd(source);
1135     if (preloader_) {
1136       ScanAndPreload(insertion_preload_scanner_.get());
1137     }
1138   }
1139   EndIfDelayed();
1140 }
1141 
StartBackgroundParser()1142 void HTMLDocumentParser::StartBackgroundParser() {
1143   TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
1144   DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
1145   DCHECK(!IsStopped());
1146   DCHECK(CanParseAsynchronously());
1147   DCHECK(!have_background_parser_);
1148   DCHECK(GetDocument());
1149   have_background_parser_ = true;
1150 
1151   // Make sure that the viewport is up-to-date, so that the correct viewport
1152   // dimensions will be fed to the background parser and preload scanner.
1153   if (GetDocument()->Loader())
1154     GetDocument()->GetStyleEngine().UpdateViewport();
1155 
1156   std::unique_ptr<BackgroundHTMLParser::Configuration> config =
1157       std::make_unique<BackgroundHTMLParser::Configuration>();
1158   config->options = options_;
1159   config->parser = this;
1160   config->decoder = TakeDecoder();
1161 
1162   // The background parser is created on the main thread, but may otherwise
1163   // only be used from the parser thread.
1164   background_parser_ =
1165       BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
1166   // TODO(csharrison): This is a hack to initialize MediaValuesCached on the
1167   // correct thread. We should get rid of it.
1168 
1169   // TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
1170   // Trial. This currently exists because the TokenPreloadScanner needs to know
1171   // the status of the Priority Hints Origin Trial, and has no way of figuring
1172   // this out on its own. See https://crbug.com/821464.
1173   bool priority_hints_origin_trial_enabled =
1174       RuntimeEnabledFeatures::PriorityHintsEnabled(
1175           GetDocument()->GetExecutionContext());
1176 
1177   background_parser_->Init(
1178       GetDocument()->Url(),
1179       std::make_unique<CachedDocumentParameters>(GetDocument()),
1180       MediaValuesCached::MediaValuesCachedData(*GetDocument()),
1181       priority_hints_origin_trial_enabled);
1182 }
1183 
// Stops the background parser and records that this parser no longer has one.
// Requires that a background parser currently exists.
void HTMLDocumentParser::StopBackgroundParser() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  have_background_parser_ = false;

  // Make this sync, as lsan triggers on some unittests if the task runner is
  // used.
  background_parser_->Stop();
}
1195 
Append(const String & input_source)1196 void HTMLDocumentParser::Append(const String& input_source) {
1197   TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
1198                input_source.length(), "parser", (void*)this);
1199 
1200   if (IsStopped())
1201     return;
1202 
1203   // We should never reach this point if we're using a parser thread, as
1204   // appendBytes() will directly ship the data to the thread.
1205   DCHECK(!CanParseAsynchronously());
1206 
1207   const SegmentedString source(input_source);
1208 
1209   if (!preload_scanner_ && GetDocument()->Url().IsValid() &&
1210       (!task_runner_state_->IsSynchronous() ||
1211        GetDocument()->IsPrefetchOnly() || IsPaused())) {
1212     // If we're operating with synchronous, budgeted foreground HTML parsing
1213     // or using the background parser, need to create a preload scanner to
1214     // make sure that parser-blocking Javascript requests are dispatched in
1215     // plenty of time, which prevents unnecessary delays.
1216     // When parsing without a budget (e.g. for HTML fragment parsing), it's
1217     // additional overhead to scan the string unless the parser's already
1218     // paused whilst executing a script.
1219     preload_scanner_ =
1220         CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
1221   }
1222 
1223   if (GetDocument()->IsPrefetchOnly()) {
1224     // Do not prefetch if there is an appcache.
1225     if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0)
1226       return;
1227 
1228     preload_scanner_->AppendToEnd(source);
1229     ScanAndPreload(preload_scanner_.get());
1230 
1231     // Return after the preload scanner, do not actually parse the document.
1232     return;
1233   }
1234   if (preload_scanner_) {
1235     if (input_.Current().IsEmpty() && !IsPaused()) {
1236       // We have parsed until the end of the current input and so are now
1237       // moving ahead of the preload scanner. Clear the scanner so we know to
1238       // scan starting from the current input point if we block again.
1239       preload_scanner_.reset();
1240     } else {
1241       preload_scanner_->AppendToEnd(source);
1242       if (preloader_) {
1243         if (!task_runner_state_->IsSynchronous() || IsPaused()) {
1244           // Should scan and preload if the parser's paused and operating
1245           // synchronously, or if the parser's operating in an asynchronous
1246           // mode.
1247           ScanAndPreload(preload_scanner_.get());
1248         }
1249       }
1250     }
1251   }
1252 
1253   input_.AppendToEnd(source);
1254 
1255   if (InPumpSession()) {
1256     // We've gotten data off the network in a nested write. We don't want to
1257     // consume any more of the input stream now.  Do not worry.  We'll consume
1258     // this data in a less-nested write().
1259     return;
1260   }
1261 
1262   // Schedule a tokenizer pump to process this new data.
1263   if (task_runner_state_->GetMode() ==
1264           ParserSynchronizationPolicy::kAllowDeferredParsing &&
1265       !task_runner_state_->ShouldComplete()) {
1266     SchedulePumpTokenizer();
1267   } else {
1268     PumpTokenizerIfPossible();
1269   }
1270 }
1271 
// Final step of parsing: stops the background parser if there is one, tells
// the tree builder that parsing has finished, drops the preloader and stops
// the underlying DocumentParser. Must not be called on a detached parser or
// while the parser is scheduled for unpause.
void HTMLDocumentParser::end() {
  DCHECK(!IsDetached());
  DCHECK(!IsScheduledForUnpause());

  if (have_background_parser_)
    StopBackgroundParser();

  // Informs the rest of WebCore that parsing is really finished (and
  // deletes this).
  tree_builder_->Finished();

  // All preloads should be done.
  preloader_ = nullptr;

  DocumentParser::StopParsing();
}
1288 
AttemptToRunDeferredScriptsAndEnd()1289 void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
1290   DCHECK(IsStopping());
1291   // FIXME: It may not be correct to disable this for the background parser.
1292   // That means hasInsertionPoint() may not be correct in some cases.
1293   DCHECK(!HasInsertionPoint() || have_background_parser_);
1294   if (script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing())
1295     return;
1296   end();
1297 }
1298 
ShouldDelayEnd() const1299 bool HTMLDocumentParser::ShouldDelayEnd() const {
1300   return InPumpSession() || IsPaused() || IsExecutingScript() ||
1301          task_runner_state_->IsScheduled();
1302 }
1303 
AttemptToEnd()1304 void HTMLDocumentParser::AttemptToEnd() {
1305   // finish() indicates we will not receive any more data. If we are waiting on
1306   // an external script to load, we can't finish parsing quite yet.
1307   TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
1308                (void*)this);
1309 
1310   if (ShouldDelayEnd()) {
1311     end_was_delayed_ = true;
1312     return;
1313   }
1314   PrepareToStopParsing();
1315 }
1316 
EndIfDelayed()1317 void HTMLDocumentParser::EndIfDelayed() {
1318   TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
1319                (void*)this);
1320   ShouldCompleteScope should_complete(task_runner_state_);
1321   EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
1322   // If we've already been detached, don't bother ending.
1323   if (IsDetached())
1324     return;
1325 
1326   if (!end_was_delayed_ || ShouldDelayEnd())
1327     return;
1328 
1329   end_was_delayed_ = false;
1330   PrepareToStopParsing();
1331 }
1332 
// Called when no more data will arrive. With a background parser, the finish
// request is posted to it. Otherwise the input stream is marked as ended, any
// scheduled deferred work is pumped synchronously, and an attempt is made to
// end parsing.
void HTMLDocumentParser::Finish() {
  // FIXME: We should DCHECK(!parser_stopped_) here, since it does not makes
  // sense to call any methods on DocumentParser once it's been stopped.
  // However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.

  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  Flush();
  // Re-check for detachment after Flush() before touching any more state.
  if (IsDetached())
    return;

  // Empty documents never got an append() call, and thus have never started a
  // background parser. In those cases, we ignore CanParseAsynchronously() and
  // fall through to the synchronous case.
  if (have_background_parser_) {
    if (!input_.HaveSeenEndOfFile())
      input_.CloseWithoutMarkingEndOfFile();
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
    return;
  }

  if (!tokenizer_) {
    DCHECK(!token_);
    // We're finishing before receiving any data. Rather than booting up the
    // background parser just to spin it down, we finish parsing synchronously.
    token_ = std::make_unique<HTMLToken>();
    tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
  }

  // We're not going to get any more data off the network, so we tell the input
  // stream we've reached the end of file. finish() can be called more than
  // once, if the first time does not call end().
  if (!input_.HaveSeenEndOfFile())
    input_.MarkEndOfFile();

  if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) {
    // If there's any deferred work remaining, synchronously pump the tokenizer
    // one last time to make sure that everything's added to the document.
    PumpTokenizerIfPossible();
  }

  AttemptToEnd();
}
1378 
IsExecutingScript() const1379 bool HTMLDocumentParser::IsExecutingScript() const {
1380   if (!script_runner_)
1381     return false;
1382   return script_runner_->IsExecutingScript();
1383 }
1384 
IsParsingAtLineNumber() const1385 bool HTMLDocumentParser::IsParsingAtLineNumber() const {
1386   if (CanParseAsynchronously()) {
1387     return is_parsing_at_line_number_ &&
1388            ScriptableDocumentParser::IsParsingAtLineNumber();
1389   }
1390   return ScriptableDocumentParser::IsParsingAtLineNumber();
1391 }
1392 
LineNumber() const1393 OrdinalNumber HTMLDocumentParser::LineNumber() const {
1394   if (have_background_parser_)
1395     return text_position_.line_;
1396 
1397   return input_.Current().CurrentLine();
1398 }
1399 
GetTextPosition() const1400 TextPosition HTMLDocumentParser::GetTextPosition() const {
1401   if (have_background_parser_)
1402     return text_position_;
1403 
1404   const SegmentedString& current_string = input_.Current();
1405   OrdinalNumber line = current_string.CurrentLine();
1406   OrdinalNumber column = current_string.CurrentColumn();
1407 
1408   return TextPosition(line, column);
1409 }
1410 
IsWaitingForScripts() const1411 bool HTMLDocumentParser::IsWaitingForScripts() const {
1412   // When the TreeBuilder encounters a </script> tag, it returns to the
1413   // HTMLDocumentParser where the script is transfered from the treebuilder to
1414   // the script runner. The script runner will hold the script until its loaded
1415   // and run. During any of this time, we want to count ourselves as "waiting
1416   // for a script" and thus run the preload scanner, as well as delay completion
1417   // of parsing.
1418   bool tree_builder_has_blocking_script =
1419       tree_builder_->HasParserBlockingScript();
1420   bool script_runner_has_blocking_script =
1421       script_runner_ && script_runner_->HasParserBlockingScript();
1422   // Since the parser is paused while a script runner has a blocking script, it
1423   // should never be possible to end up with both objects holding a blocking
1424   // script.
1425   DCHECK(
1426       !(tree_builder_has_blocking_script && script_runner_has_blocking_script));
1427   // If either object has a blocking script, the parser should be paused.
1428   return tree_builder_has_blocking_script ||
1429          script_runner_has_blocking_script ||
1430          reentry_permit_->ParserPauseFlag();
1431 }
1432 
// Resumes parsing once the parser is no longer paused, dispatching on whether
// a background parser or a main-thread tokenizer is in use.
void HTMLDocumentParser::ResumeParsingAfterPause() {
  // This function runs after a parser-blocking script has completed. There are
  // four possible cases:
  // 1) Parsing with kForceSynchronousParsing, where there is no background
  //    parser and a tokenizer_'s defined.
  // 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
  //    this case, the document is usually being completed or parsing has
  //    otherwise stopped.
  // 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
  //    case, need to add any pending speculations to the document.
  // 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
  TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
               (void*)this);
  DCHECK(!IsExecutingScript());
  DCHECK(!IsPaused());

  // The stopped and paused states are re-checked after this call; if either
  // holds, there is nothing to resume.
  CheckIfBlockingStylesheetAdded();
  if (IsStopped() || IsPaused())
    return;

  if (have_background_parser_) {  // Case 3)
    // If we paused in the middle of processing a token chunk,
    // deal with that before starting to pump.
    if (last_chunk_before_pause_) {
      ValidateSpeculations(std::move(last_chunk_before_pause_));
      DCHECK(!last_chunk_before_pause_);
      PumpPendingSpeculations();
    } else if (!IsScheduledForUnpause()) {
      // Otherwise, start pumping if we're not already scheduled to unpause
      // already.
      PumpPendingSpeculations();
    }
    return;
  }

  // The parser is resuming, so the scanner used for document.write() output
  // is dropped.
  insertion_preload_scanner_.reset();
  if (tokenizer_) {
    // Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
    // kForceSynchronousParsing must pump the tokenizer synchronously,
    // otherwise it can be deferred.
    if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
        !task_runner_state_->ShouldComplete() && !InPumpSession()) {
      SchedulePumpTokenizer();
    } else {
      ShouldCompleteScope should_complete(task_runner_state_);
      PumpTokenizerIfPossible();
    }
  } else {
    // Case 2): kAllowAsynchronousParsing, no background parser available
    // (indicating possible Document shutdown).
    EndIfDelayed();
  }
}
1486 
AppendCurrentInputStreamToPreloadScannerAndScan()1487 void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
1488   TRACE_EVENT1(
1489       "blink",
1490       "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
1491       "parser", (void*)this);
1492   DCHECK(preload_scanner_);
1493   DCHECK(preloader_);
1494   preload_scanner_->AppendToEnd(input_.Current());
1495   ScanAndPreload(preload_scanner_.get());
1496 }
1497 
NotifyScriptLoaded()1498 void HTMLDocumentParser::NotifyScriptLoaded() {
1499   TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
1500                (void*)this);
1501   DCHECK(script_runner_);
1502   DCHECK(!IsExecutingScript());
1503 
1504   scheduler::CooperativeSchedulingManager::AllowedStackScope
1505       allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance());
1506 
1507   if (IsStopped()) {
1508     return;
1509   }
1510 
1511   if (IsStopping()) {
1512     AttemptToRunDeferredScriptsAndEnd();
1513     return;
1514   }
1515 
1516   script_runner_->ExecuteScriptsWaitingForLoad();
1517   if (!IsPaused())
1518     ResumeParsingAfterPause();
1519 }
1520 
ExecuteScriptsWaitingForResources()1521 void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
1522   TRACE_EVENT0("blink",
1523                "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
1524   if (IsStopped())
1525     return;
1526 
1527   DCHECK(GetDocument()->IsScriptExecutionReady());
1528 
1529   if (is_waiting_for_stylesheets_)
1530     is_waiting_for_stylesheets_ = false;
1531 
1532   // Document only calls this when the Document owns the DocumentParser so this
1533   // will not be called in the DocumentFragment case.
1534   DCHECK(script_runner_);
1535   script_runner_->ExecuteScriptsWaitingForResources();
1536   if (!IsPaused())
1537     ResumeParsingAfterPause();
1538 }
1539 
DidAddPendingParserBlockingStylesheet()1540 void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
1541   // In-body CSS doesn't block painting. The parser needs to pause so that
1542   // the DOM doesn't include any elements that may depend on the CSS for style.
1543   // The stylesheet can be added and removed during the parsing of a single
1544   // token so don't actually set the bit to block parsing here, just track
1545   // the state of the added sheet in case it does persist beyond a single
1546   // token.
1547   added_pending_parser_blocking_stylesheet_ = true;
1548 }
1549 
DidLoadAllPendingParserBlockingStylesheets()1550 void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
1551   // Just toggle the stylesheet flag here (mostly for synchronous sheets).
1552   // The document will also call into executeScriptsWaitingForResources
1553   // which is when the parser will re-start, otherwise it will attempt to
1554   // resume twice which could cause state machine issues.
1555   added_pending_parser_blocking_stylesheet_ = false;
1556 }
1557 
CheckIfBlockingStylesheetAdded()1558 void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
1559   if (added_pending_parser_blocking_stylesheet_) {
1560     added_pending_parser_blocking_stylesheet_ = false;
1561     is_waiting_for_stylesheets_ = true;
1562   }
1563 }
1564 
ParseDocumentFragment(const String & source,DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy)1565 void HTMLDocumentParser::ParseDocumentFragment(
1566     const String& source,
1567     DocumentFragment* fragment,
1568     Element* context_element,
1569     ParserContentPolicy parser_content_policy) {
1570   auto* parser = MakeGarbageCollected<HTMLDocumentParser>(
1571       fragment, context_element, parser_content_policy);
1572   parser->Append(source);
1573   parser->Finish();
1574   // Allows ~DocumentParser to assert it was detached before destruction.
1575   parser->Detach();
1576 }
1577 
AppendBytes(const char * data,size_t length)1578 void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
1579   TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
1580                (unsigned)length, "parser", (void*)this);
1581 
1582   DCHECK(Thread::MainThread()->IsCurrentThread());
1583 
1584   if (!length || IsStopped())
1585     return;
1586 
1587   if (CanParseAsynchronously()) {
1588     if (!have_background_parser_)
1589       StartBackgroundParser();
1590 
1591     std::unique_ptr<Vector<char>> buffer =
1592         std::make_unique<Vector<char>>(length);
1593     memcpy(buffer->data(), data, length);
1594 
1595     loading_task_runner_->PostTask(
1596         FROM_HERE,
1597         WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
1598                   background_parser_, WTF::Passed(std::move(buffer))));
1599     return;
1600   }
1601 
1602   DecodedDataDocumentParser::AppendBytes(data, length);
1603 }
1604 
Flush()1605 void HTMLDocumentParser::Flush() {
1606   TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser", (void*)this);
1607   // If we've got no decoder, we never received any data.
1608   if (IsDetached() || NeedsDecoder())
1609     return;
1610 
1611   if (CanParseAsynchronously()) {
1612     // In some cases, flush() is called without any invocation of appendBytes.
1613     // Fallback to synchronous parsing in that case.
1614     if (!have_background_parser_) {
1615       can_parse_asynchronously_ = false;
1616       token_ = std::make_unique<HTMLToken>();
1617       tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
1618       DecodedDataDocumentParser::Flush();
1619       return;
1620     }
1621 
1622     loading_task_runner_->PostTask(
1623         FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
1624   } else {
1625     DecodedDataDocumentParser::Flush();
1626   }
1627 }
1628 
SetDecoder(std::unique_ptr<TextResourceDecoder> decoder)1629 void HTMLDocumentParser::SetDecoder(
1630     std::unique_ptr<TextResourceDecoder> decoder) {
1631   DCHECK(decoder);
1632   DecodedDataDocumentParser::SetDecoder(std::move(decoder));
1633 
1634   if (have_background_parser_) {
1635     loading_task_runner_->PostTask(
1636         FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
1637                              background_parser_, WTF::Passed(TakeDecoder())));
1638   }
1639 }
1640 
DocumentElementAvailable()1641 void HTMLDocumentParser::DocumentElementAvailable() {
1642   TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
1643   Document* document = GetDocument();
1644   DCHECK(document);
1645   DCHECK(document->documentElement());
1646   Element* documentElement = GetDocument()->documentElement();
1647   if (documentElement->hasAttribute(u"\u26A1") ||
1648       documentElement->hasAttribute("amp") ||
1649       documentElement->hasAttribute("i-amphtml-layout")) {
1650     // The DocumentLoader fetches a main resource and handles the result.
1651     // But it may not be available if JavaScript appends HTML to the page later
1652     // in the page's lifetime. This can happen both from in-page JavaScript and
1653     // from extensions. See example callstacks linked from crbug.com/931330.
1654     if (document->Loader()) {
1655       document->Loader()->DidObserveLoadingBehavior(
1656           kLoadingBehaviorAmpDocumentLoaded);
1657     }
1658   }
1659   if (preloader_)
1660     FetchQueuedPreloads();
1661 }
1662 
CreatePreloadScanner(TokenPreloadScanner::ScannerType scanner_type)1663 std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
1664     TokenPreloadScanner::ScannerType scanner_type) {
1665   return std::make_unique<HTMLPreloadScanner>(
1666       options_, GetDocument()->Url(),
1667       std::make_unique<CachedDocumentParameters>(GetDocument()),
1668       MediaValuesCached::MediaValuesCachedData(*GetDocument()), scanner_type);
1669 }
1670 
ScanAndPreload(HTMLPreloadScanner * scanner)1671 void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
1672   TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
1673   DCHECK(preloader_);
1674   bool seen_csp_meta_tag = false;
1675   PreloadRequestStream requests = scanner->Scan(
1676       GetDocument()->ValidBaseElementURL(), nullptr, seen_csp_meta_tag);
1677   task_runner_state_->SetSeenCSPMetaTag(seen_csp_meta_tag);
1678   for (auto& request : requests) {
1679     queued_preloads_.push_back(std::move(request));
1680   }
1681   FetchQueuedPreloads();
1682 }
1683 
FetchQueuedPreloads()1684 void HTMLDocumentParser::FetchQueuedPreloads() {
1685   DCHECK(preloader_);
1686   TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");
1687 
1688   if (CanParseAsynchronously()) {
1689     if (pending_csp_meta_token_ || !GetDocument()->documentElement())
1690       return;
1691   }
1692 
1693   if (!queued_preloads_.IsEmpty())
1694     preloader_->TakeAndPreload(queued_preloads_);
1695 }
1696 
1697 }  // namespace blink
1698