1 /*
2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_BACKGROUND_HTML_PARSER_H_
27 #define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_BACKGROUND_HTML_PARSER_H_
28 
29 #include <memory>
30 
31 #include "base/macros.h"
32 #include "base/memory/weak_ptr.h"
33 #include "base/optional.h"
34 #include "base/single_thread_task_runner.h"
35 #include "third_party/blink/renderer/core/dom/document_encoding_data.h"
36 #include "third_party/blink/renderer/core/html/parser/background_html_input_stream.h"
37 #include "third_party/blink/renderer/core/html/parser/compact_html_token.h"
38 #include "third_party/blink/renderer/core/html/parser/html_parser_options.h"
39 #include "third_party/blink/renderer/core/html/parser/html_preload_scanner.h"
40 #include "third_party/blink/renderer/core/html/parser/html_source_tracker.h"
41 #include "third_party/blink/renderer/core/html/parser/html_tree_builder_simulator.h"
42 #include "third_party/blink/renderer/core/html/parser/text_resource_decoder.h"
43 #include "third_party/blink/renderer/core/page/viewport_description.h"
44 
45 namespace blink {
46 
47 class HTMLDocumentParser;
48 
49 class BackgroundHTMLParser {
50   USING_FAST_MALLOC(BackgroundHTMLParser);
51 
52  public:
53   struct Configuration {
54     USING_FAST_MALLOC(Configuration);
55 
56    public:
57     Configuration();
58     HTMLParserOptions options;
59     WeakPersistent<HTMLDocumentParser> parser;
60     std::unique_ptr<TextResourceDecoder> decoder;
61   };
62 
63   // The returned BackgroundHTMLParser must first be initialized by calling
64   // init(), and free by calling stop().
65   static base::WeakPtr<BackgroundHTMLParser> Create(
66       std::unique_ptr<Configuration>,
67       scoped_refptr<base::SingleThreadTaskRunner>);
68   void Init(const KURL& document_url,
69             std::unique_ptr<CachedDocumentParameters>,
70             const MediaValuesCached::MediaValuesCachedData&,
71             bool priority_hints_origin_trial_enabled);
72 
73   struct Checkpoint {
74     USING_FAST_MALLOC(Checkpoint);
75 
76    public:
77     WeakPersistent<HTMLDocumentParser> parser;
78     std::unique_ptr<HTMLToken> token;
79     std::unique_ptr<HTMLTokenizer> tokenizer;
80     HTMLTreeBuilderSimulator::State tree_builder_state;
81     HTMLInputCheckpoint input_checkpoint;
82     TokenPreloadScannerCheckpoint preload_scanner_checkpoint;
83     String unparsed_input;
84   };
85 
86   void AppendRawBytesFromMainThread(std::unique_ptr<Vector<char>>);
87   void SetDecoder(std::unique_ptr<TextResourceDecoder>);
88   void Flush();
89   void ResumeFrom(std::unique_ptr<Checkpoint>);
90   void StartedChunkWithCheckpoint(HTMLInputCheckpoint);
91   void Finish();
92   void Stop();
93 
94   void ForcePlaintextForTextDocument();
95 
96   void ClearParser();
97 
98  private:
99   BackgroundHTMLParser(std::unique_ptr<Configuration>,
100                        scoped_refptr<base::SingleThreadTaskRunner>);
101   ~BackgroundHTMLParser();
102 
103   void AppendDecodedBytes(const String&);
104   void MarkEndOfFile();
105   void PumpTokenizer();
106 
107   void EnqueueTokenizedChunk();
108   void UpdateDocument(const String& decoded_data);
109 
110   BackgroundHTMLInputStream input_;
111   HTMLSourceTracker source_tracker_;
112   std::unique_ptr<HTMLToken> token_;
113   std::unique_ptr<HTMLTokenizer> tokenizer_;
114   HTMLTreeBuilderSimulator tree_builder_simulator_;
115   HTMLParserOptions options_;
116   WeakPersistent<HTMLDocumentParser> parser_;
117 
118   CompactHTMLTokenStream pending_tokens_;
119   PreloadRequestStream pending_preloads_;
120   base::Optional<ViewportDescription> viewport_description_;
121   std::unique_ptr<TokenPreloadScanner> preload_scanner_;
122   std::unique_ptr<TextResourceDecoder> decoder_;
123   DocumentEncodingData last_seen_encoding_data_;
124   scoped_refptr<base::SingleThreadTaskRunner> loading_task_runner_;
125 
126   // Index into |pending_tokens_| of the last <meta> csp token found. Will be
127   // |TokenizedChunk::kNoPendingToken| if none have been found.
128   int pending_csp_meta_token_index_;
129 
130   bool starting_script_;
131 
132   base::WeakPtrFactory<BackgroundHTMLParser> weak_factory_{this};
133 
134   DISALLOW_COPY_AND_ASSIGN(BackgroundHTMLParser);
135 };
136 
137 }  // namespace blink
138 
139 #endif
140