1 /*
2  * Copyright (C) 2010 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  */
24 
25 #include "third_party/blink/renderer/core/html/parser/text_document_parser.h"
26 
27 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
28 #include "third_party/blink/renderer/core/html/parser/parser_synchronization_policy.h"
29 #include "third_party/blink/renderer/core/html_names.h"
30 
31 namespace blink {
32 
TextDocumentParser(HTMLDocument & document,ParserSynchronizationPolicy sync_policy)33 TextDocumentParser::TextDocumentParser(HTMLDocument& document,
34                                        ParserSynchronizationPolicy sync_policy)
35     : HTMLDocumentParser(document, sync_policy),
36       have_inserted_fake_pre_element_(false) {}
37 
38 TextDocumentParser::~TextDocumentParser() = default;
39 
AppendBytes(const char * data,size_t length)40 void TextDocumentParser::AppendBytes(const char* data, size_t length) {
41   if (!length || IsStopped())
42     return;
43 
44   if (!have_inserted_fake_pre_element_)
45     InsertFakePreElement();
46   HTMLDocumentParser::AppendBytes(data, length);
47 }
48 
InsertFakePreElement()49 void TextDocumentParser::InsertFakePreElement() {
50   // In principle, we should create a specialized tree builder for
51   // TextDocuments, but instead we re-use the existing HTMLTreeBuilder. We
52   // create a fake token and give it to the tree builder rather than sending
53   // fake bytes through the front-end of the parser to avoid distrubing the
54   // line/column number calculations.
55   Vector<Attribute> attributes;
56   attributes.push_back(Attribute(
57       html_names::kStyleAttr, "word-wrap: break-word; white-space: pre-wrap;"));
58   AtomicHTMLToken fake_pre(HTMLToken::kStartTag,
59                            html_names::kPreTag.LocalName(), attributes);
60   TreeBuilder()->ConstructTree(&fake_pre);
61 
62   // The document could have been detached by an extension while the
63   // tree was being constructed.
64   if (IsStopped())
65     return;
66 
67   // Normally we would skip the first \n after a <pre> element, but we don't
68   // want to skip the first \n for text documents!
69   TreeBuilder()->SetShouldSkipLeadingNewline(false);
70 
71   // Although Text Documents expose a "pre" element in their DOM, they
72   // act like a <plaintext> tag, so we have to force plaintext mode.
73   ForcePlaintextForTextDocument();
74 
75   have_inserted_fake_pre_element_ = true;
76 }
77 
78 }  // namespace blink
79