1 /*
2  * This file is part of the XSL implementation.
3  *
4  * Copyright (C) 2004, 2005, 2006, 2008, 2012 Apple Inc. All rights reserved.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public License
17  * along with this library; see the file COPYING.LIB.  If not, write to
18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21 
22 #include "third_party/blink/renderer/core/xml/xsl_style_sheet.h"
23 
24 #include <libxml/uri.h>
25 #include <libxslt/xsltutils.h>
26 #include "services/network/public/mojom/fetch_api.mojom-blink.h"
27 #include "third_party/blink/renderer/core/dom/document.h"
28 #include "third_party/blink/renderer/core/dom/node.h"
29 #include "third_party/blink/renderer/core/dom/transform_source.h"
30 #include "third_party/blink/renderer/core/frame/local_frame.h"
31 #include "third_party/blink/renderer/core/loader/resource/xsl_style_sheet_resource.h"
32 #include "third_party/blink/renderer/core/xml/parser/xml_document_parser_scope.h"
33 #include "third_party/blink/renderer/core/xml/parser/xml_parser_input.h"
34 #include "third_party/blink/renderer/core/xml/xslt_processor.h"
35 #include "third_party/blink/renderer/platform/loader/fetch/fetch_initiator_type_names.h"
36 #include "third_party/blink/renderer/platform/loader/fetch/fetch_parameters.h"
37 
38 namespace blink {
39 
XSLStyleSheet(XSLStyleSheet * parent_style_sheet,const String & original_url,const KURL & final_url)40 XSLStyleSheet::XSLStyleSheet(XSLStyleSheet* parent_style_sheet,
41                              const String& original_url,
42                              const KURL& final_url)
43     : owner_node_(nullptr),
44       original_url_(original_url),
45       final_url_(final_url),
46       is_disabled_(false),
47       embedded_(false),
48       // Child sheets get marked as processed when the libxslt engine has
49       // finally seen them.
50       processed_(false),
51       stylesheet_doc_(nullptr),
52       stylesheet_doc_taken_(false),
53       compilation_failed_(false),
54       parent_style_sheet_(parent_style_sheet),
55       owner_document_(nullptr) {
56   DCHECK(RuntimeEnabledFeatures::XSLTEnabled());
57 }
58 
XSLStyleSheet(Node * parent_node,const String & original_url,const KURL & final_url,bool embedded)59 XSLStyleSheet::XSLStyleSheet(Node* parent_node,
60                              const String& original_url,
61                              const KURL& final_url,
62                              bool embedded)
63     : owner_node_(parent_node),
64       original_url_(original_url),
65       final_url_(final_url),
66       is_disabled_(false),
67       embedded_(embedded),
68       processed_(true),  // The root sheet starts off processed.
69       stylesheet_doc_(nullptr),
70       stylesheet_doc_taken_(false),
71       compilation_failed_(false),
72       parent_style_sheet_(nullptr),
73       owner_document_(nullptr) {
74   DCHECK(RuntimeEnabledFeatures::XSLTEnabled());
75 }
76 
XSLStyleSheet(Document * owner_document,Node * style_sheet_root_node,const String & original_url,const KURL & final_url,bool embedded)77 XSLStyleSheet::XSLStyleSheet(Document* owner_document,
78                              Node* style_sheet_root_node,
79                              const String& original_url,
80                              const KURL& final_url,
81                              bool embedded)
82     : owner_node_(style_sheet_root_node),
83       original_url_(original_url),
84       final_url_(final_url),
85       is_disabled_(false),
86       embedded_(embedded),
87       processed_(true),  // The root sheet starts off processed.
88       stylesheet_doc_(nullptr),
89       stylesheet_doc_taken_(false),
90       compilation_failed_(false),
91       parent_style_sheet_(nullptr),
92       owner_document_(owner_document) {
93   DCHECK(RuntimeEnabledFeatures::XSLTEnabled());
94 }
95 
~XSLStyleSheet()96 XSLStyleSheet::~XSLStyleSheet() {
97   if (!stylesheet_doc_taken_)
98     xmlFreeDoc(stylesheet_doc_);
99 }
100 
CheckLoaded()101 void XSLStyleSheet::CheckLoaded() {
102   if (XSLStyleSheet* style_sheet = parentStyleSheet())
103     style_sheet->CheckLoaded();
104   if (ownerNode())
105     ownerNode()->SheetLoaded();
106 }
107 
GetDocument()108 xmlDocPtr XSLStyleSheet::GetDocument() {
109   if (embedded_ && OwnerDocument() && OwnerDocument()->GetTransformSource())
110     return (xmlDocPtr)OwnerDocument()->GetTransformSource()->PlatformSource();
111   return stylesheet_doc_;
112 }
113 
ClearDocuments()114 void XSLStyleSheet::ClearDocuments() {
115   stylesheet_doc_ = nullptr;
116   for (unsigned i = 0; i < children_.size(); ++i)
117     children_.at(i)->ClearDocuments();
118 }
119 
ParseString(const String & source)120 bool XSLStyleSheet::ParseString(const String& source) {
121   // Parse in a single chunk into an xmlDocPtr
122   if (!stylesheet_doc_taken_)
123     xmlFreeDoc(stylesheet_doc_);
124   stylesheet_doc_taken_ = false;
125 
126   FrameConsole* console = nullptr;
127   if (LocalFrame* frame = OwnerDocument()->GetFrame())
128     console = &frame->Console();
129 
130   XMLDocumentParserScope scope(OwnerDocument(), XSLTProcessor::GenericErrorFunc,
131                                XSLTProcessor::ParseErrorFunc, console);
132   XMLParserInput input(source);
133 
134   xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(input.Data(), input.size());
135   if (!ctxt)
136     return 0;
137 
138   if (parent_style_sheet_) {
139     // The XSL transform may leave the newly-transformed document
140     // with references to the symbol dictionaries of the style sheet
141     // and any of its children. XML document disposal can corrupt memory
142     // if a document uses more than one symbol dictionary, so we
143     // ensure that all child stylesheets use the same dictionaries as their
144     // parents.
145     xmlDictFree(ctxt->dict);
146     ctxt->dict = parent_style_sheet_->stylesheet_doc_->dict;
147     xmlDictReference(ctxt->dict);
148   }
149 
150   stylesheet_doc_ =
151       xmlCtxtReadMemory(ctxt, input.Data(), input.size(),
152                         final_url_.GetString().Utf8().c_str(), input.Encoding(),
153                         XML_PARSE_NOENT | XML_PARSE_DTDATTR |
154                             XML_PARSE_NOWARNING | XML_PARSE_NOCDATA);
155 
156   xmlFreeParserCtxt(ctxt);
157   LoadChildSheets();
158   return stylesheet_doc_;
159 }
160 
LoadChildSheets()161 void XSLStyleSheet::LoadChildSheets() {
162   if (!GetDocument())
163     return;
164 
165   xmlNodePtr stylesheet_root = GetDocument()->children;
166 
167   // Top level children may include other things such as DTD nodes, we ignore
168   // those.
169   while (stylesheet_root && stylesheet_root->type != XML_ELEMENT_NODE)
170     stylesheet_root = stylesheet_root->next;
171 
172   if (embedded_) {
173     // We have to locate (by ID) the appropriate embedded stylesheet
174     // element, so that we can walk the import/include list.
175     xmlAttrPtr id_node = xmlGetID(
176         GetDocument(), (const xmlChar*)(final_url_.GetString().Utf8().c_str()));
177     if (!id_node)
178       return;
179     stylesheet_root = id_node->parent;
180   } else {
181     // FIXME: Need to handle an external URI with a # in it. This is a
182     // pretty minor edge case, so we'll deal with it later.
183   }
184 
185   if (stylesheet_root) {
186     // Walk the children of the root element and look for import/include
187     // elements. Imports must occur first.
188     xmlNodePtr curr = stylesheet_root->children;
189     while (curr) {
190       if (curr->type != XML_ELEMENT_NODE) {
191         curr = curr->next;
192         continue;
193       }
194       if (IS_XSLT_ELEM(curr) && IS_XSLT_NAME(curr, "import")) {
195         xmlChar* uri_ref =
196             xsltGetNsProp(curr, (const xmlChar*)"href", XSLT_NAMESPACE);
197         LoadChildSheet(String::FromUTF8((const char*)uri_ref));
198         xmlFree(uri_ref);
199       } else {
200         break;
201       }
202       curr = curr->next;
203     }
204 
205     // Now handle includes.
206     while (curr) {
207       if (curr->type == XML_ELEMENT_NODE && IS_XSLT_ELEM(curr) &&
208           IS_XSLT_NAME(curr, "include")) {
209         xmlChar* uri_ref =
210             xsltGetNsProp(curr, (const xmlChar*)"href", XSLT_NAMESPACE);
211         LoadChildSheet(String::FromUTF8((const char*)uri_ref));
212         xmlFree(uri_ref);
213       }
214       curr = curr->next;
215     }
216   }
217 }
218 
LoadChildSheet(const String & href)219 void XSLStyleSheet::LoadChildSheet(const String& href) {
220   // Use parent styleheet's URL as the base URL
221   KURL url(BaseURL(), href);
222 
223   // Check for a cycle in our import chain. If we encounter a stylesheet in
224   // our parent chain with the same URL, then just bail.
225   for (XSLStyleSheet* parent_sheet = parentStyleSheet(); parent_sheet;
226        parent_sheet = parent_sheet->parentStyleSheet()) {
227     if (url == parent_sheet->BaseURL())
228       return;
229   }
230 
231   const String& url_string = url.GetString();
232   ResourceLoaderOptions fetch_options;
233   fetch_options.initiator_info.name = fetch_initiator_type_names::kXml;
234   FetchParameters params(
235       ResourceRequest(OwnerDocument()->CompleteURL(url_string)), fetch_options);
236   params.MutableResourceRequest().SetMode(
237       network::mojom::RequestMode::kSameOrigin);
238   XSLStyleSheetResource* resource = XSLStyleSheetResource::FetchSynchronously(
239       params, OwnerDocument()->Fetcher());
240   if (!resource->Sheet())
241     return;
242 
243   XSLStyleSheet* style_sheet = MakeGarbageCollected<XSLStyleSheet>(
244       this, url_string, resource->GetResponse().CurrentRequestUrl());
245   children_.push_back(style_sheet);
246   style_sheet->ParseString(resource->Sheet());
247   CheckLoaded();
248 }
249 
CompileStyleSheet()250 xsltStylesheetPtr XSLStyleSheet::CompileStyleSheet() {
251   // FIXME: Hook up error reporting for the stylesheet compilation process.
252   if (embedded_)
253     return xsltLoadStylesheetPI(GetDocument());
254 
255   // Certain libxslt versions are corrupting the xmlDoc on compilation
256   // failures - hence attempting to recompile after a failure is unsafe.
257   if (compilation_failed_)
258     return nullptr;
259 
260   // xsltParseStylesheetDoc makes the document part of the stylesheet
261   // so we have to release our pointer to it.
262   DCHECK(!stylesheet_doc_taken_);
263   xsltStylesheetPtr result = xsltParseStylesheetDoc(stylesheet_doc_);
264   if (result)
265     stylesheet_doc_taken_ = true;
266   else
267     compilation_failed_ = true;
268   return result;
269 }
270 
OwnerDocument()271 Document* XSLStyleSheet::OwnerDocument() {
272   for (XSLStyleSheet* style_sheet = this; style_sheet;
273        style_sheet = style_sheet->parentStyleSheet()) {
274     if (style_sheet->owner_document_)
275       return style_sheet->owner_document_.Get();
276     Node* node = style_sheet->ownerNode();
277     if (node)
278       return &node->GetDocument();
279   }
280   return nullptr;
281 }
282 
LocateStylesheetSubResource(xmlDocPtr parent_doc,const xmlChar * uri)283 xmlDocPtr XSLStyleSheet::LocateStylesheetSubResource(xmlDocPtr parent_doc,
284                                                      const xmlChar* uri) {
285   bool matched_parent = (parent_doc == GetDocument());
286   for (unsigned i = 0; i < children_.size(); ++i) {
287     XSLStyleSheet* child = children_.at(i).Get();
288     if (matched_parent) {
289       if (child->Processed())
290         continue;  // libxslt has been given this sheet already.
291 
292       // Check the URI of the child stylesheet against the doc URI.
293       // In order to ensure that libxml canonicalized both URLs, we get
294       // the original href string from the import rule and canonicalize it
295       // using libxml before comparing it with the URI argument.
296       std::string import_href = child->href().Utf8();
297       xmlChar* base = xmlNodeGetBase(parent_doc, (xmlNodePtr)parent_doc);
298       xmlChar* child_uri =
299           xmlBuildURI((const xmlChar*)import_href.c_str(), base);
300       bool equal_ur_is = xmlStrEqual(uri, child_uri);
301       xmlFree(base);
302       xmlFree(child_uri);
303       if (equal_ur_is) {
304         child->MarkAsProcessed();
305         return child->GetDocument();
306       }
307       continue;
308     }
309     xmlDocPtr result = child->LocateStylesheetSubResource(parent_doc, uri);
310     if (result)
311       return result;
312   }
313 
314   return nullptr;
315 }
316 
MarkAsProcessed()317 void XSLStyleSheet::MarkAsProcessed() {
318   DCHECK(!processed_);
319   DCHECK(!stylesheet_doc_taken_);
320   processed_ = true;
321   stylesheet_doc_taken_ = true;
322 }
323 
Trace(Visitor * visitor)324 void XSLStyleSheet::Trace(Visitor* visitor) {
325   visitor->Trace(owner_node_);
326   visitor->Trace(children_);
327   visitor->Trace(parent_style_sheet_);
328   visitor->Trace(owner_document_);
329   StyleSheet::Trace(visitor);
330 }
331 
332 }  // namespace blink
333