1 /*
2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xalan.internal.xsltc.dom;
22 
23 import com.sun.org.apache.xalan.internal.xsltc.DOM;
24 import com.sun.org.apache.xalan.internal.xsltc.DOMCache;
25 import com.sun.org.apache.xalan.internal.xsltc.DOMEnhancedForDTM;
26 import com.sun.org.apache.xalan.internal.xsltc.TransletException;
27 import com.sun.org.apache.xalan.internal.xsltc.compiler.util.ErrorMsg;
28 import com.sun.org.apache.xalan.internal.xsltc.runtime.AbstractTranslet;
29 import com.sun.org.apache.xalan.internal.xsltc.trax.TemplatesImpl;
30 import com.sun.org.apache.xml.internal.dtm.DTM;
31 import com.sun.org.apache.xml.internal.dtm.DTMAxisIterator;
32 import com.sun.org.apache.xml.internal.dtm.DTMManager;
33 import com.sun.org.apache.xml.internal.dtm.ref.EmptyIterator;
34 import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
35 import java.io.FileNotFoundException;
36 import javax.xml.transform.stream.StreamSource;
37 import jdk.xml.internal.JdkConstants;
38 import jdk.xml.internal.SecuritySupport;
39 
40 /**
41  * @author Morten Jorgensen
42  * @LastModified: May 2021
43  */
44 public final class LoadDocument {
45 
46     private static final String NAMESPACE_FEATURE =
47        "http://xml.org/sax/features/namespaces";
48 
49     /**
50      * Interprets the arguments passed from the document() function (see
51      * com/sun/org/apache/xalan/internal/xsltc/compiler/DocumentCall.java) and returns an
52      * iterator containing the requested nodes. Builds a union-iterator if
53      * several documents are requested.
54      * 2 arguments arg1 and arg2.  document(Obj, node-set) call
55      */
documentF(Object arg1, DTMAxisIterator arg2, String xslURI, AbstractTranslet translet, DOM dom)56     public static DTMAxisIterator documentF(Object arg1, DTMAxisIterator arg2,
57                             String xslURI, AbstractTranslet translet, DOM dom)
58     throws TransletException {
59         String baseURI = null;
60         final int arg2FirstNode = arg2.next();
61         if (arg2FirstNode == DTMAxisIterator.END) {
62             //  the second argument node-set is empty
63             return EmptyIterator.getInstance();
64         } else {
65             //System.err.println("arg2FirstNode name: "
66             //                   + dom.getNodeName(arg2FirstNode )+"["
67             //                   +Integer.toHexString(arg2FirstNode )+"]");
68             baseURI = dom.getDocumentURI(arg2FirstNode);
69             if (!SystemIDResolver.isAbsoluteURI(baseURI))
70                baseURI = SystemIDResolver.getAbsoluteURIFromRelative(baseURI);
71         }
72 
73         try {
74             if (arg1 instanceof String) {
75                 if (((String)arg1).length() == 0) {
76                     return document(xslURI, "", translet, dom);
77                 } else {
78                     return document((String)arg1, baseURI, translet, dom);
79                 }
80             } else if (arg1 instanceof DTMAxisIterator) {
81                 return document((DTMAxisIterator)arg1, baseURI, translet, dom);
82             } else {
83                 final String err = "document("+arg1.toString()+")";
84                 throw new IllegalArgumentException(err);
85             }
86         } catch (Exception e) {
87             throw new TransletException(e);
88         }
89     }
90     /**
91      * Interprets the arguments passed from the document() function (see
92      * com/sun/org/apache/xalan/internal/xsltc/compiler/DocumentCall.java) and returns an
93      * iterator containing the requested nodes. Builds a union-iterator if
94      * several documents are requested.
95      * 1 arguments arg.  document(Obj) call
96      */
documentF(Object arg, String xslURI, AbstractTranslet translet, DOM dom)97     public static DTMAxisIterator documentF(Object arg, String xslURI,
98                     AbstractTranslet translet, DOM dom)
99     throws TransletException {
100         try {
101             if (arg instanceof String) {
102                 if (xslURI == null )
103                     xslURI = "";
104 
105                 String baseURI = xslURI;
106                 if (!SystemIDResolver.isAbsoluteURI(xslURI))
107                    baseURI = SystemIDResolver.getAbsoluteURIFromRelative(xslURI);
108 
109                 String href = (String)arg;
110                 if (href.length() == 0) {
111                     href = "";
112                     // %OPT% Optimization to cache the stylesheet DOM.
113                     // The stylesheet DOM is built once and cached
114                     // in the Templates object.
115                     TemplatesImpl templates = (TemplatesImpl)translet.getTemplates();
116                     DOM sdom = null;
117                     if (templates != null) {
118                         sdom = templates.getStylesheetDOM();
119                     }
120 
121                     // If the cached dom exists, we need to migrate it
122                     // to the new DTMManager and create a DTMAxisIterator
123                     // for the document.
124                     if (sdom != null) {
125                         return document(sdom, translet, dom);
126                     }
127                     else {
128                         return document(href, baseURI, translet, dom, true);
129                     }
130                 }
131                 else {
132                     return document(href, baseURI, translet, dom);
133                 }
134             } else if (arg instanceof DTMAxisIterator) {
135                 return document((DTMAxisIterator)arg, null, translet, dom);
136             } else {
137                 final String err = "document("+arg.toString()+")";
138                 throw new IllegalArgumentException(err);
139             }
140         } catch (Exception e) {
141             throw new TransletException(e);
142         }
143     }
144 
document(String uri, String base, AbstractTranslet translet, DOM dom)145     private static DTMAxisIterator document(String uri, String base,
146                     AbstractTranslet translet, DOM dom)
147         throws Exception
148     {
149         return document(uri, base, translet, dom, false);
150     }
151 
document(String uri, String base, AbstractTranslet translet, DOM dom, boolean cacheDOM)152     private static DTMAxisIterator document(String uri, String base,
153                     AbstractTranslet translet, DOM dom,
154                     boolean cacheDOM)
155     throws Exception
156     {
157         try {
158         final String originalUri = uri;
159         MultiDOM multiplexer = (MultiDOM)dom;
160 
161         // Prepend URI base to URI (from context)
162         if (base != null && !base.equals("")) {
163             uri = SystemIDResolver.getAbsoluteURI(uri, base);
164         }
165 
166         // Return an empty iterator if the URI is clearly invalid
167         // (to prevent some unncessary MalformedURL exceptions).
168         if (uri == null || uri.equals("")) {
169             return(EmptyIterator.getInstance());
170         }
171 
172         // Check if this DOM has already been added to the multiplexer
173         int mask = multiplexer.getDocumentMask(uri);
174         if (mask != -1) {
175             DOM newDom = ((DOMAdapter)multiplexer.getDOMAdapter(uri))
176                                        .getDOMImpl();
177             if (newDom instanceof DOMEnhancedForDTM) {
178                 return new SingletonIterator(((DOMEnhancedForDTM)newDom)
179                                                                .getDocument(),
180                                              true);
181             }
182         }
183 
184         // Check if we can get the DOM from a DOMCache
185         DOMCache cache = translet.getDOMCache();
186         DOM newdom;
187 
188         mask = multiplexer.nextMask(); // peek
189 
190         if (cache != null) {
191             newdom = cache.retrieveDocument(base, originalUri, translet);
192             if (newdom == null) {
193                 final Exception e = new FileNotFoundException(originalUri);
194                 throw new TransletException(e);
195             }
196         } else {
197             String accessError = SecuritySupport.checkAccess(uri, translet.getAllowedProtocols(), JdkConstants.ACCESS_EXTERNAL_ALL);
198             if (accessError != null) {
199                 ErrorMsg msg = new ErrorMsg(ErrorMsg.ACCESSING_XSLT_TARGET_ERR,
200                         SecuritySupport.sanitizePath(uri), accessError);
201                 throw new Exception(msg.toString());
202             }
203 
204             // Parse the input document and construct DOM object
205             // Trust the DTMManager to pick the right parser and
206             // set up the DOM correctly.
207             XSLTCDTMManager dtmManager = (XSLTCDTMManager)multiplexer
208                                                               .getDTMManager();
209             DOMEnhancedForDTM enhancedDOM =
210                     (DOMEnhancedForDTM) dtmManager.getDTM(new StreamSource(uri),
211                                             false, null, true, false,
212                                             translet.hasIdCall(), cacheDOM);
213             newdom = enhancedDOM;
214 
215             // Cache the stylesheet DOM in the Templates object
216             if (cacheDOM) {
217                 TemplatesImpl templates = (TemplatesImpl)translet.getTemplates();
218                 if (templates != null) {
219                     templates.setStylesheetDOM(enhancedDOM);
220                 }
221             }
222 
223             translet.prepassDocument(enhancedDOM);
224             enhancedDOM.setDocumentURI(uri);
225         }
226 
227         // Wrap the DOM object in a DOM adapter and add to multiplexer
228         final DOMAdapter domAdapter = translet.makeDOMAdapter(newdom);
229         multiplexer.addDOMAdapter(domAdapter);
230 
231         // Create index for any key elements
232         translet.buildKeys(domAdapter, null, null, newdom.getDocument());
233 
234         // Return a singleton iterator containing the root node
235         return new SingletonIterator(newdom.getDocument(), true);
236         } catch (Exception e) {
237             throw e;
238         }
239     }
240 
241 
document(DTMAxisIterator arg1, String baseURI, AbstractTranslet translet, DOM dom)242     private static DTMAxisIterator document(DTMAxisIterator arg1,
243                                             String baseURI,
244                                             AbstractTranslet translet, DOM dom)
245     throws Exception
246     {
247         UnionIterator union = new UnionIterator(dom);
248         int node = DTM.NULL;
249 
250         while ((node = arg1.next()) != DTM.NULL) {
251             String uri = dom.getStringValueX(node);
252             //document(node-set) if true;  document(node-set,node-set) if false
253             if (baseURI  == null) {
254                baseURI = dom.getDocumentURI(node);
255                if (!SystemIDResolver.isAbsoluteURI(baseURI))
256                     baseURI = SystemIDResolver.getAbsoluteURIFromRelative(baseURI);
257             }
258             union.addIterator(document(uri, baseURI, translet, dom));
259         }
260         return(union);
261     }
262 
263     /**
264      * Create a DTMAxisIterator for the newdom. This is currently only
265      * used to create an iterator for the cached stylesheet DOM.
266      *
267      * @param newdom the cached stylesheet DOM
268      * @param translet the translet
269      * @param the main dom (should be a MultiDOM)
270      * @return a DTMAxisIterator from the document root
271      */
document(DOM newdom, AbstractTranslet translet, DOM dom)272     private static DTMAxisIterator document(DOM newdom,
273                                             AbstractTranslet translet,
274                                             DOM dom)
275         throws Exception
276     {
277         DTMManager dtmManager = ((MultiDOM)dom).getDTMManager();
278         // Need to migrate the cached DTM to the new DTMManager
279         if (dtmManager != null && newdom instanceof DTM) {
280             ((DTM)newdom).migrateTo(dtmManager);
281         }
282 
283         translet.prepassDocument(newdom);
284 
285         // Wrap the DOM object in a DOM adapter and add to multiplexer
286         final DOMAdapter domAdapter = translet.makeDOMAdapter(newdom);
287         ((MultiDOM)dom).addDOMAdapter(domAdapter);
288 
289         // Create index for any key elements
290         translet.buildKeys(domAdapter, null, null,
291                            newdom.getDocument());
292 
293         // Return a singleton iterator containing the root node
294         return new SingletonIterator(newdom.getDocument(), true);
295     }
296 
297 }
298