1 /*
2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 /*
21  * $Id: DocumentCache.java,v 1.2.4.1 2005/09/06 06:15:22 pvedula Exp $
22  */
23 
24 package com.sun.org.apache.xalan.internal.xsltc.dom;
25 
26 import com.sun.org.apache.xalan.internal.xsltc.DOM;
27 import com.sun.org.apache.xalan.internal.xsltc.DOMCache;
28 import com.sun.org.apache.xalan.internal.xsltc.DOMEnhancedForDTM;
29 import com.sun.org.apache.xalan.internal.xsltc.Translet;
30 import com.sun.org.apache.xalan.internal.xsltc.runtime.AbstractTranslet;
31 import com.sun.org.apache.xalan.internal.xsltc.runtime.BasisLibrary;
32 import com.sun.org.apache.xalan.internal.xsltc.runtime.Constants;
33 import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
34 import java.io.File;
35 import java.io.PrintWriter;
36 import java.net.URL;
37 import java.net.URLConnection;
38 import java.nio.file.Paths;
39 import java.util.Date;
40 import java.util.HashMap;
41 import java.util.Map;
42 import javax.xml.parsers.ParserConfigurationException;
43 import javax.xml.parsers.SAXParser;
44 import javax.xml.parsers.SAXParserFactory;
45 import javax.xml.transform.TransformerException;
46 import javax.xml.transform.sax.SAXSource;
47 import org.xml.sax.InputSource;
48 import org.xml.sax.SAXException;
49 import org.xml.sax.XMLReader;
50 
51 /**
52  * @author Morten Jorgensen
53  */
54 public final class DocumentCache implements DOMCache {
55 
56     private int       _size;
57     private Map<String, CachedDocument> _references;
58     private String[]  _URIs;
59     private int       _count;
60     private int       _current;
61     private SAXParser _parser;
62     private XMLReader _reader;
63     private XSLTCDTMManager _dtmManager;
64 
65     private static final int REFRESH_INTERVAL = 1000;
66 
67     /*
68      * Inner class containing a DOMImpl object and DTD handler
69      */
70     public final class CachedDocument {
71 
72         // Statistics data
73         private long _firstReferenced;
74         private long _lastReferenced;
75         private long _accessCount;
76         private long _lastModified;
77         private long _lastChecked;
78         private long _buildTime;
79 
80         // DOM and DTD handler references
81         private DOMEnhancedForDTM _dom = null;
82 
83         /**
84          * Constructor - load document and initialise statistics
85          */
CachedDocument(String uri)86         public CachedDocument(String uri) {
87             // Initialise statistics variables
88             final long stamp = System.currentTimeMillis();
89             _firstReferenced = stamp;
90             _lastReferenced  = stamp;
91             _accessCount     = 0;
92             loadDocument(uri);
93 
94             _buildTime = System.currentTimeMillis() - stamp;
95         }
96 
97         /**
98          * Loads the document and updates build-time (latency) statistics
99          */
loadDocument(String uri)100         public void loadDocument(String uri) {
101 
102             try {
103                 final long stamp = System.currentTimeMillis();
104                 _dom = (DOMEnhancedForDTM)_dtmManager.getDTM(
105                                  new SAXSource(_reader, new InputSource(uri)),
106                                  false, null, true, false);
107                 _dom.setDocumentURI(uri);
108 
109                 // The build time can be used for statistics for a better
110                 // priority algorithm (currently round robin).
111                 final long thisTime = System.currentTimeMillis() - stamp;
112                 if (_buildTime > 0)
113                     _buildTime = (_buildTime + thisTime) >>> 1;
114                 else
115                     _buildTime = thisTime;
116             }
117             catch (Exception e) {
118                 _dom = null;
119             }
120         }
121 
getDocument()122         public DOM getDocument()       { return(_dom); }
123 
getFirstReferenced()124         public long getFirstReferenced()   { return(_firstReferenced); }
125 
getLastReferenced()126         public long getLastReferenced()    { return(_lastReferenced); }
127 
getAccessCount()128         public long getAccessCount()       { return(_accessCount); }
129 
incAccessCount()130         public void incAccessCount()       { _accessCount++; }
131 
getLastModified()132         public long getLastModified()      { return(_lastModified); }
133 
setLastModified(long t)134         public void setLastModified(long t){ _lastModified = t; }
135 
getLatency()136         public long getLatency()           { return(_buildTime); }
137 
getLastChecked()138         public long getLastChecked()       { return(_lastChecked); }
139 
setLastChecked(long t)140         public void setLastChecked(long t) { _lastChecked = t; }
141 
getEstimatedSize()142         public long getEstimatedSize() {
143             if (_dom != null)
144                 return(_dom.getSize() << 5); // ???
145             else
146                 return(0);
147         }
148 
149     }
150 
151     /**
152      * DocumentCache constructor
153      */
DocumentCache(int size)154     public DocumentCache(int size) throws SAXException {
155         this(size, null);
156         try {
157             _dtmManager = XSLTCDTMManager.createNewDTMManagerInstance();
158         } catch (Exception e) {
159             throw new SAXException(e);
160         }
161     }
162 
163     /**
164      * DocumentCache constructor
165      */
DocumentCache(int size, XSLTCDTMManager dtmManager)166     public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException {
167         _dtmManager = dtmManager;
168         _count = 0;
169         _current = 0;
170         _size  = size;
171         _references = new HashMap<>(_size+2);
172         _URIs = new String[_size];
173 
174         try {
175             // Create a SAX parser and get the XMLReader object it uses
176             final SAXParserFactory factory = SAXParserFactory.newInstance();
177             try {
178                 factory.setFeature(Constants.NAMESPACE_FEATURE,true);
179             }
180             catch (Exception e) {
181                 factory.setNamespaceAware(true);
182             }
183             _parser = factory.newSAXParser();
184             _reader = _parser.getXMLReader();
185         }
186         catch (ParserConfigurationException e) {
187             BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR);
188         }
189     }
190 
191     /**
192      * Returns the time-stamp for a document's last update
193      */
getLastModified(String uri)194     private final long getLastModified(String uri) {
195         try {
196             URL url = new URL(uri);
197             URLConnection connection = url.openConnection();
198             long timestamp = connection.getLastModified();
199             // Check for a "file:" URI (courtesy of Brian Ewins)
200             if (timestamp == 0){ // get 0 for local URI
201                 if ("file".equals(url.getProtocol())){
202                     File localfile = Paths.get(url.toURI()).toFile();
203                     timestamp = localfile.lastModified();
204                 }
205             }
206             return(timestamp);
207         }
208         // Brutal handling of all exceptions
209         catch (Exception e) {
210             return(System.currentTimeMillis());
211         }
212     }
213 
214     /**
215      *
216      */
lookupDocument(String uri)217     private CachedDocument lookupDocument(String uri) {
218         return(_references.get(uri));
219     }
220 
221     /**
222      *
223      */
insertDocument(String uri, CachedDocument doc)224     private synchronized void insertDocument(String uri, CachedDocument doc) {
225         if (_count < _size) {
226             // Insert out URI in circular buffer
227             _URIs[_count++] = uri;
228             _current = 0;
229         }
230         else {
231             // Remove oldest URI from reference map
232             _references.remove(_URIs[_current]);
233             // Insert our URI in circular buffer
234             _URIs[_current] = uri;
235             if (++_current >= _size) _current = 0;
236         }
237         _references.put(uri, doc);
238     }
239 
240     /**
241      *
242      */
replaceDocument(String uri, CachedDocument doc)243     private synchronized void replaceDocument(String uri, CachedDocument doc) {
244         if (doc == null)
245             insertDocument(uri, doc);
246         else
247             _references.put(uri, doc);
248     }
249 
250     /**
251      * Returns a document either by finding it in the cache or
252      * downloading it and putting it in the cache.
253      */
254     @Override
retrieveDocument(String baseURI, String href, Translet trs)255     public DOM retrieveDocument(String baseURI, String href, Translet trs) {
256         CachedDocument doc;
257 
258     String uri = href;
259     if (baseURI != null && !baseURI.equals("")) {
260         try {
261             uri = SystemIDResolver.getAbsoluteURI(uri, baseURI);
262         } catch (TransformerException te) {
263             // ignore
264         }
265     }
266 
267         // Try to get the document from the cache first
268         if ((doc = lookupDocument(uri)) == null) {
269             doc = new CachedDocument(uri);
270             if (doc == null) return null; // better error handling needed!!!
271             doc.setLastModified(getLastModified(uri));
272             insertDocument(uri, doc);
273         }
274         // If the document is in the cache we must check if it is still valid
275         else {
276             long now = System.currentTimeMillis();
277             long chk = doc.getLastChecked();
278             doc.setLastChecked(now);
279             // Has the modification time for this file been checked lately?
280             if (now > (chk + REFRESH_INTERVAL)) {
281                 doc.setLastChecked(now);
282                 long last = getLastModified(uri);
283                 // Reload document if it has been modified since last download
284                 if (last > doc.getLastModified()) {
285                     doc = new CachedDocument(uri);
286                     if (doc == null) return null;
287                     doc.setLastModified(getLastModified(uri));
288                     replaceDocument(uri, doc);
289                 }
290             }
291 
292         }
293 
294         // Get the references to the actual DOM and DTD handler
295         final DOM dom = doc.getDocument();
296 
297         // The dom reference may be null if the URL pointed to a
298         // non-existing document
299         if (dom == null) return null;
300 
301         doc.incAccessCount(); // For statistics
302 
303         final AbstractTranslet translet = (AbstractTranslet)trs;
304 
305         // Give the translet an early opportunity to extract any
306         // information from the DOM object that it would like.
307         translet.prepassDocument(dom);
308 
309         return(doc.getDocument());
310     }
311 
312     /**
313      * Outputs the cache statistics
314      */
getStatistics(PrintWriter out)315     public void getStatistics(PrintWriter out) {
316         out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+
317                     "<tr><td><b>Document URI</b></td>"+
318                     "<td><center><b>Build time</b></center></td>"+
319                     "<td><center><b>Access count</b></center></td>"+
320                     "<td><center><b>Last accessed</b></center></td>"+
321                     "<td><center><b>Last modified</b></center></td></tr>");
322 
323         for (int i=0; i<_count; i++) {
324             CachedDocument doc = _references.get(_URIs[i]);
325             out.print("<tr><td><a href=\""+_URIs[i]+"\">"+
326                       "<font size=-1>"+_URIs[i]+"</font></a></td>");
327             out.print("<td><center>"+doc.getLatency()+"ms</center></td>");
328             out.print("<td><center>"+doc.getAccessCount()+"</center></td>");
329             out.print("<td><center>"+(new Date(doc.getLastReferenced()))+
330                       "</center></td>");
331             out.print("<td><center>"+(new Date(doc.getLastModified()))+
332                       "</center></td>");
333             out.println("</tr>");
334         }
335 
336         out.println("</table></center>");
337     }
338 }
339