1 /*
2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;
22 
23 import com.sun.org.apache.xml.internal.dtm.DTM;
24 import com.sun.org.apache.xml.internal.dtm.DTMManager;
25 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
26 import com.sun.org.apache.xml.internal.utils.IntStack;
27 import com.sun.org.apache.xml.internal.utils.IntVector;
28 import com.sun.org.apache.xml.internal.utils.StringVector;
29 import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
30 import java.util.Vector;
31 import javax.xml.transform.Source;
32 import org.xml.sax.SAXException;
33 
34 /**
35  * This is a subclass of SAX2DTM which has been modified to meet the needs of
36  * Result Tree Frameworks (RTFs). The differences are:
37  *
38  * 1) Multiple XML trees may be appended to the single DTM. This means
39  * that the root node of each document is _not_ node 0. Some code has
40  * had to be deoptimized to support this mode of operation, and an
41  * explicit mechanism for obtaining the Node Handle of the root node
42  * has been provided.
43  *
44  * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
45  * most recently added trees off the end of the DTM as stylesheet elements
46  * (and thus variable contexts) are exited.
47  *
48  * PLEASE NOTE that this class may be _heavily_ dependent upon the
49  * internals of the SAX2DTM superclass, and must be maintained in
50  * parallel with that code.  Arguably, they should be conditionals
51  * within a single class... but they have deen separated for
52  * performance reasons. (In fact, one could even argue about which is
53  * the superclass and which is the subclass; the current arrangement
54  * is as much about preserving stability of existing code during
55  * development as anything else.)
56  *
57  * %REVIEW% In fact, since the differences are so minor, I think it
58  * may be possible/practical to fold them back into the base
59  * SAX2DTM. Consider that as a future code-size optimization.
60  *
61  * @LastModified: Oct 2017
62  */
63 public class SAX2RTFDTM extends SAX2DTM
64 {
65   /** Set true to monitor SAX events and similar diagnostic info. */
66   private static final boolean DEBUG = false;
67 
68   /** Most recently started Document, or null if the DTM is empty.  */
69   private int m_currentDocumentNode=NULL;
70 
71   /** Tail-pruning mark: Number of nodes in use */
72   IntStack mark_size=new IntStack();
73   /** Tail-pruning mark: Number of data items in use */
74   IntStack mark_data_size=new IntStack();
75   /** Tail-pruning mark: Number of size-of-data fields in use */
76   IntStack mark_char_size=new IntStack();
77   /** Tail-pruning mark: Number of dataOrQName slots in use */
78   IntStack mark_doq_size=new IntStack();
79   /** Tail-pruning mark: Number of namespace declaration sets in use
80    * %REVIEW% I don't think number of NS sets is ever different from number
81    * of NS elements. We can probabably reduce these to a single stack and save
82    * some storage.
83    * */
84   IntStack mark_nsdeclset_size=new IntStack();
85   /** Tail-pruning mark: Number of naespace declaration elements in use
86    * %REVIEW% I don't think number of NS sets is ever different from number
87    * of NS elements. We can probabably reduce these to a single stack and save
88    * some storage.
89    */
90   IntStack mark_nsdeclelem_size=new IntStack();
91 
92   /**
93    * Tail-pruning mark:  initial number of nodes in use
94    */
95   int m_emptyNodeCount;
96 
97   /**
98    * Tail-pruning mark:  initial number of namespace declaration sets
99    */
100   int m_emptyNSDeclSetCount;
101 
102   /**
103    * Tail-pruning mark:  initial number of namespace declaration elements
104    */
105   int m_emptyNSDeclSetElemsCount;
106 
107   /**
108    * Tail-pruning mark:  initial number of data items in use
109    */
110   int m_emptyDataCount;
111 
112   /**
113    * Tail-pruning mark:  initial number of characters in use
114    */
115   int m_emptyCharsCount;
116 
117   /**
118    * Tail-pruning mark:  default initial number of dataOrQName slots in use
119    */
120   int m_emptyDataQNCount;
121 
SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing)122   public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
123                  DTMWSFilter whiteSpaceFilter,
124                  XMLStringFactory xstringfactory,
125                  boolean doIndexing)
126   {
127     super(mgr, source, dtmIdentity, whiteSpaceFilter,
128           xstringfactory, doIndexing);
129 
130     // NEVER track source locators for RTFs; they aren't meaningful. I think.
131     // (If we did track them, we'd need to tail-prune these too.)
132     //com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;
133     m_useSourceLocationProperty=false;
134     m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
135                                                      : null;
136     m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
137     m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
138 
139     // Record initial sizes of fields that are pushed and restored
140     // for RTF tail-pruning.  More entries can be popped than pushed, so
141     // we need this to mark the primordial state of the DTM.
142     m_emptyNodeCount = m_size;
143     m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
144                                  ? 0 : m_namespaceDeclSets.size();
145     m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
146                                       ? 0 : m_namespaceDeclSetElements.size();
147     m_emptyDataCount = m_data.size();
148     m_emptyCharsCount = m_chars.size();
149     m_emptyDataQNCount = m_dataOrQName.size();
150   }
151 
152   /**
153    * Given a DTM, find the owning document node. In the case of
154    * SAX2RTFDTM, which may contain multiple documents, this returns
155    * the <b>most recently started</b> document, or null if the DTM is
156    * empty or no document is currently under construction.
157    *
158    * %REVIEW% Should we continue to report the most recent after
159    * construction has ended? I think not, given that it may have been
160    * tail-pruned.
161    *
162    *  @return int Node handle of Document node, or null if this DTM does not
163    *  contain an "active" document.
164    * */
getDocument()165   public int getDocument()
166   {
167     return makeNodeHandle(m_currentDocumentNode);
168   }
169 
170   /**
171    * Given a node handle, find the owning document node, using DTM semantics
172    * (Document owns itself) rather than DOM semantics (Document has no owner).
173    *
174    * (I'm counting on the fact that getOwnerDocument() is implemented on top
175    * of this call, in the superclass, to avoid having to rewrite that one.
176    * Be careful if that code changes!)
177    *
178    * @param nodeHandle the id of the node.
179    * @return int Node handle of owning document
180    */
getDocumentRoot(int nodeHandle)181   public int getDocumentRoot(int nodeHandle)
182   {
183     for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
184       if (_type(id)==DTM.DOCUMENT_NODE) {
185         return makeNodeHandle(id);
186       }
187     }
188 
189     return DTM.NULL; // Safety net; should never happen
190   }
191 
192   /**
193    * Given a node identifier, find the owning document node.  Unlike the DOM,
194    * this considers the owningDocument of a Document to be itself. Note that
195    * in shared DTMs this may not be zero.
196    *
197    * @param nodeIdentifier the id of the starting node.
198    * @return int Node identifier of the root of this DTM tree
199    */
_documentRoot(int nodeIdentifier)200   protected int _documentRoot(int nodeIdentifier)
201   {
202     if(nodeIdentifier==NULL) return NULL;
203 
204     for (int parent=_parent(nodeIdentifier);
205          parent!=NULL;
206          nodeIdentifier=parent,parent=_parent(nodeIdentifier))
207       ;
208 
209     return nodeIdentifier;
210   }
211 
212   /**
213    * Receive notification of the beginning of a new RTF document.
214    *
215    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
216    * might want to consider folding the start/endDocument changes back
217    * into the main SAX2DTM so we don't have to expose so many fields
218    * (even as Protected) and carry the additional code.
219    *
220    * @throws SAXException Any SAX exception, possibly
221    *            wrapping another exception.
222    * @see org.xml.sax.ContentHandler#startDocument
223    * */
startDocument()224   public void startDocument() throws SAXException
225   {
226     // Re-initialize the tree append process
227     m_endDocumentOccured = false;
228     m_prefixMappings = new Vector<>();
229     m_contextIndexes = new IntStack();
230     m_parents = new IntStack();
231 
232     m_currentDocumentNode=m_size;
233     super.startDocument();
234   }
235 
236   /**
237    * Receive notification of the end of the document.
238    *
239    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
240    * might want to consider folding the start/endDocument changes back
241    * into the main SAX2DTM so we don't have to expose so many fields
242    * (even as Protected).
243    *
244    * @throws SAXException Any SAX exception, possibly
245    *            wrapping another exception.
246    * @see org.xml.sax.ContentHandler#endDocument
247    * */
endDocument()248   public void endDocument() throws SAXException
249   {
250     charactersFlush();
251 
252     m_nextsib.setElementAt(NULL,m_currentDocumentNode);
253 
254     if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
255       m_firstch.setElementAt(NULL,m_currentDocumentNode);
256 
257     if (DTM.NULL != m_previous)
258       m_nextsib.setElementAt(DTM.NULL,m_previous);
259 
260     m_parents = null;
261     m_prefixMappings = null;
262     m_contextIndexes = null;
263 
264     m_currentDocumentNode= NULL; // no longer open
265     m_endDocumentOccured = true;
266   }
267 
268 
269   /** "Tail-pruning" support for RTFs.
270    *
271    * This function pushes information about the current size of the
272    * DTM's data structures onto a stack, for use by popRewindMark()
273    * (which see).
274    *
275    * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
276    * RTFs will not be indexed, so I can simply panic if that case
277    * arises. Hey, it works...
278    * */
pushRewindMark()279   public void pushRewindMark()
280   {
281     if(m_indexing || m_elemIndexes!=null)
282       throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
283 
284     // Values from DTMDefaultBase
285     // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
286     mark_size.push(m_size);
287     mark_nsdeclset_size.push((m_namespaceDeclSets==null)
288                                    ? 0
289                                    : m_namespaceDeclSets.size());
290     mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
291                                    ? 0
292                                    : m_namespaceDeclSetElements.size());
293 
294     // Values from SAX2DTM
295     mark_data_size.push(m_data.size());
296     mark_char_size.push(m_chars.size());
297     mark_doq_size.push(m_dataOrQName.size());
298   }
299 
300   /** "Tail-pruning" support for RTFs.
301    *
302    * This function pops the information previously saved by
303    * pushRewindMark (which see) and uses it to discard all nodes added
304    * to the DTM after that time. We expect that this will allow us to
305    * reuse storage more effectively.
306    *
307    * This is _not_ intended to be called while a document is still being
308    * constructed -- only between endDocument and the next startDocument
309    *
310    * %REVIEW% WARNING: This is the first use of some of the truncation
311    * methods.  If Xalan blows up after this is called, that's a likely
312    * place to check.
313    *
314    * %REVIEW% Our original design for DTMs permitted them to share
315    * string pools.  If there any risk that this might be happening, we
316    * can _not_ rewind and recover the string storage. One solution
317    * might to assert that DTMs used for RTFs Must Not take advantage
318    * of that feature, but this seems excessively fragile. Another, much
319    * less attractive, would be to just let them leak... Nah.
320    *
321    * @return true if and only if the pop completely emptied the
322    * RTF. That response is used when determining how to unspool
323    * RTF-started-while-RTF-open situations.
324    * */
popRewindMark()325   public boolean popRewindMark()
326   {
327     boolean top=mark_size.empty();
328 
329     m_size=top ? m_emptyNodeCount : mark_size.pop();
330     m_exptype.setSize(m_size);
331     m_firstch.setSize(m_size);
332     m_nextsib.setSize(m_size);
333     m_prevsib.setSize(m_size);
334     m_parent.setSize(m_size);
335 
336     m_elemIndexes=null;
337 
338     int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
339     if (m_namespaceDeclSets!=null) {
340       m_namespaceDeclSets.setSize(ds);
341     }
342 
343     int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
344     if (m_namespaceDeclSetElements!=null) {
345       m_namespaceDeclSetElements.setSize(ds1);
346     }
347 
348     // Values from SAX2DTM - m_data always has a reserved entry
349     m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
350     m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
351     m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
352 
353     // Return true iff DTM now empty
354     return m_size==0;
355   }
356 
357   /** @return true if a DTM tree is currently under construction.
358    * */
isTreeIncomplete()359   public boolean isTreeIncomplete()
360   {
361     return !m_endDocumentOccured;
362   }
363 }
364