1 /*
2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xml.internal.serializer;
22 
23 import com.sun.org.apache.xml.internal.utils.AttList;
24 import com.sun.org.apache.xml.internal.utils.DOM2Helper;
25 import javax.xml.transform.Result;
26 import org.w3c.dom.Comment;
27 import org.w3c.dom.Element;
28 import org.w3c.dom.EntityReference;
29 import org.w3c.dom.NamedNodeMap;
30 import org.w3c.dom.Node;
31 import org.w3c.dom.ProcessingInstruction;
32 import org.w3c.dom.Text;
33 import org.xml.sax.ContentHandler;
34 import org.xml.sax.Locator;
35 import org.xml.sax.ext.LexicalHandler;
36 import org.xml.sax.helpers.LocatorImpl;
37 
38 /**
39  * This class does a pre-order walk of the DOM tree, calling a ContentHandler
40  * interface as it goes.
41  *
42  * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
 * It exists to cut the serializer's dependency on that package.
44  *
45  * @xsl.usage internal
46  */
47 
48 public final class TreeWalker
49 {
50 
51   /** Local reference to a ContentHandler          */
52   final private ContentHandler m_contentHandler;
53   /**
54    * If m_contentHandler is a SerializationHandler, then this is
55    * a reference to the same object.
56    */
57   final private SerializationHandler m_Serializer;
58 
59   /** Locator object for this TreeWalker          */
60   final private LocatorImpl m_locator = new LocatorImpl();
61 
62   /**
63    * Get the ContentHandler used for the tree walk.
64    *
65    * @return the ContentHandler used for the tree walk
66    */
getContentHandler()67   public ContentHandler getContentHandler()
68   {
69     return m_contentHandler;
70   }
71 
TreeWalker(ContentHandler ch)72   public TreeWalker(ContentHandler ch) {
73       this(ch, null);
74   }
75   /**
76    * Constructor.
77    * @param   contentHandler The implemention of the
78    * contentHandler operation (toXMLString, digest, ...)
79    */
TreeWalker(ContentHandler contentHandler, String systemId)80   public TreeWalker(ContentHandler contentHandler, String systemId)
81   {
82       // Set the content handler
83       m_contentHandler = contentHandler;
84       if (m_contentHandler instanceof SerializationHandler) {
85           m_Serializer = (SerializationHandler) m_contentHandler;
86       } else {
87           m_Serializer = null;
88       }
89       // Set the system ID, if it is given
90       m_contentHandler.setDocumentLocator(m_locator);
91       if (systemId != null) {
92           m_locator.setSystemId(systemId);
93       }
94   }
95 
96   /**
97    * Perform a pre-order traversal non-recursive style.
98    *
99    * Note that TreeWalker assumes that the subtree is intended to represent
100    * a complete (though not necessarily well-formed) document and, during a
101    * traversal, startDocument and endDocument will always be issued to the
102    * SAX listener.
103    *
104    * @param pos Node in the tree where to start traversal
105    *
106    * @throws TransformerException
107    */
traverse(Node pos)108   public void traverse(Node pos) throws org.xml.sax.SAXException
109   {
110 
111     this.m_contentHandler.startDocument();
112 
113     Node top = pos;
114 
115     while (null != pos)
116     {
117       startNode(pos);
118 
119       Node nextNode = pos.getFirstChild();
120 
121       while (null == nextNode)
122       {
123         endNode(pos);
124 
125         if (top.equals(pos))
126           break;
127 
128         nextNode = pos.getNextSibling();
129 
130         if (null == nextNode)
131         {
132           pos = pos.getParentNode();
133 
134           if ((null == pos) || (top.equals(pos)))
135           {
136             if (null != pos)
137               endNode(pos);
138 
139             nextNode = null;
140 
141             break;
142           }
143         }
144       }
145 
146       pos = nextNode;
147     }
148     this.m_contentHandler.endDocument();
149   }
150 
151   /**
152    * Perform a pre-order traversal non-recursive style.
153 
154    * Note that TreeWalker assumes that the subtree is intended to represent
155    * a complete (though not necessarily well-formed) document and, during a
156    * traversal, startDocument and endDocument will always be issued to the
157    * SAX listener.
158    *
159    * @param pos Node in the tree where to start traversal
160    * @param top Node in the tree where to end traversal
161    *
162    * @throws TransformerException
163    */
traverse(Node pos, Node top)164   public void traverse(Node pos, Node top) throws org.xml.sax.SAXException
165   {
166 
167     this.m_contentHandler.startDocument();
168 
169     while (null != pos)
170     {
171       startNode(pos);
172 
173       Node nextNode = pos.getFirstChild();
174 
175       while (null == nextNode)
176       {
177         endNode(pos);
178 
179         if ((null != top) && top.equals(pos))
180           break;
181 
182         nextNode = pos.getNextSibling();
183 
184         if (null == nextNode)
185         {
186           pos = pos.getParentNode();
187 
188           if ((null == pos) || ((null != top) && top.equals(pos)))
189           {
190             nextNode = null;
191 
192             break;
193           }
194         }
195       }
196 
197       pos = nextNode;
198     }
199     this.m_contentHandler.endDocument();
200   }
201 
202   // Flag indicating whether following text to be processed is raw text
203   boolean nextIsRaw = false;
204 
205   /**
206    * Optimized dispatch of characters.
207    */
dispatachChars(Node node)208   private final void dispatachChars(Node node)
209      throws org.xml.sax.SAXException
210   {
211     if(m_Serializer != null)
212     {
213       this.m_Serializer.characters(node);
214     }
215     else
216     {
217       String data = ((Text) node).getData();
218       this.m_contentHandler.characters(data.toCharArray(), 0, data.length());
219     }
220   }
221 
222   /**
223    * Start processing given node
224    *
225    *
226    * @param node Node to process
227    *
228    * @throws org.xml.sax.SAXException
229    */
startNode(Node node)230   protected void startNode(Node node) throws org.xml.sax.SAXException
231   {
232 
233 //   TODO: <REVIEW>
234 //    A Serializer implements ContentHandler, but not NodeConsumer
235 //    so drop this reference to NodeConsumer which would otherwise
236 //    pull in all sorts of things
237 //    if (m_contentHandler instanceof NodeConsumer)
238 //    {
239 //      ((NodeConsumer) m_contentHandler).setOriginatingNode(node);
240 //    }
241 //    TODO: </REVIEW>
242 
243                 if (node instanceof Locator)
244                 {
245                         Locator loc = (Locator)node;
246                         m_locator.setColumnNumber(loc.getColumnNumber());
247                         m_locator.setLineNumber(loc.getLineNumber());
248                         m_locator.setPublicId(loc.getPublicId());
249                         m_locator.setSystemId(loc.getSystemId());
250                 }
251                 else
252                 {
253                         m_locator.setColumnNumber(0);
254       m_locator.setLineNumber(0);
255                 }
256 
257     switch (node.getNodeType())
258     {
259     case Node.COMMENT_NODE :
260     {
261       String data = ((Comment) node).getData();
262 
263       if (m_contentHandler instanceof LexicalHandler)
264       {
265         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
266 
267         lh.comment(data.toCharArray(), 0, data.length());
268       }
269     }
270     break;
271     case Node.DOCUMENT_FRAGMENT_NODE :
272 
273       // ??;
274       break;
275     case Node.DOCUMENT_NODE :
276 
277       break;
278     case Node.ELEMENT_NODE :
279       Element elem_node = (Element) node;
280       {
281           // Make sure the namespace node
282           // for the element itself is declared
283           // to the ContentHandler
284           String uri = elem_node.getNamespaceURI();
285           if (uri != null) {
286               String prefix = elem_node.getPrefix();
287               if (prefix==null)
288                 prefix="";
289               this.m_contentHandler.startPrefixMapping(prefix,uri);
290           }
291       }
292       NamedNodeMap atts = elem_node.getAttributes();
293       int nAttrs = atts.getLength();
294       // System.out.println("TreeWalker#startNode: "+node.getNodeName());
295 
296 
297       // Make sure the namespace node of
298       // each attribute is declared to the ContentHandler
299       for (int i = 0; i < nAttrs; i++)
300       {
301         final Node attr = atts.item(i);
302         final String attrName = attr.getNodeName();
303         final int colon = attrName.indexOf(':');
304         final String prefix;
305 
306         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
307         {
308           // Use "" instead of null, as Xerces likes "" for the
309           // name of the default namespace.  Fix attributed
310           // to "Steven Murray" <smurray@ebt.com>.
311           if (colon < 0)
312             prefix = "";
313           else
314             prefix = attrName.substring(colon + 1);
315 
316           this.m_contentHandler.startPrefixMapping(prefix,
317                                                    attr.getNodeValue());
318         }
319         else if (colon > 0) {
320             prefix = attrName.substring(0,colon);
321             String uri = attr.getNamespaceURI();
322             if (uri != null)
323                 this.m_contentHandler.startPrefixMapping(prefix,uri);
324         }
325       }
326 
327       String ns = DOM2Helper.getNamespaceOfNode(node);
328       if(null == ns)
329         ns = "";
330       this.m_contentHandler.startElement(ns,
331                                          DOM2Helper.getLocalNameOfNode(node),
332                                          node.getNodeName(),
333                                          new AttList(atts));
334       break;
335     case Node.PROCESSING_INSTRUCTION_NODE :
336     {
337       ProcessingInstruction pi = (ProcessingInstruction) node;
338       String name = pi.getNodeName();
339 
340       // String data = pi.getData();
341       if (name.equals("xslt-next-is-raw"))
342       {
343         nextIsRaw = true;
344       }
345       else
346       {
347         this.m_contentHandler.processingInstruction(pi.getNodeName(),
348                                                     pi.getData());
349       }
350     }
351     break;
352     case Node.CDATA_SECTION_NODE :
353     {
354       boolean isLexH = (m_contentHandler instanceof LexicalHandler);
355       LexicalHandler lh = isLexH
356                           ? ((LexicalHandler) this.m_contentHandler) : null;
357 
358       if (isLexH)
359       {
360         lh.startCDATA();
361       }
362 
363       dispatachChars(node);
364 
365       {
366         if (isLexH)
367         {
368           lh.endCDATA();
369         }
370       }
371     }
372     break;
373     case Node.TEXT_NODE :
374     {
375       //String data = ((Text) node).getData();
376 
377       if (nextIsRaw)
378       {
379         nextIsRaw = false;
380 
381         m_contentHandler.processingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "");
382         dispatachChars(node);
383         m_contentHandler.processingInstruction(Result.PI_ENABLE_OUTPUT_ESCAPING, "");
384       }
385       else
386       {
387         dispatachChars(node);
388       }
389     }
390     break;
391     case Node.ENTITY_REFERENCE_NODE :
392     {
393       EntityReference eref = (EntityReference) node;
394 
395       if (m_contentHandler instanceof LexicalHandler)
396       {
397         ((LexicalHandler) this.m_contentHandler).startEntity(
398           eref.getNodeName());
399       }
400       else
401       {
402 
403         // warning("Can not output entity to a pure SAX ContentHandler");
404       }
405     }
406     break;
407     default :
408     }
409   }
410 
411   /**
412    * End processing of given node
413    *
414    *
415    * @param node Node we just finished processing
416    *
417    * @throws org.xml.sax.SAXException
418    */
endNode(Node node)419   protected void endNode(Node node) throws org.xml.sax.SAXException
420   {
421 
422     switch (node.getNodeType())
423     {
424     case Node.DOCUMENT_NODE :
425       break;
426 
427     case Node.ELEMENT_NODE :
428       String ns = DOM2Helper.getNamespaceOfNode(node);
429       if(null == ns)
430         ns = "";
431       this.m_contentHandler.endElement(ns,
432               DOM2Helper.getLocalNameOfNode(node),
433               node.getNodeName());
434 
435       if (m_Serializer == null) {
436       // Don't bother with endPrefixMapping calls if the ContentHandler is a
437       // SerializationHandler because SerializationHandler's ignore the
438       // endPrefixMapping() calls anyways. . . .  This is an optimization.
439       Element elem_node = (Element) node;
440       NamedNodeMap atts = elem_node.getAttributes();
441       int nAttrs = atts.getLength();
442 
443       // do the endPrefixMapping calls in reverse order
444       // of the startPrefixMapping calls
445       for (int i = (nAttrs-1); 0 <= i; i--)
446       {
447         final Node attr = atts.item(i);
448         final String attrName = attr.getNodeName();
449         final int colon = attrName.indexOf(':');
450         final String prefix;
451 
452         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
453         {
454           // Use "" instead of null, as Xerces likes "" for the
455           // name of the default namespace.  Fix attributed
456           // to "Steven Murray" <smurray@ebt.com>.
457           if (colon < 0)
458             prefix = "";
459           else
460             prefix = attrName.substring(colon + 1);
461 
462           this.m_contentHandler.endPrefixMapping(prefix);
463         }
464         else if (colon > 0) {
465             prefix = attrName.substring(0, colon);
466             this.m_contentHandler.endPrefixMapping(prefix);
467         }
468       }
469       {
470           String uri = elem_node.getNamespaceURI();
471           if (uri != null) {
472               String prefix = elem_node.getPrefix();
473               if (prefix==null)
474                 prefix="";
475               this.m_contentHandler.endPrefixMapping(prefix);
476           }
477       }
478       }
479       break;
480     case Node.CDATA_SECTION_NODE :
481       break;
482     case Node.ENTITY_REFERENCE_NODE :
483     {
484       EntityReference eref = (EntityReference) node;
485 
486       if (m_contentHandler instanceof LexicalHandler)
487       {
488         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
489 
490         lh.endEntity(eref.getNodeName());
491       }
492     }
493     break;
494     default :
495     }
496   }
497 }  //TreeWalker
498