/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright 2001-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: ToTextStream.java,v 1.2.4.1 2005/09/21 10:35:34 pvedula Exp $
 */
22 package com.sun.org.apache.xml.internal.serializer;
23 
24 import java.io.IOException;
25 
26 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
27 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
28 import org.xml.sax.Attributes;
29 import org.xml.sax.SAXException;
30 
31 /**
32  * This class is not a public API.
33  * It is only public because it is used in other packages.
34  * This class converts SAX or SAX-like calls to a
35  * serialized document for xsl:output method of "text".
36  * @xsl.usage internal
37  * @LastModified: Sept 2018
38  */
39 public final class ToTextStream extends ToStream
40 {
41 
42 
43   /**
44    * Default constructor.
45    */
ToTextStream()46   public ToTextStream()
47   {
48     super();
49   }
50 
51 
52 
53   /**
54    * Receive notification of the beginning of a document.
55    *
56    * <p>The SAX parser will invoke this method only once, before any
57    * other methods in this interface or in DTDHandler (except for
58    * setDocumentLocator).</p>
59    *
60    * @throws org.xml.sax.SAXException Any SAX exception, possibly
61    *            wrapping another exception.
62    *
63    * @throws org.xml.sax.SAXException
64    */
startDocumentInternal()65   protected void startDocumentInternal() throws org.xml.sax.SAXException
66   {
67     super.startDocumentInternal();
68 
69     m_needToCallStartDocument = false;
70 
71     // No action for the moment.
72   }
73 
74   /**
75    * Receive notification of the end of a document.
76    *
77    * <p>The SAX parser will invoke this method only once, and it will
78    * be the last method invoked during the parse.  The parser shall
79    * not invoke this method until it has either abandoned parsing
80    * (because of an unrecoverable error) or reached the end of
81    * input.</p>
82    *
83    * @throws org.xml.sax.SAXException Any SAX exception, possibly
84    *            wrapping another exception.
85    *
86    * @throws org.xml.sax.SAXException
87    */
endDocument()88   public void endDocument() throws org.xml.sax.SAXException
89   {
90     flushPending();
91     flushWriter();
92     if (m_tracer != null)
93         super.fireEndDoc();
94   }
95 
96   /**
97    * Receive notification of the beginning of an element.
98    *
99    * <p>The Parser will invoke this method at the beginning of every
100    * element in the XML document; there will be a corresponding
101    * endElement() event for every startElement() event (even when the
102    * element is empty). All of the element's content will be
103    * reported, in order, before the corresponding endElement()
104    * event.</p>
105    *
106    * <p>If the element name has a namespace prefix, the prefix will
107    * still be attached.  Note that the attribute list provided will
108    * contain only attributes with explicit values (specified or
109    * defaulted): #IMPLIED attributes will be omitted.</p>
110    *
111    *
112    * @param namespaceURI The Namespace URI, or the empty string if the
113    *        element has no Namespace URI or if Namespace
114    *        processing is not being performed.
115    * @param localName The local name (without prefix), or the
116    *        empty string if Namespace processing is not being
117    *        performed.
118    * @param name The qualified name (with prefix), or the
119    *        empty string if qualified names are not available.
120    * @param atts The attributes attached to the element, if any.
121    * @throws org.xml.sax.SAXException Any SAX exception, possibly
122    *            wrapping another exception.
123    * @see #endElement
124    * @see org.xml.sax.AttributeList
125    *
126    * @throws org.xml.sax.SAXException
127    */
startElement( String namespaceURI, String localName, String name, Attributes atts)128   public void startElement(
129           String namespaceURI, String localName, String name, Attributes atts)
130             throws org.xml.sax.SAXException
131   {
132     // time to fire off startElement event
133     if (m_tracer != null) {
134         super.fireStartElem(name);
135         this.firePseudoAttributes();
136     }
137     return;
138   }
139 
140   /**
141    * Receive notification of the end of an element.
142    *
143    * <p>The SAX parser will invoke this method at the end of every
144    * element in the XML document; there will be a corresponding
145    * startElement() event for every endElement() event (even when the
146    * element is empty).</p>
147    *
148    * <p>If the element name has a namespace prefix, the prefix will
149    * still be attached to the name.</p>
150    *
151    *
152    * @param namespaceURI The Namespace URI, or the empty string if the
153    *        element has no Namespace URI or if Namespace
154    *        processing is not being performed.
155    * @param localName The local name (without prefix), or the
156    *        empty string if Namespace processing is not being
157    *        performed.
158    * @param name The qualified name (with prefix), or the
159    *        empty string if qualified names are not available.
160    * @throws org.xml.sax.SAXException Any SAX exception, possibly
161    *            wrapping another exception.
162    *
163    * @throws org.xml.sax.SAXException
164    */
endElement(String namespaceURI, String localName, String name)165   public void endElement(String namespaceURI, String localName, String name)
166           throws org.xml.sax.SAXException
167   {
168         if (m_tracer != null)
169             super.fireEndElem(name);
170   }
171 
172   /**
173    * Receive notification of character data.
174    *
175    * <p>The Parser will call this method to report each chunk of
176    * character data.  SAX parsers may return all contiguous character
177    * data in a single chunk, or they may split it into several
178    * chunks; however, all of the characters in any single event
179    * must come from the same external entity, so that the Locator
180    * provides useful information.</p>
181    *
182    * <p>The application must not attempt to read from the array
183    * outside of the specified range.</p>
184    *
185    * <p>Note that some parsers will report whitespace using the
186    * ignorableWhitespace() method rather than this one (validating
187    * parsers must do so).</p>
188    *
189    * @param ch The characters from the XML document.
190    * @param start The start position in the array.
191    * @param length The number of characters to read from the array.
192    * @throws org.xml.sax.SAXException Any SAX exception, possibly
193    *            wrapping another exception.
194    * @see #ignorableWhitespace
195    * @see org.xml.sax.Locator
196    */
characters(char ch[], int start, int length)197   public void characters(char ch[], int start, int length)
198           throws org.xml.sax.SAXException
199   {
200 
201     flushPending();
202 
203     try
204     {
205         if (inTemporaryOutputState()) {
206             /* leave characters un-processed as we are
207              * creating temporary output, the output generated by
208              * this serializer will be input to a final serializer
209              * later on and it will do the processing in final
210              * output state (not temporary output state).
211              *
212              * A "temporary" ToTextStream serializer is used to
213              * evaluate attribute value templates (for example),
214              * and the result of evaluating such a thing
215              * is fed into a final serializer later on.
216              */
217             m_writer.write(ch, start, length);
218         }
219         else {
220             // In final output state we do process the characters!
221             writeNormalizedChars(ch, start, length, m_lineSepUse);
222         }
223 
224         if (m_tracer != null)
225             super.fireCharEvent(ch, start, length);
226     }
227     catch(IOException ioe)
228     {
229       throw new SAXException(ioe);
230     }
231   }
232 
233   /**
234    * If available, when the disable-output-escaping attribute is used,
235    * output raw text without escaping.
236    *
237    * @param ch The characters from the XML document.
238    * @param start The start position in the array.
239    * @param length The number of characters to read from the array.
240    *
241    * @throws org.xml.sax.SAXException Any SAX exception, possibly
242    *            wrapping another exception.
243    */
charactersRaw(char ch[], int start, int length)244   public void charactersRaw(char ch[], int start, int length)
245           throws org.xml.sax.SAXException
246   {
247 
248     try
249     {
250       writeNormalizedChars(ch, start, length, m_lineSepUse);
251     }
252     catch(IOException ioe)
253     {
254       throw new SAXException(ioe);
255     }
256   }
257 
258     /**
259      * Normalize the characters, but don't escape.  Different from
260      * SerializerToXML#writeNormalizedChars because it does not attempt to do
261      * XML escaping at all.
262      *
263      * @param ch The characters from the XML document.
264      * @param start The start position in the array.
265      * @param length The number of characters to read from the array.
266      * @param useLineSep true if the operating systems
267      * end-of-line separator should be output rather than a new-line character.
268      *
269      * @throws IOException
270      * @throws org.xml.sax.SAXException
271      */
writeNormalizedChars( final char ch[], final int start, final int length, final boolean useLineSep)272     void writeNormalizedChars(
273         final char ch[],
274             final int start,
275             final int length,
276             final boolean useLineSep)
277             throws IOException, org.xml.sax.SAXException
278     {
279         final String encoding = getEncoding();
280         final java.io.Writer writer = m_writer;
281         final int end = start + length;
282 
283         /* copy a few "constants" before the loop for performance */
284         final char S_LINEFEED = CharInfo.S_LINEFEED;
285 
286         // This for() loop always increments i by one at the end
287         // of the loop.  Additional increments of i adjust for when
288         // two input characters (a high/low UTF16 surrogate pair)
289         // are processed.
290         for (int i = start; i < end; i++) {
291             final char c = ch[i];
292 
293             if (S_LINEFEED == c && useLineSep) {
294                 writer.write(m_lineSep, 0, m_lineSepLen);
295                 // one input char processed
296             } else if (m_encodingInfo.isInEncoding(c)) {
297                 writer.write(c);
298                 // one input char processed
299             } else if (Encodings.isHighUTF16Surrogate(c) ||
300                        Encodings.isLowUTF16Surrogate(c)) {
301                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
302                 if (codePoint >= 0) {
303                     // move the index if the low surrogate is consumed
304                     // as writeUTF16Surrogate has written the pair
305                     if (Encodings.isHighUTF16Surrogate(c)) {
306                         i++;
307                     }
308 
309                     // printing to the console is not appropriate, but will leave
310                     // it as is for compatibility.
311                     if (codePoint >0) {
312                         // I think we can just emit the message,
313                         // not crash and burn.
314                         final String integralValue = Integer.toString(codePoint);
315                         final String msg = Utils.messages.createMessage(
316                             MsgKey.ER_ILLEGAL_CHARACTER,
317                             new Object[] { integralValue, encoding });
318 
319                         //Older behavior was to throw the message,
320                         //but newer gentler behavior is to write a message to System.err
321                         //throw new SAXException(msg);
322                         System.err.println(msg);
323                     }
324                 }
325             } else {
326                 // Don't know what to do with this char, it is
327                 // not in the encoding and not a high char in
328                 // a surrogate pair, so write out as an entity ref
329                 if (encoding != null) {
330                     /* The output encoding is known,
331                      * so somthing is wrong.
332                      */
333 
334                     // not in the encoding, so write out a character reference
335                     writer.write('&');
336                     writer.write('#');
337                     writer.write(Integer.toString(c));
338                     writer.write(';');
339 
340                     // I think we can just emit the message,
341                     // not crash and burn.
342                     final String integralValue = Integer.toString(c);
343                     final String msg = Utils.messages.createMessage(
344                         MsgKey.ER_ILLEGAL_CHARACTER,
345                         new Object[] { integralValue, encoding });
346 
347                     //Older behavior was to throw the message,
348                     //but newer gentler behavior is to write a message to System.err
349                     //throw new SAXException(msg);
350                     System.err.println(msg);
351                 } else {
352                     /* The output encoding is not known,
353                      * so just write it out as-is.
354                      */
355                     writer.write(c);
356                 }
357 
358                 // one input char was processed
359             }
360         }
361     }
362 
363   /**
364    * Receive notification of cdata.
365    *
366    * <p>The Parser will call this method to report each chunk of
367    * character data.  SAX parsers may return all contiguous character
368    * data in a single chunk, or they may split it into several
369    * chunks; however, all of the characters in any single event
370    * must come from the same external entity, so that the Locator
371    * provides useful information.</p>
372    *
373    * <p>The application must not attempt to read from the array
374    * outside of the specified range.</p>
375    *
376    * <p>Note that some parsers will report whitespace using the
377    * ignorableWhitespace() method rather than this one (validating
378    * parsers must do so).</p>
379    *
380    * @param ch The characters from the XML document.
381    * @param start The start position in the array.
382    * @param length The number of characters to read from the array.
383    * @throws org.xml.sax.SAXException Any SAX exception, possibly
384    *            wrapping another exception.
385    * @see #ignorableWhitespace
386    * @see org.xml.sax.Locator
387    */
cdata(char ch[], int start, int length)388   public void cdata(char ch[], int start, int length)
389           throws org.xml.sax.SAXException
390   {
391     try
392     {
393         writeNormalizedChars(ch, start, length, m_lineSepUse);
394         if (m_tracer != null)
395             super.fireCDATAEvent(ch, start, length);
396     }
397     catch(IOException ioe)
398     {
399       throw new SAXException(ioe);
400     }
401   }
402 
403   /**
404    * Receive notification of ignorable whitespace in element content.
405    *
406    * <p>Validating Parsers must use this method to report each chunk
407    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
408    * section 2.10): non-validating parsers may also use this method
409    * if they are capable of parsing and using content models.</p>
410    *
411    * <p>SAX parsers may return all contiguous whitespace in a single
412    * chunk, or they may split it into several chunks; however, all of
413    * the characters in any single event must come from the same
414    * external entity, so that the Locator provides useful
415    * information.</p>
416    *
417    * <p>The application must not attempt to read from the array
418    * outside of the specified range.</p>
419    *
420    * @param ch The characters from the XML document.
421    * @param start The start position in the array.
422    * @param length The number of characters to read from the array.
423    * @throws org.xml.sax.SAXException Any SAX exception, possibly
424    *            wrapping another exception.
425    * @see #characters
426    *
427    * @throws org.xml.sax.SAXException
428    */
ignorableWhitespace(char ch[], int start, int length)429   public void ignorableWhitespace(char ch[], int start, int length)
430           throws org.xml.sax.SAXException
431   {
432 
433     try
434     {
435       writeNormalizedChars(ch, start, length, m_lineSepUse);
436     }
437     catch(IOException ioe)
438     {
439       throw new SAXException(ioe);
440     }
441   }
442 
443   /**
444    * Receive notification of a processing instruction.
445    *
446    * <p>The Parser will invoke this method once for each processing
447    * instruction found: note that processing instructions may occur
448    * before or after the main document element.</p>
449    *
450    * <p>A SAX parser should never report an XML declaration (XML 1.0,
451    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
452    * using this method.</p>
453    *
454    * @param target The processing instruction target.
455    * @param data The processing instruction data, or null if
456    *        none was supplied.
457    * @throws org.xml.sax.SAXException Any SAX exception, possibly
458    *            wrapping another exception.
459    *
460    * @throws org.xml.sax.SAXException
461    */
processingInstruction(String target, String data)462   public void processingInstruction(String target, String data)
463           throws org.xml.sax.SAXException
464   {
465     // flush anything pending first
466     flushPending();
467 
468     if (m_tracer != null)
469         super.fireEscapingEvent(target, data);
470   }
471 
472   /**
473    * Called when a Comment is to be constructed.
474    * Note that Xalan will normally invoke the other version of this method.
475    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
476    *
477    * @param   data  The comment data.
478    * @throws org.xml.sax.SAXException Any SAX exception, possibly
479    *            wrapping another exception.
480    */
comment(String data)481   public void comment(String data) throws org.xml.sax.SAXException
482   {
483       final int length = data.length();
484       if (length > m_charsBuff.length)
485       {
486           m_charsBuff = new char[length*2 + 1];
487       }
488       data.getChars(0, length, m_charsBuff, 0);
489       comment(m_charsBuff, 0, length);
490   }
491 
492   /**
493    * Report an XML comment anywhere in the document.
494    *
495    * This callback will be used for comments inside or outside the
496    * document element, including comments in the external DTD
497    * subset (if read).
498    *
499    * @param ch An array holding the characters in the comment.
500    * @param start The starting position in the array.
501    * @param length The number of characters to use from the array.
502    * @throws org.xml.sax.SAXException The application may raise an exception.
503    */
comment(char ch[], int start, int length)504   public void comment(char ch[], int start, int length)
505           throws org.xml.sax.SAXException
506   {
507 
508     flushPending();
509     if (m_tracer != null)
510         super.fireCommentEvent(ch, start, length);
511   }
512 
513   /**
514    * Receive notivication of a entityReference.
515    *
516    * @param name non-null reference to the name of the entity.
517    *
518    * @throws org.xml.sax.SAXException
519    */
entityReference(String name)520   public void entityReference(String name) throws org.xml.sax.SAXException
521   {
522         if (m_tracer != null)
523             super.fireEntityReference(name);
524   }
525 
526     /**
527      * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
528      */
addAttribute( String uri, String localName, String rawName, String type, String value, boolean XSLAttribute)529     public void addAttribute(
530         String uri,
531         String localName,
532         String rawName,
533         String type,
534         String value,
535         boolean XSLAttribute)
536     {
537         // do nothing, just forget all about the attribute
538     }
539 
540     /**
541      * @see org.xml.sax.ext.LexicalHandler#endCDATA()
542      */
endCDATA()543     public void endCDATA() throws SAXException
544     {
545         // do nothing
546     }
547 
548     /**
549      * @see ExtendedContentHandler#endElement(String)
550      */
endElement(String elemName)551     public void endElement(String elemName) throws SAXException
552     {
553         if (m_tracer != null)
554             super.fireEndElem(elemName);
555     }
556 
557     /**
558      * From XSLTC
559      */
startElement( String elementNamespaceURI, String elementLocalName, String elementName)560     public void startElement(
561     String elementNamespaceURI,
562     String elementLocalName,
563     String elementName)
564     throws SAXException
565     {
566         if (m_needToCallStartDocument)
567             startDocumentInternal();
568         // time to fire off startlement event.
569         if (m_tracer != null) {
570             super.fireStartElem(elementName);
571             this.firePseudoAttributes();
572         }
573 
574         return;
575     }
576 
577 
578     /**
579      * From XSLTC
580      */
characters(String characters)581     public void characters(String characters)
582     throws SAXException
583     {
584         final int length = characters.length();
585         if (length > m_charsBuff.length)
586         {
587             m_charsBuff = new char[length*2 + 1];
588         }
589         characters.getChars(0, length, m_charsBuff, 0);
590         characters(m_charsBuff, 0, length);
591     }
592 
593 
594     /**
595      * From XSLTC
596      */
addAttribute(String name, String value)597     public void addAttribute(String name, String value)
598     {
599         // do nothing, forget about the attribute
600     }
601 
602     /**
603      * Add a unique attribute
604      */
addUniqueAttribute(String qName, String value, int flags)605     public void addUniqueAttribute(String qName, String value, int flags)
606         throws SAXException
607     {
608         // do nothing, forget about the attribute
609     }
610 
startPrefixMapping( String prefix, String uri, boolean shouldFlush)611     public boolean startPrefixMapping(
612         String prefix,
613         String uri,
614         boolean shouldFlush)
615         throws SAXException
616     {
617         // no namespace support for HTML
618         return false;
619     }
620 
621 
startPrefixMapping(String prefix, String uri)622     public void startPrefixMapping(String prefix, String uri)
623         throws org.xml.sax.SAXException
624     {
625         // no namespace support for HTML
626     }
627 
628 
namespaceAfterStartElement( final String prefix, final String uri)629     public void namespaceAfterStartElement(
630         final String prefix,
631         final String uri)
632         throws SAXException
633     {
634         // no namespace support for HTML
635     }
636 
flushPending()637     public void flushPending() throws org.xml.sax.SAXException
638     {
639             if (m_needToCallStartDocument)
640             {
641                 startDocumentInternal();
642                 m_needToCallStartDocument = false;
643             }
644     }
645 }
646