1 /*
2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xml.internal.serializer;
22 
23 import java.io.IOException;
24 
25 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
26 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
27 import org.xml.sax.Attributes;
28 import org.xml.sax.SAXException;
29 
30 /**
31  * This class is not a public API.
32  * It is only public because it is used in other packages.
33  * This class converts SAX or SAX-like calls to a
34  * serialized document for xsl:output method of "text".
35  * @xsl.usage internal
36  * @LastModified: Sept 2018
37  */
38 public final class ToTextStream extends ToStream
39 {
40 
41 
/**
 * Creates a serializer for the "text" output method.
 *
 * <p>All initialization is delegated to the superclass constructor.</p>
 */
public ToTextStream() {
    super();
}
49 
50 
51 
52   /**
53    * Receive notification of the beginning of a document.
54    *
55    * <p>The SAX parser will invoke this method only once, before any
56    * other methods in this interface or in DTDHandler (except for
57    * setDocumentLocator).</p>
58    *
59    * @throws org.xml.sax.SAXException Any SAX exception, possibly
60    *            wrapping another exception.
61    *
62    * @throws org.xml.sax.SAXException
63    */
startDocumentInternal()64   protected void startDocumentInternal() throws org.xml.sax.SAXException
65   {
66     super.startDocumentInternal();
67 
68     m_needToCallStartDocument = false;
69 
70     // No action for the moment.
71   }
72 
73   /**
74    * Receive notification of the end of a document.
75    *
76    * <p>The SAX parser will invoke this method only once, and it will
77    * be the last method invoked during the parse.  The parser shall
78    * not invoke this method until it has either abandoned parsing
79    * (because of an unrecoverable error) or reached the end of
80    * input.</p>
81    *
82    * @throws org.xml.sax.SAXException Any SAX exception, possibly
83    *            wrapping another exception.
84    *
85    * @throws org.xml.sax.SAXException
86    */
endDocument()87   public void endDocument() throws org.xml.sax.SAXException
88   {
89     flushPending();
90     flushWriter();
91     if (m_tracer != null)
92         super.fireEndDoc();
93   }
94 
95   /**
96    * Receive notification of the beginning of an element.
97    *
98    * <p>The Parser will invoke this method at the beginning of every
99    * element in the XML document; there will be a corresponding
100    * endElement() event for every startElement() event (even when the
101    * element is empty). All of the element's content will be
102    * reported, in order, before the corresponding endElement()
103    * event.</p>
104    *
105    * <p>If the element name has a namespace prefix, the prefix will
106    * still be attached.  Note that the attribute list provided will
107    * contain only attributes with explicit values (specified or
108    * defaulted): #IMPLIED attributes will be omitted.</p>
109    *
110    *
111    * @param namespaceURI The Namespace URI, or the empty string if the
112    *        element has no Namespace URI or if Namespace
113    *        processing is not being performed.
114    * @param localName The local name (without prefix), or the
115    *        empty string if Namespace processing is not being
116    *        performed.
117    * @param name The qualified name (with prefix), or the
118    *        empty string if qualified names are not available.
119    * @param atts The attributes attached to the element, if any.
120    * @throws org.xml.sax.SAXException Any SAX exception, possibly
121    *            wrapping another exception.
122    * @see #endElement
123    * @see org.xml.sax.AttributeList
124    *
125    * @throws org.xml.sax.SAXException
126    */
startElement( String namespaceURI, String localName, String name, Attributes atts)127   public void startElement(
128           String namespaceURI, String localName, String name, Attributes atts)
129             throws org.xml.sax.SAXException
130   {
131     // time to fire off startElement event
132     if (m_tracer != null) {
133         super.fireStartElem(name);
134         this.firePseudoAttributes();
135     }
136     return;
137   }
138 
139   /**
140    * Receive notification of the end of an element.
141    *
142    * <p>The SAX parser will invoke this method at the end of every
143    * element in the XML document; there will be a corresponding
144    * startElement() event for every endElement() event (even when the
145    * element is empty).</p>
146    *
147    * <p>If the element name has a namespace prefix, the prefix will
148    * still be attached to the name.</p>
149    *
150    *
151    * @param namespaceURI The Namespace URI, or the empty string if the
152    *        element has no Namespace URI or if Namespace
153    *        processing is not being performed.
154    * @param localName The local name (without prefix), or the
155    *        empty string if Namespace processing is not being
156    *        performed.
157    * @param name The qualified name (with prefix), or the
158    *        empty string if qualified names are not available.
159    * @throws org.xml.sax.SAXException Any SAX exception, possibly
160    *            wrapping another exception.
161    *
162    * @throws org.xml.sax.SAXException
163    */
endElement(String namespaceURI, String localName, String name)164   public void endElement(String namespaceURI, String localName, String name)
165           throws org.xml.sax.SAXException
166   {
167         if (m_tracer != null)
168             super.fireEndElem(name);
169   }
170 
171   /**
172    * Receive notification of character data.
173    *
174    * <p>The Parser will call this method to report each chunk of
175    * character data.  SAX parsers may return all contiguous character
176    * data in a single chunk, or they may split it into several
177    * chunks; however, all of the characters in any single event
178    * must come from the same external entity, so that the Locator
179    * provides useful information.</p>
180    *
181    * <p>The application must not attempt to read from the array
182    * outside of the specified range.</p>
183    *
184    * <p>Note that some parsers will report whitespace using the
185    * ignorableWhitespace() method rather than this one (validating
186    * parsers must do so).</p>
187    *
188    * @param ch The characters from the XML document.
189    * @param start The start position in the array.
190    * @param length The number of characters to read from the array.
191    * @throws org.xml.sax.SAXException Any SAX exception, possibly
192    *            wrapping another exception.
193    * @see #ignorableWhitespace
194    * @see org.xml.sax.Locator
195    */
characters(char ch[], int start, int length)196   public void characters(char ch[], int start, int length)
197           throws org.xml.sax.SAXException
198   {
199 
200     flushPending();
201 
202     try
203     {
204         if (inTemporaryOutputState()) {
205             /* leave characters un-processed as we are
206              * creating temporary output, the output generated by
207              * this serializer will be input to a final serializer
208              * later on and it will do the processing in final
209              * output state (not temporary output state).
210              *
211              * A "temporary" ToTextStream serializer is used to
212              * evaluate attribute value templates (for example),
213              * and the result of evaluating such a thing
214              * is fed into a final serializer later on.
215              */
216             m_writer.write(ch, start, length);
217         }
218         else {
219             // In final output state we do process the characters!
220             writeNormalizedChars(ch, start, length, m_lineSepUse);
221         }
222 
223         if (m_tracer != null)
224             super.fireCharEvent(ch, start, length);
225     }
226     catch(IOException ioe)
227     {
228       throw new SAXException(ioe);
229     }
230   }
231 
232   /**
233    * If available, when the disable-output-escaping attribute is used,
234    * output raw text without escaping.
235    *
236    * @param ch The characters from the XML document.
237    * @param start The start position in the array.
238    * @param length The number of characters to read from the array.
239    *
240    * @throws org.xml.sax.SAXException Any SAX exception, possibly
241    *            wrapping another exception.
242    */
charactersRaw(char ch[], int start, int length)243   public void charactersRaw(char ch[], int start, int length)
244           throws org.xml.sax.SAXException
245   {
246 
247     try
248     {
249       writeNormalizedChars(ch, start, length, m_lineSepUse);
250     }
251     catch(IOException ioe)
252     {
253       throw new SAXException(ioe);
254     }
255   }
256 
257     /**
258      * Normalize the characters, but don't escape.  Different from
259      * SerializerToXML#writeNormalizedChars because it does not attempt to do
260      * XML escaping at all.
261      *
262      * @param ch The characters from the XML document.
263      * @param start The start position in the array.
264      * @param length The number of characters to read from the array.
265      * @param useLineSep true if the operating systems
266      * end-of-line separator should be output rather than a new-line character.
267      *
268      * @throws IOException
269      * @throws org.xml.sax.SAXException
270      */
writeNormalizedChars( final char ch[], final int start, final int length, final boolean useLineSep)271     void writeNormalizedChars(
272         final char ch[],
273             final int start,
274             final int length,
275             final boolean useLineSep)
276             throws IOException, org.xml.sax.SAXException
277     {
278         final String encoding = getEncoding();
279         final java.io.Writer writer = m_writer;
280         final int end = start + length;
281 
282         /* copy a few "constants" before the loop for performance */
283         final char S_LINEFEED = CharInfo.S_LINEFEED;
284 
285         // This for() loop always increments i by one at the end
286         // of the loop.  Additional increments of i adjust for when
287         // two input characters (a high/low UTF16 surrogate pair)
288         // are processed.
289         for (int i = start; i < end; i++) {
290             final char c = ch[i];
291 
292             if (S_LINEFEED == c && useLineSep) {
293                 writer.write(m_lineSep, 0, m_lineSepLen);
294                 // one input char processed
295             } else if (m_encodingInfo.isInEncoding(c)) {
296                 writer.write(c);
297                 // one input char processed
298             } else if (Encodings.isHighUTF16Surrogate(c) ||
299                        Encodings.isLowUTF16Surrogate(c)) {
300                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
301                 if (codePoint >= 0) {
302                     // move the index if the low surrogate is consumed
303                     // as writeUTF16Surrogate has written the pair
304                     if (Encodings.isHighUTF16Surrogate(c)) {
305                         i++;
306                     }
307 
308                     // printing to the console is not appropriate, but will leave
309                     // it as is for compatibility.
310                     if (codePoint >0) {
311                         // I think we can just emit the message,
312                         // not crash and burn.
313                         final String integralValue = Integer.toString(codePoint);
314                         final String msg = Utils.messages.createMessage(
315                             MsgKey.ER_ILLEGAL_CHARACTER,
316                             new Object[] { integralValue, encoding });
317 
318                         //Older behavior was to throw the message,
319                         //but newer gentler behavior is to write a message to System.err
320                         //throw new SAXException(msg);
321                         System.err.println(msg);
322                     }
323                 }
324             } else {
325                 // Don't know what to do with this char, it is
326                 // not in the encoding and not a high char in
327                 // a surrogate pair, so write out as an entity ref
328                 if (encoding != null) {
329                     /* The output encoding is known,
330                      * so somthing is wrong.
331                      */
332 
333                     // not in the encoding, so write out a character reference
334                     writer.write('&');
335                     writer.write('#');
336                     writer.write(Integer.toString(c));
337                     writer.write(';');
338 
339                     // I think we can just emit the message,
340                     // not crash and burn.
341                     final String integralValue = Integer.toString(c);
342                     final String msg = Utils.messages.createMessage(
343                         MsgKey.ER_ILLEGAL_CHARACTER,
344                         new Object[] { integralValue, encoding });
345 
346                     //Older behavior was to throw the message,
347                     //but newer gentler behavior is to write a message to System.err
348                     //throw new SAXException(msg);
349                     System.err.println(msg);
350                 } else {
351                     /* The output encoding is not known,
352                      * so just write it out as-is.
353                      */
354                     writer.write(c);
355                 }
356 
357                 // one input char was processed
358             }
359         }
360     }
361 
362   /**
363    * Receive notification of cdata.
364    *
365    * <p>The Parser will call this method to report each chunk of
366    * character data.  SAX parsers may return all contiguous character
367    * data in a single chunk, or they may split it into several
368    * chunks; however, all of the characters in any single event
369    * must come from the same external entity, so that the Locator
370    * provides useful information.</p>
371    *
372    * <p>The application must not attempt to read from the array
373    * outside of the specified range.</p>
374    *
375    * <p>Note that some parsers will report whitespace using the
376    * ignorableWhitespace() method rather than this one (validating
377    * parsers must do so).</p>
378    *
379    * @param ch The characters from the XML document.
380    * @param start The start position in the array.
381    * @param length The number of characters to read from the array.
382    * @throws org.xml.sax.SAXException Any SAX exception, possibly
383    *            wrapping another exception.
384    * @see #ignorableWhitespace
385    * @see org.xml.sax.Locator
386    */
cdata(char ch[], int start, int length)387   public void cdata(char ch[], int start, int length)
388           throws org.xml.sax.SAXException
389   {
390     try
391     {
392         writeNormalizedChars(ch, start, length, m_lineSepUse);
393         if (m_tracer != null)
394             super.fireCDATAEvent(ch, start, length);
395     }
396     catch(IOException ioe)
397     {
398       throw new SAXException(ioe);
399     }
400   }
401 
402   /**
403    * Receive notification of ignorable whitespace in element content.
404    *
405    * <p>Validating Parsers must use this method to report each chunk
406    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
407    * section 2.10): non-validating parsers may also use this method
408    * if they are capable of parsing and using content models.</p>
409    *
410    * <p>SAX parsers may return all contiguous whitespace in a single
411    * chunk, or they may split it into several chunks; however, all of
412    * the characters in any single event must come from the same
413    * external entity, so that the Locator provides useful
414    * information.</p>
415    *
416    * <p>The application must not attempt to read from the array
417    * outside of the specified range.</p>
418    *
419    * @param ch The characters from the XML document.
420    * @param start The start position in the array.
421    * @param length The number of characters to read from the array.
422    * @throws org.xml.sax.SAXException Any SAX exception, possibly
423    *            wrapping another exception.
424    * @see #characters
425    *
426    * @throws org.xml.sax.SAXException
427    */
ignorableWhitespace(char ch[], int start, int length)428   public void ignorableWhitespace(char ch[], int start, int length)
429           throws org.xml.sax.SAXException
430   {
431 
432     try
433     {
434       writeNormalizedChars(ch, start, length, m_lineSepUse);
435     }
436     catch(IOException ioe)
437     {
438       throw new SAXException(ioe);
439     }
440   }
441 
442   /**
443    * Receive notification of a processing instruction.
444    *
445    * <p>The Parser will invoke this method once for each processing
446    * instruction found: note that processing instructions may occur
447    * before or after the main document element.</p>
448    *
449    * <p>A SAX parser should never report an XML declaration (XML 1.0,
450    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
451    * using this method.</p>
452    *
453    * @param target The processing instruction target.
454    * @param data The processing instruction data, or null if
455    *        none was supplied.
456    * @throws org.xml.sax.SAXException Any SAX exception, possibly
457    *            wrapping another exception.
458    *
459    * @throws org.xml.sax.SAXException
460    */
processingInstruction(String target, String data)461   public void processingInstruction(String target, String data)
462           throws org.xml.sax.SAXException
463   {
464     // flush anything pending first
465     flushPending();
466 
467     if (m_tracer != null)
468         super.fireEscapingEvent(target, data);
469   }
470 
471   /**
472    * Called when a Comment is to be constructed.
473    * Note that Xalan will normally invoke the other version of this method.
474    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
475    *
476    * @param   data  The comment data.
477    * @throws org.xml.sax.SAXException Any SAX exception, possibly
478    *            wrapping another exception.
479    */
comment(String data)480   public void comment(String data) throws org.xml.sax.SAXException
481   {
482       final int length = data.length();
483       if (length > m_charsBuff.length)
484       {
485           m_charsBuff = new char[length*2 + 1];
486       }
487       data.getChars(0, length, m_charsBuff, 0);
488       comment(m_charsBuff, 0, length);
489   }
490 
491   /**
492    * Report an XML comment anywhere in the document.
493    *
494    * This callback will be used for comments inside or outside the
495    * document element, including comments in the external DTD
496    * subset (if read).
497    *
498    * @param ch An array holding the characters in the comment.
499    * @param start The starting position in the array.
500    * @param length The number of characters to use from the array.
501    * @throws org.xml.sax.SAXException The application may raise an exception.
502    */
comment(char ch[], int start, int length)503   public void comment(char ch[], int start, int length)
504           throws org.xml.sax.SAXException
505   {
506 
507     flushPending();
508     if (m_tracer != null)
509         super.fireCommentEvent(ch, start, length);
510   }
511 
512   /**
513    * Receive notivication of a entityReference.
514    *
515    * @param name non-null reference to the name of the entity.
516    *
517    * @throws org.xml.sax.SAXException
518    */
entityReference(String name)519   public void entityReference(String name) throws org.xml.sax.SAXException
520   {
521         if (m_tracer != null)
522             super.fireEntityReference(name);
523   }
524 
525     /**
526      * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
527      */
addAttribute( String uri, String localName, String rawName, String type, String value, boolean XSLAttribute)528     public void addAttribute(
529         String uri,
530         String localName,
531         String rawName,
532         String type,
533         String value,
534         boolean XSLAttribute)
535     {
536         // do nothing, just forget all about the attribute
537     }
538 
539     /**
540      * @see org.xml.sax.ext.LexicalHandler#endCDATA()
541      */
endCDATA()542     public void endCDATA() throws SAXException
543     {
544         // do nothing
545     }
546 
547     /**
548      * @see ExtendedContentHandler#endElement(String)
549      */
endElement(String elemName)550     public void endElement(String elemName) throws SAXException
551     {
552         if (m_tracer != null)
553             super.fireEndElem(elemName);
554     }
555 
556     /**
557      * From XSLTC
558      */
startElement( String elementNamespaceURI, String elementLocalName, String elementName)559     public void startElement(
560     String elementNamespaceURI,
561     String elementLocalName,
562     String elementName)
563     throws SAXException
564     {
565         if (m_needToCallStartDocument)
566             startDocumentInternal();
567         // time to fire off startlement event.
568         if (m_tracer != null) {
569             super.fireStartElem(elementName);
570             this.firePseudoAttributes();
571         }
572 
573         return;
574     }
575 
576 
577     /**
578      * From XSLTC
579      */
characters(String characters)580     public void characters(String characters)
581     throws SAXException
582     {
583         final int length = characters.length();
584         if (length > m_charsBuff.length)
585         {
586             m_charsBuff = new char[length*2 + 1];
587         }
588         characters.getChars(0, length, m_charsBuff, 0);
589         characters(m_charsBuff, 0, length);
590     }
591 
592 
593     /**
594      * From XSLTC
595      */
addAttribute(String name, String value)596     public void addAttribute(String name, String value)
597     {
598         // do nothing, forget about the attribute
599     }
600 
601     /**
602      * Add a unique attribute
603      */
addUniqueAttribute(String qName, String value, int flags)604     public void addUniqueAttribute(String qName, String value, int flags)
605         throws SAXException
606     {
607         // do nothing, forget about the attribute
608     }
609 
startPrefixMapping( String prefix, String uri, boolean shouldFlush)610     public boolean startPrefixMapping(
611         String prefix,
612         String uri,
613         boolean shouldFlush)
614         throws SAXException
615     {
616         // no namespace support for HTML
617         return false;
618     }
619 
620 
startPrefixMapping(String prefix, String uri)621     public void startPrefixMapping(String prefix, String uri)
622         throws org.xml.sax.SAXException
623     {
624         // no namespace support for HTML
625     }
626 
627 
namespaceAfterStartElement( final String prefix, final String uri)628     public void namespaceAfterStartElement(
629         final String prefix,
630         final String uri)
631         throws SAXException
632     {
633         // no namespace support for HTML
634     }
635 
flushPending()636     public void flushPending() throws org.xml.sax.SAXException
637     {
638             if (m_needToCallStartDocument)
639             {
640                 startDocumentInternal();
641                 m_needToCallStartDocument = false;
642             }
643     }
644 }
645