1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xerces.internal.impl.io;
23 
24 import java.io.InputStream;
25 import java.io.IOException;
26 import java.io.Reader;
27 
28 import com.sun.xml.internal.stream.util.BufferAllocator;
29 import com.sun.xml.internal.stream.util.ThreadLocalBufferAllocator;
30 
31 /**
32  * Reader for UCS-2 and UCS-4 encodings.
33  * (i.e., encodings from ISO-10646-UCS-(2|4)).
34  *
35  * @xerces.internal
36  *
37  * @author Neil Graham, IBM
38  *
39  */
40 public class UCSReader extends Reader {
41 
42     //
43     // Constants
44     //
45 
46     /** Default byte buffer size (8192, larger than that of ASCIIReader
47      * since it's reasonable to surmise that the average UCS-4-encoded
48      * file should be 4 times as large as the average ASCII-encoded file).
49      */
50     public static final int DEFAULT_BUFFER_SIZE = 8192;
51 
52     public static final short UCS2LE = 1;
53     public static final short UCS2BE = 2;
54     public static final short UCS4LE = 4;
55     public static final short UCS4BE = 8;
56 
57     //
58     // Data
59     //
60 
61     /** Input stream. */
62     protected InputStream fInputStream;
63 
64     /** Byte buffer. */
65     protected byte[] fBuffer;
66 
67     // what kind of data we're dealing with
68     protected short fEncoding;
69 
70     //
71     // Constructors
72     //
73 
74     /**
75      * Constructs an ASCII reader from the specified input stream
76      * using the default buffer size.  The Endian-ness and whether this is
77      * UCS-2 or UCS-4 needs also to be known in advance.
78      *
79      * @param inputStream The input stream.
80      * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
81      */
UCSReader(InputStream inputStream, short encoding)82     public UCSReader(InputStream inputStream, short encoding) {
83         this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
84     } // <init>(InputStream, short)
85 
86     /**
87      * Constructs an ASCII reader from the specified input stream
88      * and buffer size.  The Endian-ness and whether this is
89      * UCS-2 or UCS-4 needs also to be known in advance.
90      *
91      * @param inputStream The input stream.
92      * @param size        The initial buffer size.
93      * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
94      */
UCSReader(InputStream inputStream, int size, short encoding)95     public UCSReader(InputStream inputStream, int size, short encoding) {
96         fInputStream = inputStream;
97         BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator();
98         fBuffer = ba.getByteBuffer(size);
99         if (fBuffer == null) {
100             fBuffer = new byte[size];
101         }
102         fEncoding = encoding;
103     } // <init>(InputStream,int,short)
104 
105     //
106     // Reader methods
107     //
108 
109     /**
110      * Read a single character.  This method will block until a character is
111      * available, an I/O error occurs, or the end of the stream is reached.
112      *
113      * <p> Subclasses that intend to support efficient single-character input
114      * should override this method.
115      *
116      * @return     The character read, as an integer in the range 0 to 127
117      *             (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
118      *             been reached
119      *
120      * @exception  IOException  If an I/O error occurs
121      */
read()122     public int read() throws IOException {
123         int b0 = fInputStream.read() & 0xff;
124         if (b0 == 0xff)
125             return -1;
126         int b1 = fInputStream.read() & 0xff;
127         if (b1 == 0xff)
128             return -1;
129         if(fEncoding >=4) {
130             int b2 = fInputStream.read() & 0xff;
131             if (b2 == 0xff)
132                 return -1;
133             int b3 = fInputStream.read() & 0xff;
134             if (b3 == 0xff)
135                 return -1;
136             System.err.println("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 " + (b2 & 0xff) + " b3 " + (b3 & 0xff));
137             if (fEncoding == UCS4BE)
138                 return (b0<<24)+(b1<<16)+(b2<<8)+b3;
139             else
140                 return (b3<<24)+(b2<<16)+(b1<<8)+b0;
141         } else { // UCS-2
142             if (fEncoding == UCS2BE)
143                 return (b0<<8)+b1;
144             else
145                 return (b1<<8)+b0;
146         }
147     } // read():int
148 
149     /**
150      * Read characters into a portion of an array.  This method will block
151      * until some input is available, an I/O error occurs, or the end of the
152      * stream is reached.
153      *
154      * @param      ch     Destination buffer
155      * @param      offset Offset at which to start storing characters
156      * @param      length Maximum number of characters to read
157      *
158      * @return     The number of characters read, or -1 if the end of the
159      *             stream has been reached
160      *
161      * @exception  IOException  If an I/O error occurs
162      */
read(char ch[], int offset, int length)163     public int read(char ch[], int offset, int length) throws IOException {
164         int byteLength = length << ((fEncoding >= 4)?2:1);
165         if (byteLength > fBuffer.length) {
166             byteLength = fBuffer.length;
167         }
168         int count = fInputStream.read(fBuffer, 0, byteLength);
169         if(count == -1) return -1;
170         // try and make count be a multiple of the number of bytes we're looking for
171         if(fEncoding >= 4) { // BigEndian
172             // this looks ugly, but it avoids an if at any rate...
173             int numToRead = (4 - (count & 3) & 3);
174             for(int i=0; i<numToRead; i++) {
175                 int charRead = fInputStream.read();
176                 if(charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
177                     for (int j = i;j<numToRead; j++)
178                         fBuffer[count+j] = 0;
179                     break;
180                 } else {
181                     fBuffer[count+i] = (byte)charRead;
182                 }
183             }
184             count += numToRead;
185         } else {
186             int numToRead = count & 1;
187             if(numToRead != 0) {
188                 count++;
189                 int charRead = fInputStream.read();
190                 if(charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
191                     fBuffer[count] = 0;
192                 } else {
193                     fBuffer[count] = (byte)charRead;
194                 }
195             }
196         }
197 
198         // now count is a multiple of the right number of bytes
199         int numChars = count >> ((fEncoding >= 4)?2:1);
200         int curPos = 0;
201         for (int i = 0; i < numChars; i++) {
202             int b0 = fBuffer[curPos++] & 0xff;
203             int b1 = fBuffer[curPos++] & 0xff;
204             if(fEncoding >=4) {
205                 int b2 = fBuffer[curPos++] & 0xff;
206                 int b3 = fBuffer[curPos++] & 0xff;
207                 if (fEncoding == UCS4BE)
208                     ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
209                 else
210                     ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
211             } else { // UCS-2
212                 if (fEncoding == UCS2BE)
213                     ch[offset+i] = (char)((b0<<8)+b1);
214                 else
215                     ch[offset+i] = (char)((b1<<8)+b0);
216             }
217         }
218         return numChars;
219     } // read(char[],int,int)
220 
221     /**
222      * Skip characters.  This method will block until some characters are
223      * available, an I/O error occurs, or the end of the stream is reached.
224      *
225      * @param  n  The number of characters to skip
226      *
227      * @return    The number of characters actually skipped
228      *
229      * @exception  IOException  If an I/O error occurs
230      */
skip(long n)231     public long skip(long n) throws IOException {
232         // charWidth will represent the number of bits to move
233         // n leftward to get num of bytes to skip, and then move the result rightward
234         // to get num of chars effectively skipped.
235         // The trick with &'ing, as with elsewhere in this dcode, is
236         // intended to avoid an expensive use of / that might not be optimized
237         // away.
238         int charWidth = (fEncoding >=4)?2:1;
239         long bytesSkipped = fInputStream.skip(n<<charWidth);
240         if((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth;
241         return (bytesSkipped >> charWidth) + 1;
242     } // skip(long):long
243 
244     /**
245      * Tell whether this stream is ready to be read.
246      *
247      * @return True if the next read() is guaranteed not to block for input,
248      * false otherwise.  Note that returning false does not guarantee that the
249      * next read will block.
250      *
251      * @exception  IOException  If an I/O error occurs
252      */
ready()253     public boolean ready() throws IOException {
254             return false;
255     } // ready()
256 
257     /**
258      * Tell whether this stream supports the mark() operation.
259      */
markSupported()260     public boolean markSupported() {
261             return fInputStream.markSupported();
262     } // markSupported()
263 
264     /**
265      * Mark the present position in the stream.  Subsequent calls to reset()
266      * will attempt to reposition the stream to this point.  Not all
267      * character-input streams support the mark() operation.
268      *
269      * @param  readAheadLimit  Limit on the number of characters that may be
270      *                         read while still preserving the mark.  After
271      *                         reading this many characters, attempting to
272      *                         reset the stream may fail.
273      *
274      * @exception  IOException  If the stream does not support mark(),
275      *                          or if some other I/O error occurs
276      */
mark(int readAheadLimit)277     public void mark(int readAheadLimit) throws IOException {
278             fInputStream.mark(readAheadLimit);
279     } // mark(int)
280 
281     /**
282      * Reset the stream.  If the stream has been marked, then attempt to
283      * reposition it at the mark.  If the stream has not been marked, then
284      * attempt to reset it in some way appropriate to the particular stream,
285      * for example by repositioning it to its starting point.  Not all
286      * character-input streams support the reset() operation, and some support
287      * reset() without supporting mark().
288      *
289      * @exception  IOException  If the stream has not been marked,
290      *                          or if the mark has been invalidated,
291      *                          or if the stream does not support reset(),
292      *                          or if some other I/O error occurs
293      */
reset()294     public void reset() throws IOException {
295         fInputStream.reset();
296     } // reset()
297 
298     /**
299      * Close the stream.  Once a stream has been closed, further read(),
300      * ready(), mark(), or reset() invocations will throw an IOException.
301      * Closing a previously-closed stream, however, has no effect.
302      *
303      * @exception  IOException  If an I/O error occurs
304      */
close()305      public void close() throws IOException {
306          BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator();
307          ba.returnByteBuffer(fBuffer);
308          fBuffer = null;
309          fInputStream.close();
310      } // close()
311 
312 } // class UCSReader
313