/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.io;

import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;

import com.sun.xml.internal.stream.util.BufferAllocator;
import com.sun.xml.internal.stream.util.ThreadLocalBufferAllocator;

/**
 * Reader for UCS-2 and UCS-4 encodings.
 * (i.e., encodings from ISO-10646-UCS-(2|4)).
 *
 * <p>Bytes are pulled from the wrapped {@link InputStream} and assembled
 * into code units according to the encoding constant supplied at
 * construction time. Any encoding value {@code >= 4} is treated as a
 * four-byte (UCS-4) encoding, anything smaller as a two-byte (UCS-2)
 * encoding.
 *
 * <p>NOTE: {@link #read(char[], int, int)} stores each UCS-4 value through a
 * {@code (char)} cast, so only the low 16 bits of a value above U+FFFF
 * survive; no surrogate pairs are produced.
 *
 * @xerces.internal
 *
 * @author Neil Graham, IBM
 *
 */
public class UCSReader extends Reader {

    //
    // Constants
    //

    /** Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** UCS-2, little-endian byte order. */
    public static final short UCS2LE = 1;
    /** UCS-2, big-endian byte order. */
    public static final short UCS2BE = 2;
    /** UCS-4, little-endian byte order. */
    public static final short UCS4LE = 4;
    /** UCS-4, big-endian byte order. */
    public static final short UCS4BE = 8;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer; set to {@code null} once the reader has been closed. */
    protected byte[] fBuffer;

    /** What kind of data we're dealing with (one of the UCS* constants). */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size. The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size. The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * <p>The buffer is requested from the thread-local
     * {@link BufferAllocator}; if the allocator returns {@code null} a fresh
     * array of {@code size} bytes is created instead.
     *
     * @param inputStream The input stream.
     * @param size The initial buffer size.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        fInputStream = inputStream;
        BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator();
        fBuffer = ba.getByteBuffer(size);
        if (fBuffer == null) {
            fBuffer = new byte[size];
        }
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>Two bytes (UCS-2) or four bytes (UCS-4) are read directly from the
     * underlying stream and combined according to the configured byte
     * order. If the stream ends before a complete unit has been read,
     * -1 is returned.
     *
     * @return The value read (for UCS-4 this is the full 32-bit value
     *         assembled from the four bytes), or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        // End of stream must be detected on the raw return value: a data
        // byte of 0xFF is legal and must not be mistaken for -1.
        final int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        final int b1 = fInputStream.read();
        if (b1 == -1) {
            return -1;
        }
        if (fEncoding >= 4) { // UCS-4
            final int b2 = fInputStream.read();
            if (b2 == -1) {
                return -1;
            }
            final int b3 = fInputStream.read();
            if (b3 == -1) {
                return -1;
            }
            if (fEncoding == UCS4BE) {
                return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
            }
            return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
        }
        // UCS-2
        if (fEncoding == UCS2BE) {
            return (b0 << 8) + b1;
        }
        return (b1 << 8) + b0;
    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>At most one buffer's worth of bytes is fetched per call. If the
     * underlying stream delivers a trailing partial unit, the missing bytes
     * are read one at a time; if the stream ends first, they are padded
     * with zero bytes.
     *
     * @param ch     Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs, if this reader has
     *                        been closed, or if the byte buffer cannot
     *                        hold even a single character
     */
    @Override
    public int read(char ch[], int offset, int length) throws IOException {
        if (fBuffer == null) {
            throw new IOException("Stream closed");
        }
        final int shift = charShift();
        final int unitMask = (1 << shift) - 1;

        // Only ever request a whole number of units that fits in the
        // buffer. This keeps the padding below inside the array and
        // avoids int overflow of (length << shift) for large lengths.
        final int maxChars = fBuffer.length >> shift;
        if (maxChars == 0 && length > 0) {
            throw new IOException("Byte buffer of " + fBuffer.length
                    + " byte(s) is too small to hold one character");
        }
        final int byteLength = Math.min(length, maxChars) << shift;

        int count = fInputStream.read(fBuffer, 0, byteLength);
        if (count == -1) {
            return -1;
        }

        // Make count a multiple of the unit width by completing a trailing
        // partial unit; once the stream is exhausted, pad with nulls.
        final int missing = ((unitMask + 1) - (count & unitMask)) & unitMask;
        boolean exhausted = false;
        for (int i = 0; i < missing; i++) {
            int next = exhausted ? -1 : fInputStream.read();
            if (next == -1) { // end of input; something likely went wrong
                exhausted = true;
                next = 0;
            }
            fBuffer[count++] = (byte) next;
        }

        // now count is a multiple of the right number of bytes
        final int numChars = count >> shift;
        int curPos = 0;
        for (int i = 0; i < numChars; i++) {
            final int b0 = fBuffer[curPos++] & 0xff;
            final int b1 = fBuffer[curPos++] & 0xff;
            if (fEncoding >= 4) { // UCS-4
                final int b2 = fBuffer[curPos++] & 0xff;
                final int b3 = fBuffer[curPos++] & 0xff;
                // The (char) cast keeps only the low 16 bits of the value.
                if (fEncoding == UCS4BE) {
                    ch[offset + i] = (char) ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
                } else {
                    ch[offset + i] = (char) ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
                }
            } else { // UCS-2
                if (fEncoding == UCS2BE) {
                    ch[offset + i] = (char) ((b0 << 8) + b1);
                } else {
                    ch[offset + i] = (char) ((b1 << 8) + b0);
                }
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>The request is converted to bytes and delegated to the underlying
     * stream. If that skip ends in the middle of a unit, the unit is
     * counted as skipped and its remaining bytes are consumed so that
     * subsequent reads stay aligned on unit boundaries.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        // shift is the number of bits to move n leftward to get the number
        // of bytes to skip, and to move the result rightward to get the
        // number of characters effectively skipped.
        final int shift = charShift();
        final long unitMask = (1L << shift) - 1;

        // Saturate instead of overflowing when n is close to Long.MAX_VALUE.
        final long maxChars = Long.MAX_VALUE >> shift;
        final long bytesRequested = (n > maxChars) ? (maxChars << shift) : (n << shift);

        final long bytesSkipped = fInputStream.skip(bytesRequested);
        final long partial = bytesSkipped & unitMask;
        if (partial == 0) {
            return bytesSkipped >> shift;
        }
        // Finish the partially skipped unit; stop early at end of stream.
        for (long i = partial; i <= unitMask; i++) {
            if (fInputStream.read() == -1) {
                break;
            }
        }
        return (bytesSkipped >> shift) + 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise.  Note that returning false does not guarantee that the
     * next read will block. This implementation always returns false.
     *
     * @exception IOException If an I/O error occurs
     */
    @Override
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return whatever the underlying input stream reports
     */
    @Override
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.  Not all
     * character-input streams support the mark() operation.
     *
     * <p>The limit is expressed in characters, whereas the underlying
     * stream counts bytes, so a positive limit is scaled by the unit width
     * (saturating at {@link Integer#MAX_VALUE}) before being passed on.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  After
     *                         reading this many characters, attempting to
     *                         reset the stream may fail.
     *
     * @exception  IOException  If the stream does not support mark(),
     *                          or if some other I/O error occurs
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        int byteLimit = readAheadLimit;
        if (readAheadLimit > 0) {
            final int shift = charShift();
            byteLimit = (readAheadLimit > (Integer.MAX_VALUE >> shift))
                    ? Integer.MAX_VALUE
                    : (readAheadLimit << shift);
        }
        fInputStream.mark(byteLimit);
    } // mark(int)

    /**
     * Reset the stream.  If the stream has been marked, then attempt to
     * reposition it at the mark.  If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point.  Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    @Override
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream.  Once a stream has been closed, further read(),
     * ready(), mark(), or reset() invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * <p>The byte buffer is handed back to the thread-local
     * {@link BufferAllocator} the first time this method is called, and the
     * underlying input stream is closed.
     *
     * @exception IOException If an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        // Return the buffer only once; a repeated close() must not hand
        // null back to the allocator.
        if (fBuffer != null) {
            BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator();
            ba.returnByteBuffer(fBuffer);
            fBuffer = null;
        }
        fInputStream.close();
    } // close()

    //
    // Private methods
    //

    /**
     * Returns log2 of the number of bytes per code unit for the configured
     * encoding: 2 for the four-byte (UCS-4) encodings, 1 for the two-byte
     * (UCS-2) encodings.
     */
    private int charShift() {
        return (fEncoding >= 4) ? 2 : 1;
    } // charShift():int

} // class UCSReader