1 /* InputStreamReader.java -- Reader than transforms bytes to chars 2 Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005, 2006 3 Free Software Foundation, Inc. 4 5 This file is part of GNU Classpath. 6 7 GNU Classpath is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 GNU Classpath is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GNU Classpath; see the file COPYING. If not, write to the 19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20 02110-1301 USA. 21 22 Linking this library statically or dynamically with other modules is 23 making a combined work based on this library. Thus, the terms and 24 conditions of the GNU General Public License cover the whole 25 combination. 26 27 As a special exception, the copyright holders of this library give you 28 permission to link this library with independent modules to produce an 29 executable, regardless of the license terms of these independent 30 modules, and to copy and distribute the resulting executable under 31 terms of your choice, provided that you also meet, for each linked 32 independent module, the terms and conditions of the license of that 33 module. An independent module is a module which is not derived from 34 or based on this library. If you modify this library, you may extend 35 this exception to your version of the library, but you are not 36 obligated to do so. If you do not wish to do so, delete this 37 exception statement from your version. */ 38 39 40 package java.io; 41 42 import gnu.classpath.SystemProperties; 43 import gnu.java.nio.charset.EncodingHelper; 44 45 import java.nio.ByteBuffer; 46 import java.nio.CharBuffer; 47 import java.nio.charset.Charset; 48 import java.nio.charset.CharsetDecoder; 49 import java.nio.charset.CoderResult; 50 import java.nio.charset.CodingErrorAction; 51 52 /** 53 * This class reads characters from a byte input stream. The characters 54 * read are converted from bytes in the underlying stream by a 55 * decoding layer. The decoding layer transforms bytes to chars according 56 * to an encoding standard. There are many available encodings to choose 57 * from. The desired encoding can either be specified by name, or if no 58 * encoding is selected, the system default encoding will be used. The 59 * system default encoding name is determined from the system property 60 * <code>file.encoding</code>. The only encodings that are guaranteed to 61 * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 62 * Unforunately, Java does not provide a mechanism for listing the 63 * ecodings that are supported in a given implementation. 64 * <p> 65 * Here is a list of standard encoding names that may be available: 66 * <p> 67 * <ul> 68 * <li>8859_1 (ISO-8859-1/Latin-1)</li> 69 * <li>8859_2 (ISO-8859-2/Latin-2)</li> 70 * <li>8859_3 (ISO-8859-3/Latin-3)</li> 71 * <li>8859_4 (ISO-8859-4/Latin-4)</li> 72 * <li>8859_5 (ISO-8859-5/Latin-5)</li> 73 * <li>8859_6 (ISO-8859-6/Latin-6)</li> 74 * <li>8859_7 (ISO-8859-7/Latin-7)</li> 75 * <li>8859_8 (ISO-8859-8/Latin-8)</li> 76 * <li>8859_9 (ISO-8859-9/Latin-9)</li> 77 * <li>ASCII (7-bit ASCII)</li> 78 * <li>UTF8 (UCS Transformation Format-8)</li> 79 * <li>More later</li> 80 * </ul> 81 * <p> 82 * It is recommended that applications do not use 83 * <code>InputStreamReader</code>'s 84 * directly. Rather, for efficiency purposes, an object of this class 85 * should be wrapped by a <code>BufferedReader</code>. 86 * <p> 87 * Due to a deficiency the Java class library design, there is no standard 88 * way for an application to install its own byte-character encoding. 89 * 90 * @see BufferedReader 91 * @see InputStream 92 * 93 * @author Robert Schuster 94 * @author Aaron M. Renn (arenn@urbanophile.com) 95 * @author Per Bothner (bothner@cygnus.com) 96 * @date April 22, 1998. 97 */ 98 public class InputStreamReader extends Reader 99 { 100 /** 101 * The input stream. 102 */ 103 private InputStream in; 104 105 /** 106 * The charset decoder. 107 */ 108 private CharsetDecoder decoder; 109 110 /** 111 * End of stream reached. 112 */ 113 private boolean isDone = false; 114 115 /** 116 * Need this. 117 */ 118 private float maxBytesPerChar; 119 120 /** 121 * Buffer holding surplus loaded bytes (if any) 122 */ 123 private ByteBuffer byteBuffer; 124 125 /** 126 * java.io canonical name of the encoding. 127 */ 128 private String encoding; 129 130 /** 131 * We might decode to a 2-char UTF-16 surrogate, which won't fit in the 132 * output buffer. In this case we need to save the surrogate char. 133 */ 134 private char savedSurrogate; 135 private boolean hasSavedSurrogate = false; 136 137 /** 138 * A byte array to be reused in read(byte[], int, int). 139 */ 140 private byte[] bytesCache; 141 142 /** 143 * Locks the bytesCache above in read(byte[], int, int). 144 */ 145 private Object cacheLock = new Object(); 146 147 /** 148 * This method initializes a new instance of <code>InputStreamReader</code> 149 * to read from the specified stream using the default encoding. 150 * 151 * @param in The <code>InputStream</code> to read from 152 */ InputStreamReader(InputStream in)153 public InputStreamReader(InputStream in) 154 { 155 if (in == null) 156 throw new NullPointerException(); 157 this.in = in; 158 try 159 { 160 encoding = SystemProperties.getProperty("file.encoding"); 161 // Don't use NIO if avoidable 162 if(EncodingHelper.isISOLatin1(encoding)) 163 { 164 encoding = "ISO8859_1"; 165 maxBytesPerChar = 1f; 166 decoder = null; 167 return; 168 } 169 Charset cs = EncodingHelper.getCharset(encoding); 170 decoder = cs.newDecoder(); 171 encoding = EncodingHelper.getOldCanonical(cs.name()); 172 try { 173 maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 174 } catch(UnsupportedOperationException _){ 175 maxBytesPerChar = 1f; 176 } 177 decoder.onMalformedInput(CodingErrorAction.REPLACE); 178 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 179 decoder.reset(); 180 } catch(RuntimeException e) { 181 encoding = "ISO8859_1"; 182 maxBytesPerChar = 1f; 183 decoder = null; 184 } catch(UnsupportedEncodingException e) { 185 encoding = "ISO8859_1"; 186 maxBytesPerChar = 1f; 187 decoder = null; 188 } 189 } 190 191 /** 192 * This method initializes a new instance of <code>InputStreamReader</code> 193 * to read from the specified stream using a caller supplied character 194 * encoding scheme. Note that due to a deficiency in the Java language 195 * design, there is no way to determine which encodings are supported. 196 * 197 * @param in The <code>InputStream</code> to read from 198 * @param encoding_name The name of the encoding scheme to use 199 * 200 * @exception UnsupportedEncodingException If the encoding scheme 201 * requested is not available. 202 */ InputStreamReader(InputStream in, String encoding_name)203 public InputStreamReader(InputStream in, String encoding_name) 204 throws UnsupportedEncodingException 205 { 206 if (in == null 207 || encoding_name == null) 208 throw new NullPointerException(); 209 210 this.in = in; 211 // Don't use NIO if avoidable 212 if(EncodingHelper.isISOLatin1(encoding_name)) 213 { 214 encoding = "ISO8859_1"; 215 maxBytesPerChar = 1f; 216 decoder = null; 217 return; 218 } 219 try { 220 Charset cs = EncodingHelper.getCharset(encoding_name); 221 try { 222 maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 223 } catch(UnsupportedOperationException _){ 224 maxBytesPerChar = 1f; 225 } 226 227 decoder = cs.newDecoder(); 228 decoder.onMalformedInput(CodingErrorAction.REPLACE); 229 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 230 decoder.reset(); 231 232 // The encoding should be the old name, if such exists. 233 encoding = EncodingHelper.getOldCanonical(cs.name()); 234 } catch(RuntimeException e) { 235 encoding = "ISO8859_1"; 236 maxBytesPerChar = 1f; 237 decoder = null; 238 } 239 } 240 241 /** 242 * Creates an InputStreamReader that uses a decoder of the given 243 * charset to decode the bytes in the InputStream into 244 * characters. 245 * 246 * @since 1.4 247 */ InputStreamReader(InputStream in, Charset charset)248 public InputStreamReader(InputStream in, Charset charset) { 249 if (in == null) 250 throw new NullPointerException(); 251 this.in = in; 252 decoder = charset.newDecoder(); 253 254 try { 255 maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 256 } catch(UnsupportedOperationException _){ 257 maxBytesPerChar = 1f; 258 } 259 260 decoder.onMalformedInput(CodingErrorAction.REPLACE); 261 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 262 decoder.reset(); 263 encoding = EncodingHelper.getOldCanonical(charset.name()); 264 } 265 266 /** 267 * Creates an InputStreamReader that uses the given charset decoder 268 * to decode the bytes in the InputStream into characters. 269 * 270 * @since 1.4 271 */ InputStreamReader(InputStream in, CharsetDecoder decoder)272 public InputStreamReader(InputStream in, CharsetDecoder decoder) { 273 if (in == null) 274 throw new NullPointerException(); 275 this.in = in; 276 this.decoder = decoder; 277 278 Charset charset = decoder.charset(); 279 try { 280 if (charset == null) 281 maxBytesPerChar = 1f; 282 else 283 maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 284 } catch(UnsupportedOperationException _){ 285 maxBytesPerChar = 1f; 286 } 287 288 decoder.onMalformedInput(CodingErrorAction.REPLACE); 289 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 290 decoder.reset(); 291 if (charset == null) 292 encoding = "US-ASCII"; 293 else 294 encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); 295 } 296 297 /** 298 * This method closes this stream, as well as the underlying 299 * <code>InputStream</code>. 300 * 301 * @exception IOException If an error occurs 302 */ close()303 public void close() throws IOException 304 { 305 synchronized (lock) 306 { 307 // Makes sure all intermediate data is released by the decoder. 308 if (decoder != null) 309 decoder.reset(); 310 if (in != null) 311 in.close(); 312 in = null; 313 isDone = true; 314 decoder = null; 315 } 316 } 317 318 /** 319 * This method returns the name of the encoding that is currently in use 320 * by this object. If the stream has been closed, this method is allowed 321 * to return <code>null</code>. 322 * 323 * @return The current encoding name 324 */ getEncoding()325 public String getEncoding() 326 { 327 return in != null ? encoding : null; 328 } 329 330 /** 331 * This method checks to see if the stream is ready to be read. It 332 * will return <code>true</code> if is, or <code>false</code> if it is not. 333 * If the stream is not ready to be read, it could (although is not required 334 * to) block on the next read attempt. 335 * 336 * @return <code>true</code> if the stream is ready to be read, 337 * <code>false</code> otherwise 338 * 339 * @exception IOException If an error occurs 340 */ ready()341 public boolean ready() throws IOException 342 { 343 if (in == null) 344 throw new IOException("Reader has been closed"); 345 346 return in.available() != 0; 347 } 348 349 /** 350 * This method reads up to <code>length</code> characters from the stream into 351 * the specified array starting at index <code>offset</code> into the 352 * array. 353 * 354 * @param buf The character array to recieve the data read 355 * @param offset The offset into the array to start storing characters 356 * @param length The requested number of characters to read. 357 * 358 * @return The actual number of characters read, or -1 if end of stream. 359 * 360 * @exception IOException If an error occurs 361 */ read(char[] buf, int offset, int length)362 public int read(char[] buf, int offset, int length) throws IOException 363 { 364 if (in == null) 365 throw new IOException("Reader has been closed"); 366 if (isDone) 367 return -1; 368 if(decoder != null) 369 { 370 int totalBytes = (int)((double) length * maxBytesPerChar); 371 if (byteBuffer != null) 372 totalBytes = Math.max(totalBytes, byteBuffer.remaining()); 373 byte[] bytes; 374 // Fetch cached bytes array if available and big enough. 375 synchronized(cacheLock) 376 { 377 bytes = bytesCache; 378 if (bytes == null || bytes.length < totalBytes) 379 bytes = new byte[totalBytes]; 380 else 381 bytesCache = null; 382 } 383 384 int remaining = 0; 385 if(byteBuffer != null) 386 { 387 remaining = byteBuffer.remaining(); 388 byteBuffer.get(bytes, 0, remaining); 389 } 390 int read; 391 if(totalBytes - remaining > 0) 392 { 393 read = in.read(bytes, remaining, totalBytes - remaining); 394 if(read == -1){ 395 read = remaining; 396 isDone = true; 397 } else 398 read += remaining; 399 } else 400 read = remaining; 401 byteBuffer = ByteBuffer.wrap(bytes, 0, read); 402 CharBuffer cb = CharBuffer.wrap(buf, offset, length); 403 int startPos = cb.position(); 404 405 if(hasSavedSurrogate){ 406 hasSavedSurrogate = false; 407 cb.put(savedSurrogate); 408 read++; 409 } 410 411 CoderResult cr = decoder.decode(byteBuffer, cb, isDone); 412 decoder.reset(); 413 // 1 char remains which is the first half of a surrogate pair. 414 if(cr.isOverflow() && cb.hasRemaining()){ 415 CharBuffer overflowbuf = CharBuffer.allocate(2); 416 cr = decoder.decode(byteBuffer, overflowbuf, isDone); 417 overflowbuf.flip(); 418 if(overflowbuf.hasRemaining()) 419 { 420 cb.put(overflowbuf.get()); 421 savedSurrogate = overflowbuf.get(); 422 hasSavedSurrogate = true; 423 isDone = false; 424 } 425 } 426 427 if(byteBuffer.hasRemaining()) { 428 byteBuffer.compact(); 429 byteBuffer.flip(); 430 isDone = false; 431 } else 432 byteBuffer = null; 433 434 read = cb.position() - startPos; 435 436 // Put cached bytes array back if we are finished and the cache 437 // is null or smaller than the used bytes array. 438 synchronized (cacheLock) 439 { 440 if (byteBuffer == null 441 && (bytesCache == null || bytesCache.length < bytes.length)) 442 bytesCache = bytes; 443 } 444 return (read <= 0) ? -1 : read; 445 } 446 else 447 { 448 byte[] bytes; 449 // Fetch cached bytes array if available and big enough. 450 synchronized (cacheLock) 451 { 452 bytes = bytesCache; 453 if (bytes == null || length < bytes.length) 454 bytes = new byte[length]; 455 else 456 bytesCache = null; 457 } 458 459 int read = in.read(bytes); 460 for(int i=0;i<read;i++) 461 buf[offset+i] = (char)(bytes[i]&0xFF); 462 463 // Put back byte array into cache if appropriate. 464 synchronized (cacheLock) 465 { 466 if (bytesCache == null || bytesCache.length < bytes.length) 467 bytesCache = bytes; 468 } 469 return read; 470 } 471 } 472 473 /** 474 * Reads an char from the input stream and returns it 475 * as an int in the range of 0-65535. This method also will return -1 if 476 * the end of the stream has been reached. 477 * <p> 478 * This method will block until the char can be read. 479 * 480 * @return The char read or -1 if end of stream 481 * 482 * @exception IOException If an error occurs 483 */ read()484 public int read() throws IOException 485 { 486 char[] buf = new char[1]; 487 int count = read(buf, 0, 1); 488 return count > 0 ? buf[0] : -1; 489 } 490 491 /** 492 * Skips the specified number of chars in the stream. It 493 * returns the actual number of chars skipped, which may be less than the 494 * requested amount. 495 * 496 * @param count The requested number of chars to skip 497 * 498 * @return The actual number of chars skipped. 499 * 500 * @exception IOException If an error occurs 501 */ skip(long count)502 public long skip(long count) throws IOException 503 { 504 if (in == null) 505 throw new IOException("Reader has been closed"); 506 507 return super.skip(count); 508 } 509 } 510