1 /* InputStreamReader.java -- Reader than transforms bytes to chars 2 Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package java.io; 40 41 import gnu.gcj.convert.*; 42 import java.nio.charset.Charset; 43 import java.nio.charset.CharsetDecoder; 44 45 /** 46 * This class reads characters from a byte input stream. The characters 47 * read are converted from bytes in the underlying stream by a 48 * decoding layer. The decoding layer transforms bytes to chars according 49 * to an encoding standard. There are many available encodings to choose 50 * from. The desired encoding can either be specified by name, or if no 51 * encoding is selected, the system default encoding will be used. The 52 * system default encoding name is determined from the system property 53 * <code>file.encoding</code>. The only encodings that are guaranteed to 54 * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 55 * Unforunately, Java does not provide a mechanism for listing the 56 * ecodings that are supported in a given implementation. 57 * <p> 58 * Here is a list of standard encoding names that may be available: 59 * <p> 60 * <ul> 61 * <li>8859_1 (ISO-8859-1/Latin-1)</li> 62 * <li>8859_2 (ISO-8859-2/Latin-2)</li> 63 * <li>8859_3 (ISO-8859-3/Latin-3)</li> 64 * <li>8859_4 (ISO-8859-4/Latin-4)</li> 65 * <li>8859_5 (ISO-8859-5/Latin-5)</li> 66 * <li>8859_6 (ISO-8859-6/Latin-6)</li> 67 * <li>8859_7 (ISO-8859-7/Latin-7)</li> 68 * <li>8859_8 (ISO-8859-8/Latin-8)</li> 69 * <li>8859_9 (ISO-8859-9/Latin-9)</li> 70 * <li>ASCII (7-bit ASCII)</li> 71 * <li>UTF8 (UCS Transformation Format-8)</li> 72 * <li>More later</li> 73 * </ul> 74 * <p> 75 * It is recommended that applications do not use 76 * <code>InputStreamReader</code>'s 77 * directly. Rather, for efficiency purposes, an object of this class 78 * should be wrapped by a <code>BufferedReader</code>. 79 * <p> 80 * Due to a deficiency the Java class library design, there is no standard 81 * way for an application to install its own byte-character encoding. 82 * 83 * @see BufferedReader 84 * @see InputStream 85 * 86 * @author Aaron M. Renn (arenn@urbanophile.com) 87 * @author Per Bothner (bothner@cygnus.com) 88 * @date April 22, 1998. 89 */ 90 public class InputStreamReader extends Reader 91 { 92 BufferedInputStream in; 93 94 // Buffer of chars read from in and converted but not consumed. 95 char[] work; 96 // Next available character (in work buffer) to read. 97 int wpos; 98 // Last available character (in work buffer) to read. 99 int wcount; 100 101 /* 102 * This is the byte-character decoder class that does the reading and 103 * translation of bytes from the underlying stream. 104 */ 105 BytesToUnicode converter; 106 107 /** 108 * This method initializes a new instance of <code>InputStreamReader</code> 109 * to read from the specified stream using the default encoding. 110 * 111 * @param in The <code>InputStream</code> to read from 112 */ InputStreamReader(InputStream in)113 public InputStreamReader(InputStream in) 114 { 115 this(in, BytesToUnicode.getDefaultDecoder()); 116 } 117 118 /** 119 * This method initializes a new instance of <code>InputStreamReader</code> 120 * to read from the specified stream using a caller supplied character 121 * encoding scheme. Note that due to a deficiency in the Java language 122 * design, there is no way to determine which encodings are supported. 123 * 124 * @param in The <code>InputStream</code> to read from 125 * @param encoding_name The name of the encoding scheme to use 126 * 127 * @exception UnsupportedEncodingException If the encoding scheme 128 * requested is not available. 129 */ InputStreamReader(InputStream in, String encoding_name)130 public InputStreamReader(InputStream in, String encoding_name) 131 throws UnsupportedEncodingException 132 { 133 this(in, BytesToUnicode.getDecoder(encoding_name)); 134 } 135 136 /** 137 * Creates an InputStreamReader that uses a decoder of the given 138 * charset to decode the bytes in the InputStream into 139 * characters. 140 */ InputStreamReader(InputStream in, Charset charset)141 public InputStreamReader(InputStream in, Charset charset) 142 { 143 this(in, new BytesToCharsetAdaptor(charset)); 144 } 145 146 /** 147 * Creates an InputStreamReader that uses the given charset decoder 148 * to decode the bytes in the InputStream into characters. 149 */ InputStreamReader(InputStream in, CharsetDecoder decoder)150 public InputStreamReader(InputStream in, CharsetDecoder decoder) 151 { 152 this(in, new BytesToCharsetAdaptor(decoder)); 153 } 154 InputStreamReader(InputStream in, BytesToUnicode decoder)155 private InputStreamReader(InputStream in, BytesToUnicode decoder) 156 { 157 // FIXME: someone could pass in a BufferedInputStream whose buffer 158 // is smaller than the longest encoded character for this 159 // encoding. We will probably go into an infinite loop in this 160 // case. We probably ought to just have our own byte buffering 161 // here. 162 this.in = in instanceof BufferedInputStream 163 ? (BufferedInputStream) in 164 : new BufferedInputStream(in); 165 /* Don't need to call super(in) here as long as the lock gets set. */ 166 this.lock = in; 167 converter = decoder; 168 converter.setInput(this.in.buf, 0, 0); 169 } 170 171 /** 172 * This method closes this stream, as well as the underlying 173 * <code>InputStream</code>. 174 * 175 * @exception IOException If an error occurs 176 */ close()177 public void close() throws IOException 178 { 179 synchronized (lock) 180 { 181 if (in != null) 182 in.close(); 183 in = null; 184 work = null; 185 wpos = wcount = 0; 186 } 187 } 188 189 /** 190 * This method returns the name of the encoding that is currently in use 191 * by this object. If the stream has been closed, this method is allowed 192 * to return <code>null</code>. 193 * 194 * @return The current encoding name 195 */ getEncoding()196 public String getEncoding() 197 { 198 return in != null ? converter.getName() : null; 199 } 200 201 /** 202 * This method checks to see if the stream is read to be read. It 203 * will return <code>true</code> if is, or <code>false</code> if it is not. 204 * If the stream is not ready to be read, it could (although is not required 205 * to) block on the next read attempt. 206 * 207 * @return <code>true</code> if the stream is ready to be read, 208 * <code>false</code> otherwise 209 * 210 * @exception IOException If an error occurs 211 */ ready()212 public boolean ready() throws IOException 213 { 214 synchronized (lock) 215 { 216 if (in == null) 217 throw new IOException("Stream closed"); 218 219 if (wpos < wcount) 220 return true; 221 222 // According to the spec, an InputStreamReader is ready if its 223 // input buffer is not empty (above), or if bytes are 224 // available on the underlying byte stream. 225 return in.available () > 0; 226 } 227 } 228 229 /** 230 * This method reads up to <code>length</code> characters from the stream into 231 * the specified array starting at index <code>offset</code> into the 232 * array. 233 * 234 * @param buf The character array to recieve the data read 235 * @param offset The offset into the array to start storing characters 236 * @param length The requested number of characters to read. 237 * 238 * @return The actual number of characters read, or -1 if end of stream. 239 * 240 * @exception IOException If an error occurs 241 */ read(char[] buf, int offset, int length)242 public int read (char[] buf, int offset, int length) throws IOException 243 { 244 synchronized (lock) 245 { 246 if (in == null) 247 throw new IOException("Stream closed"); 248 249 if (length == 0) 250 return 0; 251 252 int wavail = wcount - wpos; 253 if (wavail <= 0) 254 { 255 // Nothing waiting, so refill their buffer. 256 return refill(buf, offset, length); 257 } 258 259 if (length > wavail) 260 length = wavail; 261 System.arraycopy(work, wpos, buf, offset, length); 262 wpos += length; 263 return length; 264 } 265 } 266 267 /** 268 * This method reads a single character of data from the stream. 269 * 270 * @return The char read, as an int, or -1 if end of stream. 271 * 272 * @exception IOException If an error occurs 273 */ read()274 public int read() throws IOException 275 { 276 synchronized (lock) 277 { 278 if (in == null) 279 throw new IOException("Stream closed"); 280 281 int wavail = wcount - wpos; 282 if (wavail <= 0) 283 { 284 // Nothing waiting, so refill our internal buffer. 285 wpos = wcount = 0; 286 if (work == null) 287 work = new char[100]; 288 int count = refill(work, 0, work.length); 289 if (count == -1) 290 return -1; 291 wcount += count; 292 } 293 294 return work[wpos++]; 295 } 296 } 297 298 // Read more bytes and convert them into the specified buffer. 299 // Returns the number of converted characters or -1 on EOF. refill(char[] buf, int offset, int length)300 private int refill(char[] buf, int offset, int length) throws IOException 301 { 302 for (;;) 303 { 304 // We have knowledge of the internals of BufferedInputStream 305 // here. Eww. 306 // BufferedInputStream.refill() can only be called when 307 // `pos>=count'. 308 boolean r = in.pos < in.count || in.refill (); 309 if (! r) 310 return -1; 311 converter.setInput(in.buf, in.pos, in.count); 312 int count = converter.read(buf, offset, length); 313 314 // We might have bytes but not have made any progress. In 315 // this case we try to refill. If refilling fails, we assume 316 // we have a malformed character at the end of the stream. 317 if (count == 0 && converter.inpos == in.pos) 318 { 319 in.mark(in.count); 320 if (! in.refill ()) 321 throw new CharConversionException (); 322 in.reset(); 323 } 324 else 325 { 326 in.skip(converter.inpos - in.pos); 327 if (count > 0) 328 return count; 329 } 330 } 331 } 332 } 333