/* InputStreamReader.java -- Reader that transforms bytes to chars
2    Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005, 2006
3    Free Software Foundation, Inc.
4 
5 This file is part of GNU Classpath.
6 
7 GNU Classpath is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11 
12 GNU Classpath is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GNU Classpath; see the file COPYING.  If not, write to the
19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA.
21 
22 Linking this library statically or dynamically with other modules is
23 making a combined work based on this library.  Thus, the terms and
24 conditions of the GNU General Public License cover the whole
25 combination.
26 
27 As a special exception, the copyright holders of this library give you
28 permission to link this library with independent modules to produce an
29 executable, regardless of the license terms of these independent
30 modules, and to copy and distribute the resulting executable under
31 terms of your choice, provided that you also meet, for each linked
32 independent module, the terms and conditions of the license of that
33 module.  An independent module is a module which is not derived from
34 or based on this library.  If you modify this library, you may extend
35 this exception to your version of the library, but you are not
36 obligated to do so.  If you do not wish to do so, delete this
37 exception statement from your version. */
38 
39 
40 package java.io;
41 
42 import gnu.classpath.SystemProperties;
43 import gnu.java.nio.charset.EncodingHelper;
44 
45 import java.nio.ByteBuffer;
46 import java.nio.CharBuffer;
47 import java.nio.charset.Charset;
48 import java.nio.charset.CharsetDecoder;
49 import java.nio.charset.CoderResult;
50 import java.nio.charset.CodingErrorAction;
51 
52 /**
53  * This class reads characters from a byte input stream.   The characters
54  * read are converted from bytes in the underlying stream by a
55  * decoding layer.  The decoding layer transforms bytes to chars according
56  * to an encoding standard.  There are many available encodings to choose
57  * from.  The desired encoding can either be specified by name, or if no
58  * encoding is selected, the system default encoding will be used.  The
59  * system default encoding name is determined from the system property
 * <code>file.encoding</code>.  The only encodings that are guaranteed to
 * be available are "8859_1" (the Latin-1 character set) and "UTF8".
 * Unfortunately, Java does not provide a mechanism for listing the
 * encodings that are supported in a given implementation.
64  * <p>
65  * Here is a list of standard encoding names that may be available:
66  * <p>
67  * <ul>
68  * <li>8859_1 (ISO-8859-1/Latin-1)</li>
69  * <li>8859_2 (ISO-8859-2/Latin-2)</li>
70  * <li>8859_3 (ISO-8859-3/Latin-3)</li>
71  * <li>8859_4 (ISO-8859-4/Latin-4)</li>
 * <li>8859_5 (ISO-8859-5/Cyrillic)</li>
 * <li>8859_6 (ISO-8859-6/Arabic)</li>
 * <li>8859_7 (ISO-8859-7/Greek)</li>
 * <li>8859_8 (ISO-8859-8/Hebrew)</li>
 * <li>8859_9 (ISO-8859-9/Latin-5)</li>
77  * <li>ASCII (7-bit ASCII)</li>
78  * <li>UTF8 (UCS Transformation Format-8)</li>
79  * <li>More later</li>
80  * </ul>
81  * <p>
 * It is recommended that applications do not use
 * <code>InputStreamReader</code> objects
 * directly.  Rather, for efficiency purposes, an object of this class
 * should be wrapped by a <code>BufferedReader</code>.
86  * <p>
 * Due to a deficiency in the Java class library design, there is no standard
 * way for an application to install its own byte-character encoding.
89  *
90  * @see BufferedReader
91  * @see InputStream
92  *
93  * @author Robert Schuster
94  * @author Aaron M. Renn (arenn@urbanophile.com)
95  * @author Per Bothner (bothner@cygnus.com)
96  * @date April 22, 1998.
97  */
98 public class InputStreamReader extends Reader
99 {
100   /**
101    * The input stream.
102    */
103   private InputStream in;
104 
105   /**
106    * The charset decoder.
107    */
108   private CharsetDecoder decoder;
109 
110   /**
111    * End of stream reached.
112    */
113   private boolean isDone = false;
114 
115   /**
116    * Need this.
117    */
118   private float maxBytesPerChar;
119 
120   /**
121    * Buffer holding surplus loaded bytes (if any)
122    */
123   private ByteBuffer byteBuffer;
124 
125   /**
126    * java.io canonical name of the encoding.
127    */
128   private String encoding;
129 
130   /**
131    * We might decode to a 2-char UTF-16 surrogate, which won't fit in the
132    * output buffer. In this case we need to save the surrogate char.
133    */
134   private char savedSurrogate;
135   private boolean hasSavedSurrogate = false;
136 
137   /**
138    * A byte array to be reused in read(byte[], int, int).
139    */
140   private byte[] bytesCache;
141 
142   /**
143    * Locks the bytesCache above in read(byte[], int, int).
144    */
145   private Object cacheLock = new Object();
146 
147   /**
148    * This method initializes a new instance of <code>InputStreamReader</code>
149    * to read from the specified stream using the default encoding.
150    *
151    * @param in The <code>InputStream</code> to read from
152    */
InputStreamReader(InputStream in)153   public InputStreamReader(InputStream in)
154   {
155     if (in == null)
156       throw new NullPointerException();
157     this.in = in;
158     try
159         {
160           encoding = SystemProperties.getProperty("file.encoding");
161           // Don't use NIO if avoidable
162           if(EncodingHelper.isISOLatin1(encoding))
163             {
164               encoding = "ISO8859_1";
165               maxBytesPerChar = 1f;
166               decoder = null;
167               return;
168             }
169           Charset cs = EncodingHelper.getCharset(encoding);
170           decoder = cs.newDecoder();
171           encoding = EncodingHelper.getOldCanonical(cs.name());
172           try {
173               maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
174           } catch(UnsupportedOperationException _){
175               maxBytesPerChar = 1f;
176           }
177           decoder.onMalformedInput(CodingErrorAction.REPLACE);
178           decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
179           decoder.reset();
180         } catch(RuntimeException e) {
181           encoding = "ISO8859_1";
182           maxBytesPerChar = 1f;
183           decoder = null;
184         } catch(UnsupportedEncodingException e) {
185           encoding = "ISO8859_1";
186           maxBytesPerChar = 1f;
187           decoder = null;
188         }
189   }
190 
191   /**
192    * This method initializes a new instance of <code>InputStreamReader</code>
193    * to read from the specified stream using a caller supplied character
194    * encoding scheme.  Note that due to a deficiency in the Java language
195    * design, there is no way to determine which encodings are supported.
196    *
197    * @param in The <code>InputStream</code> to read from
198    * @param encoding_name The name of the encoding scheme to use
199    *
200    * @exception UnsupportedEncodingException If the encoding scheme
201    * requested is not available.
202    */
InputStreamReader(InputStream in, String encoding_name)203   public InputStreamReader(InputStream in, String encoding_name)
204     throws UnsupportedEncodingException
205   {
206     if (in == null
207         || encoding_name == null)
208       throw new NullPointerException();
209 
210     this.in = in;
211     // Don't use NIO if avoidable
212     if(EncodingHelper.isISOLatin1(encoding_name))
213       {
214         encoding = "ISO8859_1";
215         maxBytesPerChar = 1f;
216         decoder = null;
217         return;
218       }
219     try {
220       Charset cs = EncodingHelper.getCharset(encoding_name);
221       try {
222         maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
223       } catch(UnsupportedOperationException _){
224         maxBytesPerChar = 1f;
225       }
226 
227       decoder = cs.newDecoder();
228       decoder.onMalformedInput(CodingErrorAction.REPLACE);
229       decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
230       decoder.reset();
231 
232       // The encoding should be the old name, if such exists.
233       encoding = EncodingHelper.getOldCanonical(cs.name());
234     } catch(RuntimeException e) {
235       encoding = "ISO8859_1";
236       maxBytesPerChar = 1f;
237       decoder = null;
238     }
239   }
240 
241   /**
242    * Creates an InputStreamReader that uses a decoder of the given
243    * charset to decode the bytes in the InputStream into
244    * characters.
245    *
246    * @since 1.4
247    */
InputStreamReader(InputStream in, Charset charset)248   public InputStreamReader(InputStream in, Charset charset) {
249     if (in == null)
250       throw new NullPointerException();
251     this.in = in;
252     decoder = charset.newDecoder();
253 
254     try {
255       maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
256     } catch(UnsupportedOperationException _){
257       maxBytesPerChar = 1f;
258     }
259 
260     decoder.onMalformedInput(CodingErrorAction.REPLACE);
261     decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
262     decoder.reset();
263     encoding = EncodingHelper.getOldCanonical(charset.name());
264   }
265 
266   /**
267    * Creates an InputStreamReader that uses the given charset decoder
268    * to decode the bytes in the InputStream into characters.
269    *
270    * @since 1.4
271    */
InputStreamReader(InputStream in, CharsetDecoder decoder)272   public InputStreamReader(InputStream in, CharsetDecoder decoder) {
273     if (in == null)
274       throw new NullPointerException();
275     this.in = in;
276     this.decoder = decoder;
277 
278     Charset charset = decoder.charset();
279     try {
280       if (charset == null)
281         maxBytesPerChar = 1f;
282       else
283         maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
284     } catch(UnsupportedOperationException _){
285         maxBytesPerChar = 1f;
286     }
287 
288     decoder.onMalformedInput(CodingErrorAction.REPLACE);
289     decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
290     decoder.reset();
291     if (charset == null)
292       encoding = "US-ASCII";
293     else
294       encoding = EncodingHelper.getOldCanonical(decoder.charset().name());
295   }
296 
297   /**
298    * This method closes this stream, as well as the underlying
299    * <code>InputStream</code>.
300    *
301    * @exception IOException If an error occurs
302    */
close()303   public void close() throws IOException
304   {
305     synchronized (lock)
306       {
307         // Makes sure all intermediate data is released by the decoder.
308         if (decoder != null)
309            decoder.reset();
310         if (in != null)
311            in.close();
312         in = null;
313         isDone = true;
314         decoder = null;
315       }
316   }
317 
318   /**
319    * This method returns the name of the encoding that is currently in use
320    * by this object.  If the stream has been closed, this method is allowed
321    * to return <code>null</code>.
322    *
323    * @return The current encoding name
324    */
getEncoding()325   public String getEncoding()
326   {
327     return in != null ? encoding : null;
328   }
329 
330   /**
331    * This method checks to see if the stream is ready to be read.  It
332    * will return <code>true</code> if is, or <code>false</code> if it is not.
333    * If the stream is not ready to be read, it could (although is not required
334    * to) block on the next read attempt.
335    *
336    * @return <code>true</code> if the stream is ready to be read,
337    * <code>false</code> otherwise
338    *
339    * @exception IOException If an error occurs
340    */
ready()341   public boolean ready() throws IOException
342   {
343     if (in == null)
344       throw new IOException("Reader has been closed");
345 
346     return in.available() != 0;
347   }
348 
349   /**
350    * This method reads up to <code>length</code> characters from the stream into
351    * the specified array starting at index <code>offset</code> into the
352    * array.
353    *
354    * @param buf The character array to recieve the data read
355    * @param offset The offset into the array to start storing characters
356    * @param length The requested number of characters to read.
357    *
358    * @return The actual number of characters read, or -1 if end of stream.
359    *
360    * @exception IOException If an error occurs
361    */
read(char[] buf, int offset, int length)362   public int read(char[] buf, int offset, int length) throws IOException
363   {
364     if (in == null)
365       throw new IOException("Reader has been closed");
366     if (isDone)
367       return -1;
368     if(decoder != null)
369       {
370         int totalBytes = (int)((double) length * maxBytesPerChar);
371         if (byteBuffer != null)
372           totalBytes = Math.max(totalBytes, byteBuffer.remaining());
373         byte[] bytes;
374         // Fetch cached bytes array if available and big enough.
375         synchronized(cacheLock)
376           {
377             bytes = bytesCache;
378             if (bytes == null || bytes.length < totalBytes)
379               bytes = new byte[totalBytes];
380             else
381               bytesCache = null;
382           }
383 
384         int remaining = 0;
385         if(byteBuffer != null)
386         {
387             remaining = byteBuffer.remaining();
388             byteBuffer.get(bytes, 0, remaining);
389         }
390         int read;
391         if(totalBytes - remaining > 0)
392           {
393             read = in.read(bytes, remaining, totalBytes - remaining);
394             if(read == -1){
395               read = remaining;
396               isDone = true;
397             } else
398               read += remaining;
399           } else
400             read = remaining;
401         byteBuffer = ByteBuffer.wrap(bytes, 0, read);
402         CharBuffer cb = CharBuffer.wrap(buf, offset, length);
403         int startPos = cb.position();
404 
405         if(hasSavedSurrogate){
406             hasSavedSurrogate = false;
407             cb.put(savedSurrogate);
408             read++;
409         }
410 
411         CoderResult cr = decoder.decode(byteBuffer, cb, isDone);
412         decoder.reset();
413         // 1 char remains which is the first half of a surrogate pair.
414         if(cr.isOverflow() && cb.hasRemaining()){
415             CharBuffer overflowbuf = CharBuffer.allocate(2);
416             cr = decoder.decode(byteBuffer, overflowbuf, isDone);
417             overflowbuf.flip();
418             if(overflowbuf.hasRemaining())
419             {
420               cb.put(overflowbuf.get());
421               savedSurrogate = overflowbuf.get();
422               hasSavedSurrogate = true;
423               isDone = false;
424             }
425         }
426 
427         if(byteBuffer.hasRemaining()) {
428             byteBuffer.compact();
429             byteBuffer.flip();
430             isDone = false;
431         } else
432             byteBuffer = null;
433 
434         read = cb.position() - startPos;
435 
436         // Put cached bytes array back if we are finished and the cache
437         // is null or smaller than the used bytes array.
438         synchronized (cacheLock)
439           {
440             if (byteBuffer == null
441                 && (bytesCache == null || bytesCache.length < bytes.length))
442               bytesCache = bytes;
443           }
444         return (read <= 0) ? -1 : read;
445       }
446     else
447       {
448         byte[] bytes;
449         // Fetch cached bytes array if available and big enough.
450         synchronized (cacheLock)
451           {
452             bytes = bytesCache;
453             if (bytes == null || length < bytes.length)
454               bytes = new byte[length];
455             else
456               bytesCache = null;
457           }
458 
459         int read = in.read(bytes);
460         for(int i=0;i<read;i++)
461           buf[offset+i] = (char)(bytes[i]&0xFF);
462 
463         // Put back byte array into cache if appropriate.
464         synchronized (cacheLock)
465           {
466             if (bytesCache == null || bytesCache.length < bytes.length)
467               bytesCache = bytes;
468           }
469         return read;
470     }
471   }
472 
473   /**
474    * Reads an char from the input stream and returns it
475    * as an int in the range of 0-65535.  This method also will return -1 if
476    * the end of the stream has been reached.
477    * <p>
478    * This method will block until the char can be read.
479    *
480    * @return The char read or -1 if end of stream
481    *
482    * @exception IOException If an error occurs
483    */
read()484   public int read() throws IOException
485   {
486     char[] buf = new char[1];
487     int count = read(buf, 0, 1);
488     return count > 0 ? buf[0] : -1;
489   }
490 
491   /**
492    * Skips the specified number of chars in the stream.  It
493    * returns the actual number of chars skipped, which may be less than the
494    * requested amount.
495    *
496    * @param count The requested number of chars to skip
497    *
498    * @return The actual number of chars skipped.
499    *
500    * @exception IOException If an error occurs
501    */
skip(long count)502    public long skip(long count) throws IOException
503    {
504      if (in == null)
505        throw new IOException("Reader has been closed");
506 
507      return super.skip(count);
508    }
509 }
510