1 /* Charset.java --
2    Copyright (C) 2002, 2004, 2005, 2007  Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 
39 package java.nio.charset;
40 
41 import gnu.classpath.ServiceFactory;
42 import gnu.classpath.SystemProperties;
43 import gnu.java.nio.charset.Provider;
44 
45 import java.io.BufferedReader;
46 import java.io.InputStreamReader;
47 import java.net.URL;
48 import java.nio.ByteBuffer;
49 import java.nio.CharBuffer;
50 import java.nio.charset.spi.CharsetProvider;
51 import java.util.Collections;
52 import java.util.Enumeration;
53 import java.util.HashSet;
54 import java.util.Iterator;
55 import java.util.LinkedHashSet;
56 import java.util.Locale;
57 import java.util.Set;
58 import java.util.SortedMap;
59 import java.util.TreeMap;
60 
61 /**
62  * @author Jesse Rosenstock
63  * @since 1.4
64  * @status updated to 1.5
65  */
66 public abstract class Charset implements Comparable<Charset>
67 {
68   private CharsetEncoder cachedEncoder;
69   private CharsetDecoder cachedDecoder;
70 
71   /**
72    * Extra Charset providers.
73    */
74   private static CharsetProvider[] providers;
75 
76   private final String canonicalName;
77   private final String[] aliases;
78 
Charset(String canonicalName, String[] aliases)79   protected Charset (String canonicalName, String[] aliases)
80   {
81     checkName (canonicalName);
82     if (aliases != null)
83       {
84         int n = aliases.length;
85         for (int i = 0; i < n; ++i)
86             checkName (aliases[i]);
87       }
88 
89     cachedEncoder = null;
90     cachedDecoder = null;
91     this.canonicalName = canonicalName;
92     this.aliases = aliases;
93   }
94 
95   /**
96    * @throws IllegalCharsetNameException  if the name is illegal
97    */
checkName(String name)98   private static void checkName (String name)
99   {
100     int n = name.length ();
101 
102     if (n == 0)
103       throw new IllegalCharsetNameException (name);
104 
105     char ch = name.charAt (0);
106     if (!(('A' <= ch && ch <= 'Z')
107           || ('a' <= ch && ch <= 'z')
108           || ('0' <= ch && ch <= '9')))
109       throw new IllegalCharsetNameException (name);
110 
111     for (int i = 1; i < n; ++i)
112       {
113         ch = name.charAt (i);
114         if (!(('A' <= ch && ch <= 'Z')
115               || ('a' <= ch && ch <= 'z')
116               || ('0' <= ch && ch <= '9')
117               || ch == '-' || ch == '.' || ch == ':' || ch == '_'))
118           throw new IllegalCharsetNameException (name);
119       }
120   }
121 
122   /**
123    * Returns the system default charset.
124    *
125    * This may be set by the user or VM with the file.encoding
126    * property.
127    *
128    * @since 1.5
129    */
defaultCharset()130   public static Charset defaultCharset()
131   {
132     String encoding;
133 
134     try
135       {
136 	encoding = SystemProperties.getProperty("file.encoding");
137       }
138     catch(SecurityException e)
139       {
140 	// Use fallback.
141 	encoding = "ISO-8859-1";
142       }
143     catch(IllegalArgumentException e)
144       {
145 	// Use fallback.
146 	encoding = "ISO-8859-1";
147       }
148 
149     try
150       {
151 	return forName(encoding);
152       }
153     catch(UnsupportedCharsetException e)
154       {
155 	// Ignore.
156       }
157     catch(IllegalCharsetNameException e)
158       {
159 	// Ignore.
160       }
161     catch(IllegalArgumentException e)
162       {
163 	// Ignore.
164       }
165 
166     throw new IllegalStateException("Can't get default charset!");
167   }
168 
isSupported(String charsetName)169   public static boolean isSupported (String charsetName)
170   {
171     return charsetForName (charsetName) != null;
172   }
173 
174   /**
175    * Returns the Charset instance for the charset of the given name.
176    *
177    * @param charsetName
178    * @return the Charset instance for the indicated charset
179    * @throws UnsupportedCharsetException if this VM does not support
180    * the charset of the given name.
181    * @throws IllegalCharsetNameException if the given charset name is
182    * legal.
183    * @throws IllegalArgumentException if <code>charsetName</code> is null.
184    */
forName(String charsetName)185   public static Charset forName (String charsetName)
186   {
187     // Throws IllegalArgumentException as the JDK does.
188     if(charsetName == null)
189         throw new IllegalArgumentException("Charset name must not be null.");
190 
191     Charset cs = charsetForName (charsetName);
192     if (cs == null)
193       throw new UnsupportedCharsetException (charsetName);
194     return cs;
195   }
196 
197   /**
198    * Retrieves a charset for the given charset name.
199    *
200    * @return A charset object for the charset with the specified name, or
201    * <code>null</code> if no such charset exists.
202    *
203    * @throws IllegalCharsetNameException  if the name is illegal
204    */
charsetForName(String charsetName)205   private static Charset charsetForName(String charsetName)
206   {
207     checkName (charsetName);
208     // Try the default provider first
209     // (so we don't need to load external providers unless really necessary)
210     // if it is an exotic charset try loading the external providers.
211     Charset cs = provider().charsetForName(charsetName);
212     if (cs == null)
213       {
214 	CharsetProvider[] providers = providers2();
215 	for (int i = 0; i < providers.length; i++)
216 	  {
217 	    cs = providers[i].charsetForName(charsetName);
218 	    if (cs != null)
219 	      break;
220 	  }
221       }
222     return cs;
223   }
224 
availableCharsets()225   public static SortedMap<String, Charset> availableCharsets()
226   {
227     TreeMap<String, Charset> charsets
228       = new TreeMap(String.CASE_INSENSITIVE_ORDER);
229     for (Iterator<Charset> i = provider().charsets(); i.hasNext(); )
230       {
231 	Charset cs = i.next();
232 	charsets.put(cs.name(), cs);
233       }
234 
235     CharsetProvider[] providers = providers2();
236     for (int j = 0; j < providers.length; j++)
237       {
238         for (Iterator<Charset> i = providers[j].charsets(); i.hasNext(); )
239           {
240             Charset cs = (Charset) i.next();
241             charsets.put(cs.name(), cs);
242           }
243       }
244 
245     return Collections.unmodifiableSortedMap(charsets);
246   }
247 
provider()248   private static CharsetProvider provider()
249   {
250     try
251       {
252 	String s = System.getProperty("charset.provider");
253 	if (s != null)
254 	  {
255 	    CharsetProvider p =
256 	      (CharsetProvider) ((Class.forName(s)).newInstance());
257 	    return p;
258 	  }
259       }
260     catch (Exception e)
261       {
262 	// Ignore.
263       }
264 
265     return Provider.provider();
266   }
267 
268   /**
269    * We need to support multiple providers, reading them from
270    * java.nio.charset.spi.CharsetProvider in the resource directory
271    * META-INF/services. This returns the "extra" charset providers.
272    */
providers2()273   private static CharsetProvider[] providers2()
274   {
275     if (providers == null)
276       {
277         try
278           {
279             Iterator i = ServiceFactory.lookupProviders(CharsetProvider.class);
280             LinkedHashSet set = new LinkedHashSet();
281             while (i.hasNext())
282               set.add(i.next());
283 
284             providers = new CharsetProvider[set.size()];
285             set.toArray(providers);
286           }
287         catch (Exception e)
288           {
289             throw new RuntimeException(e);
290           }
291       }
292     return providers;
293   }
294 
name()295   public final String name ()
296   {
297     return canonicalName;
298   }
299 
aliases()300   public final Set<String> aliases ()
301   {
302     if (aliases == null)
303       return Collections.<String>emptySet();
304 
305     // should we cache the aliasSet instead?
306     int n = aliases.length;
307     HashSet<String> aliasSet = new HashSet<String> (n);
308     for (int i = 0; i < n; ++i)
309         aliasSet.add (aliases[i]);
310     return Collections.unmodifiableSet (aliasSet);
311   }
312 
displayName()313   public String displayName ()
314   {
315     return canonicalName;
316   }
317 
displayName(Locale locale)318   public String displayName (Locale locale)
319   {
320     return canonicalName;
321   }
322 
isRegistered()323   public final boolean isRegistered ()
324   {
325     return (!canonicalName.startsWith ("x-")
326             && !canonicalName.startsWith ("X-"));
327   }
328 
contains(Charset cs)329   public abstract boolean contains (Charset cs);
330 
newDecoder()331   public abstract CharsetDecoder newDecoder ();
332 
newEncoder()333   public abstract CharsetEncoder newEncoder ();
334 
canEncode()335   public boolean canEncode ()
336   {
337     return true;
338   }
339 
340   // NB: This implementation serializes different threads calling
341   // Charset.encode(), a potential performance problem.  It might
342   // be better to remove the cache, or use ThreadLocal to cache on
343   // a per-thread basis.
encode(CharBuffer cb)344   public final synchronized ByteBuffer encode (CharBuffer cb)
345   {
346     try
347       {
348 	if (cachedEncoder == null)
349 	  {
350 	    cachedEncoder = newEncoder ()
351 	      .onMalformedInput (CodingErrorAction.REPLACE)
352 	      .onUnmappableCharacter (CodingErrorAction.REPLACE);
353 	  } else
354 	  cachedEncoder.reset();
355 	return cachedEncoder.encode (cb);
356       }
357     catch (CharacterCodingException e)
358       {
359         throw new AssertionError (e);
360       }
361   }
362 
encode(String str)363   public final ByteBuffer encode (String str)
364   {
365     return encode (CharBuffer.wrap (str));
366   }
367 
368   // NB: This implementation serializes different threads calling
369   // Charset.decode(), a potential performance problem.  It might
370   // be better to remove the cache, or use ThreadLocal to cache on
371   // a per-thread basis.
decode(ByteBuffer bb)372   public final synchronized CharBuffer decode (ByteBuffer bb)
373   {
374     try
375       {
376 	if (cachedDecoder == null)
377 	  {
378 	    cachedDecoder = newDecoder ()
379 	      .onMalformedInput (CodingErrorAction.REPLACE)
380 	      .onUnmappableCharacter (CodingErrorAction.REPLACE);
381 	  } else
382 	  cachedDecoder.reset();
383 
384 	return cachedDecoder.decode (bb);
385       }
386     catch (CharacterCodingException e)
387       {
388         throw new AssertionError (e);
389       }
390   }
391 
compareTo(Charset other)392   public final int compareTo (Charset other)
393   {
394     return canonicalName.compareToIgnoreCase (other.canonicalName);
395   }
396 
hashCode()397   public final int hashCode ()
398   {
399     return canonicalName.hashCode ();
400   }
401 
equals(Object ob)402   public final boolean equals (Object ob)
403   {
404     if (ob instanceof Charset)
405       return canonicalName.equalsIgnoreCase (((Charset) ob).canonicalName);
406     else
407       return false;
408   }
409 
toString()410   public final String toString ()
411   {
412     return canonicalName;
413   }
414 }
415