1 /* Copyright 2002-2004, 2018 Elliotte Rusty Harold
2 
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6 
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU Lesser General Public License for more details.
11 
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307  USA
16 
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@ibiblio.org. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */
21 
22 package nu.xom;
23 
24 import java.io.UnsupportedEncodingException;
25 import java.io.Writer;
26 import java.nio.charset.Charset;
27 import java.nio.charset.CharsetEncoder;
28 import java.util.Locale;
29 
30 /**
31  * <p>
32  * <code>GenericWriter</code> figures out whether a
33  * character is or is not available in a particular encoding.
34  * </p>
35  *
36  * @author Elliotte Rusty Harold
37  * @version 1.2.11
38  *
39  */
40 class GenericWriter extends TextWriter {
41 
42     private final boolean               isJapanese;
43     private final CharsetEncoder        encoder;
44 
45 
GenericWriter(Writer out, String encoding)46     GenericWriter(Writer out, String encoding)
47       throws UnsupportedEncodingException {
48 
49         super(out, encoding);
50         encoding = encoding.toUpperCase(Locale.ENGLISH);
51         if (encoding.indexOf("EUC-JP") > -1
52           || encoding.startsWith("EUC_JP")
53           || encoding.equals("SHIFT_JIS")
54           || encoding.equals("SJIS")
55           || encoding.equals("ISO-2022-JP")) {
56             isJapanese = true;
57         }
58         else {
59            isJapanese = false;
60         }
61         encoder = Charset.forName(encoding).newEncoder();
62     }
63 
64 
needsEscaping(char c)65     boolean needsEscaping(char c) {
66 
67         // assume everything has at least the ASCII characters
68         if (c <= 127) return false;
69         // work around various bugs in Japanese encodings
70         if (isJapanese) {
71             if (c == 0xA5) return true; // Yen symbol
72             if (c == 0x203E) return true; // Sun bugs in EUC-JP and SJIS
73         }
74 
75         if (encoder.canEncode(c)) return false;
76         return true;
77     }
78 
79 
80 }
81