1 /* Copyright 2002-2004, 2018 Elliotte Rusty Harold 2 3 This library is free software; you can redistribute it and/or modify 4 it under the terms of version 2.1 of the GNU Lesser General Public 5 License as published by the Free Software Foundation. 6 7 This library is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 GNU Lesser General Public License for more details. 11 12 You should have received a copy of the GNU Lesser General Public 13 License along with this library; if not, write to the 14 Free Software Foundation, Inc., 59 Temple Place, Suite 330, 15 Boston, MA 02111-1307 USA 16 17 You can contact Elliotte Rusty Harold by sending e-mail to 18 elharo@ibiblio.org. Please include the word "XOM" in the 19 subject line. The XOM home page is located at http://www.xom.nu/ 20 */ 21 22 package nu.xom; 23 24 import java.io.UnsupportedEncodingException; 25 import java.io.Writer; 26 import java.nio.charset.Charset; 27 import java.nio.charset.CharsetEncoder; 28 import java.util.Locale; 29 30 /** 31 * <p> 32 * <code>GenericWriter</code> figures out whether a 33 * character is or is not available in a particular encoding. 34 * </p> 35 * 36 * @author Elliotte Rusty Harold 37 * @version 1.2.11 38 * 39 */ 40 class GenericWriter extends TextWriter { 41 42 private final boolean isJapanese; 43 private final CharsetEncoder encoder; 44 45 GenericWriter(Writer out, String encoding)46 GenericWriter(Writer out, String encoding) 47 throws UnsupportedEncodingException { 48 49 super(out, encoding); 50 encoding = encoding.toUpperCase(Locale.ENGLISH); 51 if (encoding.indexOf("EUC-JP") > -1 52 || encoding.startsWith("EUC_JP") 53 || encoding.equals("SHIFT_JIS") 54 || encoding.equals("SJIS") 55 || encoding.equals("ISO-2022-JP")) { 56 isJapanese = true; 57 } 58 else { 59 isJapanese = false; 60 } 61 encoder = Charset.forName(encoding).newEncoder(); 62 } 63 64 needsEscaping(char c)65 boolean needsEscaping(char c) { 66 67 // assume everything has at least the ASCII characters 68 if (c <= 127) return false; 69 // work around various bugs in Japanese encodings 70 if (isJapanese) { 71 if (c == 0xA5) return true; // Yen symbol 72 if (c == 0x203E) return true; // Sun bugs in EUC-JP and SJIS 73 } 74 75 if (encoder.canEncode(c)) return false; 76 return true; 77 } 78 79 80 } 81