/* Copyright 2002, 2003 Elliotte Rusty Harold
   
   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public 
   License as published by the Free Software Foundation.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
   GNU Lesser General Public License for more details.
   
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the 
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
   Boston, MA 02111-1307  USA
   
   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@ibiblio.org. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

package nu.xom;

import java.io.Writer;

/**
 * <p>
 *   A <code>TextWriter</code> for ISO-8859-14 (Latin-8), the character
 *   set used for Gaelic, Welsh, and other Celtic languages.
 *   Sun's JDK does not yet support this encoding as of 1.5 alpha;
 *   IBM's 1.4.1 JDK does.
 * </p>
 * 
 * @author Elliotte Rusty Harold
 * @version 1.0
 *
 */
class Latin8Writer extends TextWriter {

    /**
     * <p>
     *   Hands the underlying writer and the encoding name straight
     *   to the <code>TextWriter</code> constructor.
     * </p>
     * 
     * @param out the writer passed to the superclass
     * @param encoding the encoding name passed to the superclass
     */
    Latin8Writer(Writer out, String encoding) {
        super(out, encoding);
    }

    /**
     * <p>
     *   Returns <code>false</code> for every code point up to and
     *   including U+00A0, and for each ISO-8859-14 character listed
     *   in the switch below; returns <code>true</code> for every
     *   other character.
     * </p>
     * 
     * @param c the character to test
     * 
     * @return <code>false</code> if <code>c</code> is one of the 
     *     characters enumerated here, <code>true</code> otherwise
     * 
     * @see nu.xom.TextWriter#needsEscaping(char)
     */
    boolean needsEscaping(char c) {

        // Everything through NO-BREAK SPACE is accepted unchanged.
        if (c <= 0xA0) {
            return false;
        }

        switch (c) {
            // Symbols found at the same code points as in Latin-1
            case 0x00A3: // POUND SIGN
            case 0x00A7: // SECTION SIGN
            case 0x00A9: // COPYRIGHT SIGN
            case 0x00AD: // SOFT HYPHEN
            case 0x00AE: // REGISTERED SIGN
            case 0x00B6: // PILCROW SIGN

            // Latin-1 capital letters; U+00D0, U+00D7, and U+00DE
            // are deliberately absent
            case 0x00C0: case 0x00C1: case 0x00C2: case 0x00C3:
            case 0x00C4: case 0x00C5: case 0x00C6: case 0x00C7:
            case 0x00C8: case 0x00C9: case 0x00CA: case 0x00CB:
            case 0x00CC: case 0x00CD: case 0x00CE: case 0x00CF:
            case 0x00D1: case 0x00D2: case 0x00D3: case 0x00D4:
            case 0x00D5: case 0x00D6:
            case 0x00D8: case 0x00D9: case 0x00DA: case 0x00DB:
            case 0x00DC: case 0x00DD:
            case 0x00DF: // LATIN SMALL LETTER SHARP S

            // Latin-1 small letters; U+00F0, U+00F7, and U+00FE
            // are deliberately absent
            case 0x00E0: case 0x00E1: case 0x00E2: case 0x00E3:
            case 0x00E4: case 0x00E5: case 0x00E6: case 0x00E7:
            case 0x00E8: case 0x00E9: case 0x00EA: case 0x00EB:
            case 0x00EC: case 0x00ED: case 0x00EE: case 0x00EF:
            case 0x00F1: case 0x00F2: case 0x00F3: case 0x00F4:
            case 0x00F5: case 0x00F6:
            case 0x00F8: case 0x00F9: case 0x00FA: case 0x00FB:
            case 0x00FC: case 0x00FD:
            case 0x00FF: // LATIN SMALL LETTER Y WITH DIAERESIS

            // Latin Extended-A
            case 0x010A: // LATIN CAPITAL LETTER C WITH DOT ABOVE
            case 0x010B: // LATIN SMALL LETTER C WITH DOT ABOVE
            case 0x0120: // LATIN CAPITAL LETTER G WITH DOT ABOVE
            case 0x0121: // LATIN SMALL LETTER G WITH DOT ABOVE
            case 0x0174: // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
            case 0x0175: // LATIN SMALL LETTER W WITH CIRCUMFLEX
            case 0x0176: // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
            case 0x0177: // LATIN SMALL LETTER Y WITH CIRCUMFLEX
            case 0x0178: // LATIN CAPITAL LETTER Y WITH DIAERESIS

            // Latin Extended Additional
            case 0x1E02: // LATIN CAPITAL LETTER B WITH DOT ABOVE
            case 0x1E03: // LATIN SMALL LETTER B WITH DOT ABOVE
            case 0x1E0A: // LATIN CAPITAL LETTER D WITH DOT ABOVE
            case 0x1E0B: // LATIN SMALL LETTER D WITH DOT ABOVE
            case 0x1E1E: // LATIN CAPITAL LETTER F WITH DOT ABOVE
            case 0x1E1F: // LATIN SMALL LETTER F WITH DOT ABOVE
            case 0x1E40: // LATIN CAPITAL LETTER M WITH DOT ABOVE
            case 0x1E41: // LATIN SMALL LETTER M WITH DOT ABOVE
            case 0x1E56: // LATIN CAPITAL LETTER P WITH DOT ABOVE
            case 0x1E57: // LATIN SMALL LETTER P WITH DOT ABOVE
            case 0x1E60: // LATIN CAPITAL LETTER S WITH DOT ABOVE
            case 0x1E61: // LATIN SMALL LETTER S WITH DOT ABOVE
            case 0x1E6A: // LATIN CAPITAL LETTER T WITH DOT ABOVE
            case 0x1E6B: // LATIN SMALL LETTER T WITH DOT ABOVE
            case 0x1E80: // LATIN CAPITAL LETTER W WITH GRAVE
            case 0x1E81: // LATIN SMALL LETTER W WITH GRAVE
            case 0x1E82: // LATIN CAPITAL LETTER W WITH ACUTE
            case 0x1E83: // LATIN SMALL LETTER W WITH ACUTE
            case 0x1E84: // LATIN CAPITAL LETTER W WITH DIAERESIS
            case 0x1E85: // LATIN SMALL LETTER W WITH DIAERESIS
            case 0x1EF2: // LATIN CAPITAL LETTER Y WITH GRAVE
            case 0x1EF3: // LATIN SMALL LETTER Y WITH GRAVE
                return false;

            default:
                return true;
        }

    }

}