/* Copyright 2002, 2003 Elliotte Rusty Harold

   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307  USA

   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@ibiblio.org. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

package nu.xom;

import java.io.Writer;

/**
 * <p>
 *   Text writer for ISO 8859-5: ASCII plus Cyrillic
 *   (Russian, Byelorussian, etc.)
 * </p>
 *
 * <p>
 *   The only encoding-specific logic here is the decision of which
 *   characters can be written as-is and which must be escaped.
 * </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0
 *
 */
class ISOCyrillicWriter extends TextWriter {

    /**
     * <p>
     *   Creates a writer that forwards both arguments unchanged
     *   to the <code>TextWriter</code> constructor.
     * </p>
     *
     * @param out the underlying character sink
     * @param encoding the name of the encoding being written
     */
    ISOCyrillicWriter(Writer out, String encoding) {
        super(out, encoding);
    }

    /**
     * <p>
     *   Reports whether a character must be escaped rather than
     *   written directly in this encoding.
     * </p>
     *
     * <p>
     *   Characters written unescaped:
     * </p>
     *
     * <ul>
     *   <li>U+0000 through U+00A0</li>
     *   <li>U+00A7 SECTION SIGN and U+00AD SOFT HYPHEN</li>
     *   <li>U+0401 (CYRILLIC CAPITAL LETTER IO) through
     *       U+045F (CYRILLIC SMALL LETTER DZHE), except for
     *       U+040D, U+0450 and U+045D</li>
     *   <li>U+2116 NUMERO SIGN</li>
     * </ul>
     *
     * <p>
     *   Every other character needs escaping.
     * </p>
     *
     * @param c the character to test
     *
     * @return false if <code>c</code> can be written directly;
     *     true if it must be escaped
     *
     * @see nu.xom.TextWriter#needsEscaping(char)
     */
    boolean needsEscaping(char c) {

        // Everything up to and including U+00A0 is written directly.
        if (c <= 0xA0) {
            return false;
        }

        // Main Cyrillic run, U+0401 to U+045F. Three code points inside
        // this otherwise contiguous range are not part of the character
        // set and are the only ones in the range that must be escaped.
        if (c >= 0x0401 && c <= 0x045F) {
            return c == 0x040D || c == 0x0450 || c == 0x045D;
        }

        // Outside that run only three stragglers are written directly:
        // SECTION SIGN, SOFT HYPHEN and NUMERO SIGN.
        boolean leftover = (c == 0x00A7 || c == 0x00AD || c == 0x2116);
        return !leftover;

    }

}