1 /* Copyright 2002, 2003 Elliotte Rusty Harold
2 
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6 
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU Lesser General Public License for more details.
11 
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307  USA
16 
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@ibiblio.org. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */
21 
22 package nu.xom;
23 
24 import java.io.Writer;
25 
26 /**
27  * <p>
28  *  ISO 8859-5, ASCII plus Cyrillic (Russian, Byelorussian, etc.)
29  * </p>
30  *
31  * @author Elliotte Rusty Harold
32  * @version 1.0
33  *
34  */
35 class ISOCyrillicWriter extends TextWriter {
36 
ISOCyrillicWriter(Writer out, String encoding)37     ISOCyrillicWriter(Writer out, String encoding) {
38         super(out, encoding);
39     }
40 
41     /**
42      * @see nu.xom.TextWriter#needsEscaping(char)
43      */
needsEscaping(char c)44     boolean needsEscaping(char c) {
45         if (c <= 0xA0) return false;
46         switch (c) {
47             case 0x0401: return false; // CYRILLIC CAPITAL LETTER IO
48             case 0x0402: return false; // CYRILLIC CAPITAL LETTER DJE
49             case 0x0403: return false; // CYRILLIC CAPITAL LETTER GJE
50             case 0x0404: return false; // CYRILLIC CAPITAL LETTER UKRAINIAN IE
51             case 0x0405: return false; // CYRILLIC CAPITAL LETTER DZE
52             case 0x0406: return false; // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
53             case 0x0407: return false; // CYRILLIC CAPITAL LETTER YI
54             case 0x0408: return false; // CYRILLIC CAPITAL LETTER JE
55             case 0x0409: return false; // CYRILLIC CAPITAL LETTER LJE
56             case 0x040A: return false; // CYRILLIC CAPITAL LETTER NJE
57             case 0x040B: return false; // CYRILLIC CAPITAL LETTER TSHE
58             case 0x040C: return false; // CYRILLIC CAPITAL LETTER KJE
59             case 0x040D: return true;  // place holder to enable table lookup
60             case 0x040E: return false; // CYRILLIC CAPITAL LETTER SHORT U
61             case 0x040F: return false; // CYRILLIC CAPITAL LETTER DZHE
62             case 0x0410: return false; // CYRILLIC CAPITAL LETTER A
63             case 0x0411: return false; // CYRILLIC CAPITAL LETTER BE
64             case 0x0412: return false; // CYRILLIC CAPITAL LETTER VE
65             case 0x0413: return false; // CYRILLIC CAPITAL LETTER GHE
66             case 0x0414: return false; // CYRILLIC CAPITAL LETTER DE
67             case 0x0415: return false; // CYRILLIC CAPITAL LETTER IE
68             case 0x0416: return false; // CYRILLIC CAPITAL LETTER ZHE
69             case 0x0417: return false; // CYRILLIC CAPITAL LETTER ZE
70             case 0x0418: return false; // CYRILLIC CAPITAL LETTER I
71             case 0x0419: return false; // CYRILLIC CAPITAL LETTER SHORT I
72             case 0x041A: return false; // CYRILLIC CAPITAL LETTER KA
73             case 0x041B: return false; // CYRILLIC CAPITAL LETTER EL
74             case 0x041C: return false; // CYRILLIC CAPITAL LETTER EM
75             case 0x041D: return false; // CYRILLIC CAPITAL LETTER EN
76             case 0x041E: return false; // CYRILLIC CAPITAL LETTER O
77             case 0x041F: return false; // CYRILLIC CAPITAL LETTER PE
78             case 0x0420: return false; // CYRILLIC CAPITAL LETTER ER
79             case 0x0421: return false; // CYRILLIC CAPITAL LETTER ES
80             case 0x0422: return false; // CYRILLIC CAPITAL LETTER TE
81             case 0x0423: return false; // CYRILLIC CAPITAL LETTER U
82             case 0x0424: return false; // CYRILLIC CAPITAL LETTER EF
83             case 0x0425: return false; // CYRILLIC CAPITAL LETTER HA
84             case 0x0426: return false; // CYRILLIC CAPITAL LETTER TSE
85             case 0x0427: return false; // CYRILLIC CAPITAL LETTER CHE
86             case 0x0428: return false; // CYRILLIC CAPITAL LETTER SHA
87             case 0x0429: return false; // CYRILLIC CAPITAL LETTER SHCHA
88             case 0x042A: return false; // CYRILLIC CAPITAL LETTER HARD SIGN
89             case 0x042B: return false; // CYRILLIC CAPITAL LETTER YERU
90             case 0x042C: return false; // CYRILLIC CAPITAL LETTER SOFT SIGN
91             case 0x042D: return false; // CYRILLIC CAPITAL LETTER E
92             case 0x042E: return false; // CYRILLIC CAPITAL LETTER YU
93             case 0x042F: return false; // CYRILLIC CAPITAL LETTER YA
94             case 0x0430: return false; // CYRILLIC SMALL LETTER A
95             case 0x0431: return false; // CYRILLIC SMALL LETTER BE
96             case 0x0432: return false; // CYRILLIC SMALL LETTER VE
97             case 0x0433: return false; // CYRILLIC SMALL LETTER GHE
98             case 0x0434: return false; // CYRILLIC SMALL LETTER DE
99             case 0x0435: return false; // CYRILLIC SMALL LETTER IE
100             case 0x0436: return false; // CYRILLIC SMALL LETTER ZHE
101             case 0x0437: return false; // CYRILLIC SMALL LETTER ZE
102             case 0x0438: return false; // CYRILLIC SMALL LETTER I
103             case 0x0439: return false; // CYRILLIC SMALL LETTER SHORT I
104             case 0x043A: return false; // CYRILLIC SMALL LETTER KA
105             case 0x043B: return false; // CYRILLIC SMALL LETTER EL
106             case 0x043C: return false; // CYRILLIC SMALL LETTER EM
107             case 0x043D: return false; // CYRILLIC SMALL LETTER EN
108             case 0x043E: return false; // CYRILLIC SMALL LETTER O
109             case 0x043F: return false; // CYRILLIC SMALL LETTER PE
110             case 0x0440: return false; // CYRILLIC SMALL LETTER ER
111             case 0x0441: return false; // CYRILLIC SMALL LETTER ES
112             case 0x0442: return false; // CYRILLIC SMALL LETTER TE
113             case 0x0443: return false; // CYRILLIC SMALL LETTER U
114             case 0x0444: return false; // CYRILLIC SMALL LETTER EF
115             case 0x0445: return false; // CYRILLIC SMALL LETTER HA
116             case 0x0446: return false; // CYRILLIC SMALL LETTER TSE
117             case 0x0447: return false; // CYRILLIC SMALL LETTER CHE
118             case 0x0448: return false; // CYRILLIC SMALL LETTER SHA
119             case 0x0449: return false; // CYRILLIC SMALL LETTER SHCHA
120             case 0x044A: return false; // CYRILLIC SMALL LETTER HARD SIGN
121             case 0x044B: return false; // CYRILLIC SMALL LETTER YERU
122             case 0x044C: return false; // CYRILLIC SMALL LETTER SOFT SIGN
123             case 0x044D: return false; // CYRILLIC SMALL LETTER E
124             case 0x044E: return false; // CYRILLIC SMALL LETTER YU
125             case 0x044F: return false; // CYRILLIC SMALL LETTER YA
126             case 0x0450: return true;  // place holder to enable table lookup
127             case 0x0451: return false; // CYRILLIC SMALL LETTER IO
128             case 0x0452: return false; // CYRILLIC SMALL LETTER DJE
129             case 0x0453: return false; // CYRILLIC SMALL LETTER GJE
130             case 0x0454: return false; // CYRILLIC SMALL LETTER UKRAINIAN IE
131             case 0x0455: return false; // CYRILLIC SMALL LETTER DZE
132             case 0x0456: return false; // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
133             case 0x0457: return false; // CYRILLIC SMALL LETTER YI
134             case 0x0458: return false; // CYRILLIC SMALL LETTER JE
135             case 0x0459: return false; // CYRILLIC SMALL LETTER LJE
136             case 0x045A: return false; // CYRILLIC SMALL LETTER NJE
137             case 0x045B: return false; // CYRILLIC SMALL LETTER TSHE
138             case 0x045C: return false; // CYRILLIC SMALL LETTER KJE
139             case 0x045D: return true;  // place holder to enable table lookup
140             case 0x045E: return false; // CYRILLIC SMALL LETTER SHORT U
141             case 0x045F: return false; // CYRILLIC SMALL LETTER DZHE
142         }
143 
144         switch (c) { // assorted leftover characters
145             case 0x00AD: return false; // SOFT HYPHEN
146             case 0x00A7: return false; // SECTION SIGN
147             case 0x2116: return false; // NUMERO SIGN
148         }
149 
150         return true;
151     }
152 
153 }