1 /* Copyright 2002, 2003 Elliotte Rusty Harold
2 
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6 
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU Lesser General Public License for more details.
11 
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307  USA
16 
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@ibiblio.org. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */
21 
22 package nu.xom;
23 
24 import java.io.Writer;
25 
26 /**
27  * <p>
28  *   ISO 8859-8, ASCII plus Hebrew
29  * </p>
30  *
31  * @author Elliotte Rusty Harold
32  * @version 1.0
33  *
34  */
35 class ISOHebrewWriter extends TextWriter {
36 
ISOHebrewWriter(Writer out, String encoding)37     ISOHebrewWriter(Writer out, String encoding) {
38         super(out, encoding);
39     }
40 
41     /**
42      * @see nu.xom.TextWriter#needsEscaping(char)
43      */
needsEscaping(char c)44     boolean needsEscaping(char c) {
45         if (c <= 0xA0) return false;
46         switch (c) { // characters shared with Latin-1
47             case 0x00A2: return false; // CENT SIGN
48             case 0x00A3: return false; // POUND SIGN
49             case 0x00A4: return false; // CURRENCY SIGN
50             case 0x00A5: return false; // YEN SIGN
51             case 0x00A6: return false; // BROKEN BAR
52             case 0x00A7: return false; // SECTION SIGN
53             case 0x00A8: return false; // DIAERESIS
54             case 0x00A9: return false; // COPYRIGHT SIGN
55             case 0x00AB: return false; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
56             case 0x00AC: return false; // NOT SIGN
57             case 0x00AD: return false; // SOFT HYPHEN
58             case 0x00AE: return false; // REGISTERED SIGN
59             // A bug in Java prevents a macron from being correctly
60             // output as the actual character in this encoding even
61             // though it does exist in the ISO-8859-8 character set.
62             // See JDC bug 4760496
63             // http://developer.java.sun.com/developer/bugParade/bugs/4760496.html
64             // They have marked this as fixed in Tiger (i.e. Java 1.5)
65             // I'm not going to fix it here yet though, because I'd
66             // prefer XOM to work correctly with earlier versions of
67             // of Java; and it's not incorrect to output a character
68             // reference even if you don't have to. It is an issue if a
69             // macron is used in a a comment or a processing
70             // instruction though. The macron is not a name character
71             // so that's not an issue though.
72             case 0x00AF: return true;  // MACRON
73             case 0x00B0: return false; // DEGREE SIGN
74             case 0x00B1: return false; // PLUS-MINUS SIGN
75             case 0x00B2: return false; // SUPERSCRIPT TWO
76             case 0x00B3: return false; // SUPERSCRIPT THREE
77             case 0x00B4: return false; // ACUTE ACCENT
78             case 0x00B5: return false; // MICRO SIGN
79             case 0x00B6: return false; // PILCROW SIGN
80             case 0x00B7: return false; // MIDDLE DOT
81             case 0x00B8: return false; // CEDILLA
82             case 0x00B9: return false; // SUPERSCRIPT ONE
83             case 0x00BA: return true;  // place holder to allow optimization of switch statement
84             case 0x00BB: return false; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
85             case 0x00BC: return false; // VULGAR FRACTION ONE QUARTER
86             case 0x00BD: return false; // VULGAR FRACTION ONE HALF
87             case 0x00BE: return false; // VULGAR FRACTION THREE QUARTERS
88         }
89         switch (c) {  // Unicode Hebrew block
90             case 0x05D0: return false; // HEBREW LETTER ALEF
91             case 0x05D1: return false; // HEBREW LETTER BET
92             case 0x05D2: return false; // HEBREW LETTER GIMEL
93             case 0x05D3: return false; // HEBREW LETTER DALET
94             case 0x05D4: return false; // HEBREW LETTER HE
95             case 0x05D5: return false; // HEBREW LETTER VAV
96             case 0x05D6: return false; // HEBREW LETTER ZAYIN
97             case 0x05D7: return false; // HEBREW LETTER HET
98             case 0x05D8: return false; // HEBREW LETTER TET
99             case 0x05D9: return false; // HEBREW LETTER YOD
100             case 0x05DA: return false; // HEBREW LETTER FINAL KAF
101             case 0x05DB: return false; // HEBREW LETTER KAF
102             case 0x05DC: return false; // HEBREW LETTER LAMED
103             case 0x05DD: return false; // HEBREW LETTER FINAL MEM
104             case 0x05DE: return false; // HEBREW LETTER MEM
105             case 0x05DF: return false; // HEBREW LETTER FINAL NUN
106             case 0x05E0: return false; // HEBREW LETTER NUN
107             case 0x05E1: return false; // HEBREW LETTER SAMEKH
108             case 0x05E2: return false; // HEBREW LETTER AYIN
109             case 0x05E3: return false; // HEBREW LETTER FINAL PE
110             case 0x05E4: return false; // HEBREW LETTER PE
111             case 0x05E5: return false; // HEBREW LETTER FINAL TSADI
112             case 0x05E6: return false; // HEBREW LETTER TSADI
113             case 0x05E7: return false; // HEBREW LETTER QOF
114             case 0x05E8: return false; // HEBREW LETTER RESH
115             case 0x05E9: return false; // HEBREW LETTER SHIN
116             case 0x05EA: return false; // HEBREW LETTER TAV
117         }
118 
119         switch (c) {  // a few random, out of order characters
120             case 0x00D7: return false; // MULTIPLICATION SIGN
121             case 0x00F7: return false; // DIVISION SIGN
122             // A bug in Java prevents a LEFT-TO-RIGHT MARK
123             // and RIGHT-TO-LEFT MARK from being correctly output
124             // as the actual character in this encoding even
125             // though it does exist in the ISO-8859-8 character set.
126             // See JDC bug 4758951
127             // http://developer.java.sun.com/developer/bugParade/bugs/4758951.html
128             // They have marked this as fixed in Tiger (i.e. Java 1.5)
129             // case 0x200E: return false; // LEFT-TO-RIGHT MARK
130             // case 0x200F: return false; // RIGHT-TO-LEFT MARK
131             case 0x2017: return false; // DOUBLE LOW LINE
132         }
133 
134         return true;
135 
136     }
137 
138 }