1 /* Copyright 2002, 2003 Elliotte Rusty Harold 2 3 This library is free software; you can redistribute it and/or modify 4 it under the terms of version 2.1 of the GNU Lesser General Public 5 License as published by the Free Software Foundation. 6 7 This library is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 GNU Lesser General Public License for more details. 11 12 You should have received a copy of the GNU Lesser General Public 13 License along with this library; if not, write to the 14 Free Software Foundation, Inc., 59 Temple Place, Suite 330, 15 Boston, MA 02111-1307 USA 16 17 You can contact Elliotte Rusty Harold by sending e-mail to 18 elharo@ibiblio.org. Please include the word "XOM" in the 19 subject line. The XOM home page is located at http://www.xom.nu/ 20 */ 21 22 package nu.xom; 23 24 import java.io.Writer; 25 26 /** 27 * <p> 28 * ISO 8859-8, ASCII plus Hebrew 29 * </p> 30 * 31 * @author Elliotte Rusty Harold 32 * @version 1.0 33 * 34 */ 35 class ISOHebrewWriter extends TextWriter { 36 ISOHebrewWriter(Writer out, String encoding)37 ISOHebrewWriter(Writer out, String encoding) { 38 super(out, encoding); 39 } 40 41 /** 42 * @see nu.xom.TextWriter#needsEscaping(char) 43 */ needsEscaping(char c)44 boolean needsEscaping(char c) { 45 if (c <= 0xA0) return false; 46 switch (c) { // characters shared with Latin-1 47 case 0x00A2: return false; // CENT SIGN 48 case 0x00A3: return false; // POUND SIGN 49 case 0x00A4: return false; // CURRENCY SIGN 50 case 0x00A5: return false; // YEN SIGN 51 case 0x00A6: return false; // BROKEN BAR 52 case 0x00A7: return false; // SECTION SIGN 53 case 0x00A8: return false; // DIAERESIS 54 case 0x00A9: return false; // COPYRIGHT SIGN 55 case 0x00AB: return false; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 56 case 0x00AC: return false; // NOT SIGN 57 case 0x00AD: return false; // SOFT HYPHEN 58 case 0x00AE: return false; // REGISTERED SIGN 59 // A bug in Java prevents a macron from being correctly 60 // output as the actual character in this encoding even 61 // though it does exist in the ISO-8859-8 character set. 62 // See JDC bug 4760496 63 // http://developer.java.sun.com/developer/bugParade/bugs/4760496.html 64 // They have marked this as fixed in Tiger (i.e. Java 1.5) 65 // I'm not going to fix it here yet though, because I'd 66 // prefer XOM to work correctly with earlier versions of 67 // of Java; and it's not incorrect to output a character 68 // reference even if you don't have to. It is an issue if a 69 // macron is used in a a comment or a processing 70 // instruction though. The macron is not a name character 71 // so that's not an issue though. 72 case 0x00AF: return true; // MACRON 73 case 0x00B0: return false; // DEGREE SIGN 74 case 0x00B1: return false; // PLUS-MINUS SIGN 75 case 0x00B2: return false; // SUPERSCRIPT TWO 76 case 0x00B3: return false; // SUPERSCRIPT THREE 77 case 0x00B4: return false; // ACUTE ACCENT 78 case 0x00B5: return false; // MICRO SIGN 79 case 0x00B6: return false; // PILCROW SIGN 80 case 0x00B7: return false; // MIDDLE DOT 81 case 0x00B8: return false; // CEDILLA 82 case 0x00B9: return false; // SUPERSCRIPT ONE 83 case 0x00BA: return true; // place holder to allow optimization of switch statement 84 case 0x00BB: return false; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 85 case 0x00BC: return false; // VULGAR FRACTION ONE QUARTER 86 case 0x00BD: return false; // VULGAR FRACTION ONE HALF 87 case 0x00BE: return false; // VULGAR FRACTION THREE QUARTERS 88 } 89 switch (c) { // Unicode Hebrew block 90 case 0x05D0: return false; // HEBREW LETTER ALEF 91 case 0x05D1: return false; // HEBREW LETTER BET 92 case 0x05D2: return false; // HEBREW LETTER GIMEL 93 case 0x05D3: return false; // HEBREW LETTER DALET 94 case 0x05D4: return false; // HEBREW LETTER HE 95 case 0x05D5: return false; // HEBREW LETTER VAV 96 case 0x05D6: return false; // HEBREW LETTER ZAYIN 97 case 0x05D7: return false; // HEBREW LETTER HET 98 case 0x05D8: return false; // HEBREW LETTER TET 99 case 0x05D9: return false; // HEBREW LETTER YOD 100 case 0x05DA: return false; // HEBREW LETTER FINAL KAF 101 case 0x05DB: return false; // HEBREW LETTER KAF 102 case 0x05DC: return false; // HEBREW LETTER LAMED 103 case 0x05DD: return false; // HEBREW LETTER FINAL MEM 104 case 0x05DE: return false; // HEBREW LETTER MEM 105 case 0x05DF: return false; // HEBREW LETTER FINAL NUN 106 case 0x05E0: return false; // HEBREW LETTER NUN 107 case 0x05E1: return false; // HEBREW LETTER SAMEKH 108 case 0x05E2: return false; // HEBREW LETTER AYIN 109 case 0x05E3: return false; // HEBREW LETTER FINAL PE 110 case 0x05E4: return false; // HEBREW LETTER PE 111 case 0x05E5: return false; // HEBREW LETTER FINAL TSADI 112 case 0x05E6: return false; // HEBREW LETTER TSADI 113 case 0x05E7: return false; // HEBREW LETTER QOF 114 case 0x05E8: return false; // HEBREW LETTER RESH 115 case 0x05E9: return false; // HEBREW LETTER SHIN 116 case 0x05EA: return false; // HEBREW LETTER TAV 117 } 118 119 switch (c) { // a few random, out of order characters 120 case 0x00D7: return false; // MULTIPLICATION SIGN 121 case 0x00F7: return false; // DIVISION SIGN 122 // A bug in Java prevents a LEFT-TO-RIGHT MARK 123 // and RIGHT-TO-LEFT MARK from being correctly output 124 // as the actual character in this encoding even 125 // though it does exist in the ISO-8859-8 character set. 126 // See JDC bug 4758951 127 // http://developer.java.sun.com/developer/bugParade/bugs/4758951.html 128 // They have marked this as fixed in Tiger (i.e. Java 1.5) 129 // case 0x200E: return false; // LEFT-TO-RIGHT MARK 130 // case 0x200F: return false; // RIGHT-TO-LEFT MARK 131 case 0x2017: return false; // DOUBLE LOW LINE 132 } 133 134 return true; 135 136 } 137 138 }