1 package gnu.text; 2 import gnu.lists.Consumer; 3 import gnu.lists.Sequence; 4 import gnu.kawa.util.*; 5 import java.io.*; 6 import java.util.Map; 7 8 /** 9 * A wrapper for characters. 10 * #author Per Bothner 11 */ 12 13 /* 14 * This is similar to java.lang.Character, so why don't we just use that? 15 * Good question, since this new class makes us a little less compatible 16 * with "standard" Java. However, that should be fairly minor, since 17 * few methods will require Character parameters or arrays (better to 18 * just use chars then). 19 * The Char class uses hashing to ensure that characters are unique. 20 * Thus equal? Char are eq?, which is convenient. 21 * Also, we can use 32-bit character values to allow for non-Unicode chars. 22 */ 23 24 public class Char implements Comparable, Externalizable { 25 // Leave open the possibility for characters beyond Unicode. 26 int value; 27 28 public static final int IGNORABLE_CHAR = 0x1FFFFF; 29 30 /** Should only be used for serialization. */ Char()31 public Char() { 32 } 33 Char(int ch)34 Char(int ch) { 35 value = ch; 36 } 37 print(Consumer out)38 public void print(Consumer out) { 39 print(value, out); 40 } 41 castToChar(Object obj)42 public static char castToChar(Object obj) { 43 if (obj instanceof Char) 44 return ((Char) obj).charValue(); 45 else 46 return ((Character) obj).charValue(); 47 } 48 castToCharacter(Object obj)49 public static int castToCharacter(Object obj) { 50 if (obj instanceof Char) 51 return ((Char) obj).intValue(); 52 else 53 return ((Character) obj).charValue(); 54 } 55 castToCharacterOrEof(Object obj)56 public static int castToCharacterOrEof(Object obj) { 57 if (obj == Sequence.eofValue) 58 return -1; 59 return castToCharacter(obj); 60 } 61 isChar(Object obj)62 public static boolean isChar(Object obj) { 63 return obj instanceof Char || obj instanceof Character; 64 } 65 isCharOrEof(Object obj)66 public static boolean isCharOrEof(Object obj) { 67 return obj instanceof Char || obj instanceof Character 68 || obj == Sequence.eofValue; 69 } 70 71 /** Return character value of argument; otherwise return -2. 72 * Uses same conversion as {@code castToCharacterOrEof}. */ checkCharOrEof(Object obj)73 public static int checkCharOrEof(Object obj) { 74 if (obj instanceof Char) 75 return ((Char) obj).intValue(); 76 if (obj instanceof Character) 77 return ((Character) obj).charValue(); 78 if (obj == Sequence.eofValue) 79 return -1; 80 return -2; 81 } 82 print(int i, Appendable out)83 public static void print(int i, Appendable out) { 84 try { 85 append(i, out); 86 } catch (IOException ex) { 87 throw new RuntimeException(ex); 88 } 89 90 } 91 append(int i, Appendable out)92 public static void append(int i, Appendable out) throws IOException { 93 if (i >= 0x10000 && i != IGNORABLE_CHAR) { 94 out.append((char) (((i - 0x10000) >> 10) + 0xD800)); 95 i = (i & 0x3FF) + 0xDC00; 96 } 97 out.append((char) i); 98 } 99 charValue()100 public final char charValue() { 101 return (char) value; 102 } 103 intValue()104 public final int intValue() { 105 return value; 106 } 107 hashCode()108 public int hashCode() { 109 return value; 110 } 111 112 static Char[] ascii; 113 114 static CharMap hashTable = new CharMap(); 115 116 static { 117 ascii = new Char[128]; 118 for (int i = 128; --i >= 0; ) 119 ascii[i] = new Char(i); 120 } 121 valueOf(int ch)122 public static Char valueOf(int ch) { 123 if (ch < 128) 124 return ascii[ch]; 125 synchronized (hashTable) { 126 return hashTable.get(ch); 127 } 128 } 129 make(int ch)130 public static Char make(int ch) { 131 return valueOf(ch); 132 } 133 makeOrEof(int ch)134 public static Object makeOrEof(int ch) { 135 if (ch < 0) 136 return Sequence.eofValue; 137 return make(ch); 138 } 139 equals(Object obj)140 public boolean equals(Object obj) { 141 // This does not work for hashing in make! Redo make! FIXME 142 // return this == obj; 143 return obj != null && (obj instanceof Char) 144 && ((Char)obj).intValue() == value; 145 } 146 147 private static String charNameValues = 148 " \t\n\n\r\f\b\033\033\177\177\177\007\007\013\0\0"; 149 static String[] charNames = { "space", 150 "tab", 151 "newline", 152 "linefeed", 153 "return", 154 "page", 155 "backspace", 156 "escape", 157 "esc", 158 "delete", 159 "del", 160 "rubout", 161 "alarm", 162 "bel", 163 "vtab", 164 "null", 165 "nul" }; 166 addNamedChars(Map<String,String> map)167 public static void addNamedChars(Map<String,String> map) { 168 for (int i = charNames.length; --i >= 0 ; ) { 169 map.put(charNames[i], charNameValues.substring(i,i+1)); 170 } 171 } 172 nameToChar(String name)173 public static int nameToChar(String name) { 174 for (int i = charNames.length; --i >= 0 ; ) { 175 if (charNames[i].equals(name)) 176 return charNameValues.charAt(i); 177 } 178 for (int i = charNames.length; --i >= 0 ; ) { 179 if (charNames[i].equalsIgnoreCase(name)) 180 return charNameValues.charAt(i); 181 } 182 if ("ignorable-char".equalsIgnoreCase(name)) 183 return IGNORABLE_CHAR; 184 int len = name.length(); 185 if (len > 1 && name.charAt(0) == 'u') { 186 int value = 0; 187 for (int pos = 1; ; pos++) { 188 if (pos == len) 189 return value; 190 int dig = Character.digit(name.charAt(pos), 16); 191 if (dig < 0) 192 break; 193 value = (value << 4) + dig; 194 } 195 } 196 197 // Check for Emacs control character syntax. 198 if (len == 3 && name.charAt(1) == '-') { 199 char ch = name.charAt(0); 200 if (ch == 'c' || ch == 'C') { 201 ch = name.charAt(2); 202 return ch & 31; 203 } 204 } 205 206 return -1; 207 } 208 toString()209 public String toString() { 210 return toString(value); 211 } 212 toString(int value)213 public static String toString(int value) { 214 StringBuffer buf = new StringBuffer(); 215 buf.append('\''); 216 if (value >= (int) ' ' && value < 127 && value != '\'') 217 buf.append((char) value); 218 else { 219 buf.append('\\'); 220 if (value == '\'') 221 buf.append('\''); 222 else if (value == '\n') 223 buf.append('n'); 224 else if (value == '\r') 225 buf.append('r'); 226 else if (value == '\t') 227 buf.append('t'); 228 else if (value < 256) { 229 String str = Integer.toOctalString(value); 230 for (int i = 3 - str.length(); --i >= 0; ) 231 buf.append('0'); 232 buf.append(str); 233 } else { 234 buf.append('u'); 235 String str = Integer.toHexString(value); 236 for (int i = 4 - str.length(); --i >= 0; ) 237 buf.append('0'); 238 buf.append(str); 239 } 240 } 241 buf.append('\''); 242 return buf.toString(); 243 } 244 toScmReadableString(int ch)245 public static String toScmReadableString(int ch) { 246 StringBuffer sbuf = new StringBuffer(20); 247 sbuf.append("#\\"); 248 int nlen = charNameValues.length(); 249 for (int i = 0; i < nlen; i++) { 250 if ((char) ch == charNameValues.charAt(i)) { 251 sbuf.append(charNames[i]); 252 return sbuf.toString(); 253 } 254 } 255 if (ch == IGNORABLE_CHAR) { 256 sbuf.append("ignorable-char"); 257 } else if (ch < ' ' || ch > 0x7F) { 258 sbuf.append('x'); 259 sbuf.append(Integer.toString(ch, 16)); 260 } else 261 sbuf.append((char) ch); 262 return sbuf.toString(); 263 } 264 265 /** 266 * @serialData Writes the char value as an int. 267 */ writeExternal(ObjectOutput out)268 public void writeExternal(ObjectOutput out) throws IOException { 269 out.writeInt(value); 270 } 271 readExternal(ObjectInput in)272 public void readExternal(ObjectInput in) 273 throws IOException, ClassNotFoundException { 274 value = in.readInt(); 275 } 276 readResolve()277 public Object readResolve() throws ObjectStreamException { 278 return make(value); 279 } 280 compareTo(Object o)281 public int compareTo(Object o) { 282 return value - ((Char) o).value; 283 } 284 285 /** Helper class for mapping Unicode scalar value to Char object. */ 286 287 static class CharMap extends AbstractWeakHashTable<Char,Char> { get(int key)288 public Char get(int key) { 289 cleanup(); 290 int hash = key; 291 int index = hashToIndex(hash); 292 for (AbstractWeakHashTable.WEntry<Char,Char> node = table[index]; 293 node != null; node = node.next) { 294 Char val = node.getValue(); 295 if (val != null && val.intValue() == key) 296 return val; 297 } 298 Char val = new Char(key); 299 super.put(val, val); 300 return val; 301 } 302 getKeyFromValue(Char ch)303 protected Char getKeyFromValue(Char ch) { 304 return ch; 305 } 306 matches(Char oldValue, Char newValue)307 protected boolean matches(Char oldValue, Char newValue) { 308 return oldValue.intValue() == newValue.intValue(); 309 } 310 } 311 } 312