1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 import java.io.BufferedReader; 28 import java.io.FileReader; 29 import java.io.FileNotFoundException; 30 import java.io.IOException; 31 import java.io.File; 32 import java.util.regex.Pattern; 33 import java.util.ArrayList; 34 35 /** 36 * The UnicodeSpec class provides a way to read in Unicode character 37 * properties from a Unicode data file. One instance of class UnicodeSpec 38 * holds a decoded version of one line of the data file. The file may 39 * be obtained from www.unicode.org. The method readSpecFile returns an array 40 * of UnicodeSpec objects. 41 * 42 * @author Guy Steele 43 * @author John O'Conner 44 */ 45 46 public class UnicodeSpec { 47 UnicodeSpec()48 public UnicodeSpec() { 49 this(0xffff); 50 } 51 UnicodeSpec(int codePoint)52 public UnicodeSpec(int codePoint) { 53 this.codePoint = codePoint; 54 generalCategory = UNASSIGNED; 55 bidiCategory = DIRECTIONALITY_UNDEFINED; 56 mirrored = false; 57 titleMap = 0xFFFF; 58 upperMap = 0xFFFF; 59 lowerMap = 0xFFFF; 60 decimalValue = -1; 61 digitValue = -1; 62 numericValue = ""; 63 oldName = null; 64 comment = null; 65 name = null; 66 } 67 toString()68 public String toString() { 69 StringBuffer result = new StringBuffer(hex6(codePoint)); 70 if (getUpperMap() != 0xffff) { 71 result.append(", upper=").append(hex6(upperMap)); 72 } 73 if (getLowerMap() != 0xffff) { 74 result.append(", lower=").append(hex6(lowerMap)); 75 } 76 if (getTitleMap() != 0xffff) { 77 result.append(", title=").append(hex6(titleMap)); 78 } 79 return result.toString(); 80 } 81 hex4(int n)82 static String hex4(int n) { 83 String q = Long.toHexString(n & 0xFFFF).toUpperCase(); 84 return "0000".substring(Math.min(4, q.length())) + q; 85 } 86 hex6(int n)87 static String hex6(int n) { 88 String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase(); 89 return "000000".substring(Math.min(6, str.length())) + str; 90 91 } 92 93 94 /** 95 * Given one line of a Unicode data file as a String, parse the line 96 * and return a UnicodeSpec object that contains the same character information. 97 * 98 * @param s a line of the Unicode data file to be parsed 99 * @return a UnicodeSpec object, or null if the parsing process failed for some reason 100 */ parse(String s)101 public static UnicodeSpec parse(String s) { 102 UnicodeSpec spec = null; 103 String[] tokens = null; 104 105 try { 106 tokens = tokenSeparator.split(s, REQUIRED_FIELDS); 107 spec = new UnicodeSpec(); 108 spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE])); 109 spec.setName(parseName(tokens[FIELD_NAME])); 110 spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY])); 111 spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI])); 112 spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS])); 113 spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION])); 114 spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL])); 115 spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT])); 116 spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC])); 117 spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED])); 118 spec.setOldName(parseOldName(tokens[FIELD_OLDNAME])); 119 spec.setComment(parseComment(tokens[FIELD_COMMENT])); 120 spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE])); 121 spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE])); 122 spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE])); 123 } 124 catch(Exception e) { 125 spec = null; 126 System.out.println("Error parsing spec line."); 127 } 128 return spec; 129 } 130 131 /** 132 * Parse the codePoint attribute for a Unicode character. If the parse succeeds, 133 * the codePoint field of this UnicodeSpec object is updated and false is returned. 134 * 135 * The codePoint attribute should be a four-digit hexadecimal integer. 136 * 137 * @param s the codePoint attribute extracted from a line of the Unicode data file 138 * @return code point if successful 139 * @exception NumberFormatException if unable to parse argument 140 */ parseCodePoint(String s)141 public static int parseCodePoint(String s) throws NumberFormatException { 142 return Integer.parseInt(s, 16); 143 } 144 parseName(String s)145 public static String parseName(String s) throws Exception { 146 if (s==null) throw new Exception("Cannot parse name."); 147 return s; 148 } 149 parseGeneralCategory(String s)150 public static byte parseGeneralCategory(String s) throws Exception { 151 byte category = GENERAL_CATEGORY_COUNT; 152 153 for (byte x=0; x<generalCategoryList.length; x++) { 154 if (s.equals(generalCategoryList[x][SHORT])) { 155 category = x; 156 break; 157 } 158 } 159 if (category >= GENERAL_CATEGORY_COUNT) { 160 throw new Exception("Could not parse general category."); 161 } 162 return category; 163 } 164 parseBidiCategory(String s)165 public static byte parseBidiCategory(String s) throws Exception { 166 byte category = DIRECTIONALITY_CATEGORY_COUNT; 167 168 for (byte x=0; x<bidiCategoryList.length; x++) { 169 if (s.equals(bidiCategoryList[x][SHORT])) { 170 category = x; 171 break; 172 } 173 } 174 if (category >= DIRECTIONALITY_CATEGORY_COUNT) { 175 throw new Exception("Could not parse bidi category."); 176 } 177 return category; 178 } 179 180 181 /** 182 * Parse the combining attribute for a Unicode character. If there is a combining 183 * attribute and the parse succeeds, then the hasCombining field is set to true, 184 * the combining field of this UnicodeSpec object is updated, and false is returned. 185 * If the combining attribute is an empty string, the parse succeeds but the 186 * hasCombining field is set to false. (and false is returned). 187 * 188 * The combining attribute, if any, should be a nonnegative decimal integer. 189 * 190 * @param s the combining attribute extracted from a line of the Unicode data file 191 * @return the combining class value if any, -1 if property not defined 192 * @exception Exception if can't parse the combining class 193 */ 194 parseCombiningClass(String s)195 public static int parseCombiningClass(String s) throws Exception { 196 int combining = -1; 197 if (s.length()>0) { 198 combining = Integer.parseInt(s, 10); 199 } 200 return combining; 201 } 202 203 /** 204 * Parse the decomposition attribute for a Unicode character. If the parse succeeds, 205 * the decomposition field of this UnicodeSpec object is updated and false is returned. 206 * 207 * The decomposition attribute is complicated; for now, it is treated as a string. 208 * 209 * @param s the decomposition attribute extracted from a line of the Unicode data file 210 * @return true if the parse failed; otherwise false 211 */ 212 parseDecomposition(String s)213 public static String parseDecomposition(String s) throws Exception { 214 if (s==null) throw new Exception("Cannot parse decomposition."); 215 return s; 216 } 217 218 219 /** 220 * Parse the decimal value attribute for a Unicode character. If there is a decimal value 221 * attribute and the parse succeeds, then the hasDecimalValue field is set to true, 222 * the decimalValue field of this UnicodeSpec object is updated, and false is returned. 223 * If the decimal value attribute is an empty string, the parse succeeds but the 224 * hasDecimalValue field is set to false. (and false is returned). 225 * 226 * The decimal value attribute, if any, should be a nonnegative decimal integer. 227 * 228 * @param s the decimal value attribute extracted from a line of the Unicode data file 229 * @return the decimal value as an int, -1 if no decimal value defined 230 * @exception NumberFormatException if the parse fails 231 */ parseDecimalValue(String s)232 public static int parseDecimalValue(String s) throws NumberFormatException { 233 int value = -1; 234 235 if (s.length() > 0) { 236 value = Integer.parseInt(s, 10); 237 } 238 return value; 239 } 240 241 /** 242 * Parse the digit value attribute for a Unicode character. If there is a digit value 243 * attribute and the parse succeeds, then the hasDigitValue field is set to true, 244 * the digitValue field of this UnicodeSpec object is updated, and false is returned. 245 * If the digit value attribute is an empty string, the parse succeeds but the 246 * hasDigitValue field is set to false. (and false is returned). 247 * 248 * The digit value attribute, if any, should be a nonnegative decimal integer. 249 * 250 * @param s the digit value attribute extracted from a line of the Unicode data file 251 * @return the digit value as an non-negative int, or -1 if no digit property defined 252 * @exception NumberFormatException if the parse fails 253 */ parseDigitValue(String s)254 public static int parseDigitValue(String s) throws NumberFormatException { 255 int value = -1; 256 257 if (s.length() > 0) { 258 value = Integer.parseInt(s, 10); 259 } 260 return value; 261 } 262 parseNumericValue(String s)263 public static String parseNumericValue(String s) throws Exception { 264 if (s == null) throw new Exception("Cannot parse numeric value."); 265 return s; 266 } 267 parseComment(String s)268 public static String parseComment(String s) throws Exception { 269 if (s == null) throw new Exception("Cannot parse comment."); 270 return s; 271 } 272 parseMirrored(String s)273 public static boolean parseMirrored(String s) throws Exception { 274 boolean mirrored; 275 if (s.length() == 1) { 276 if (s.charAt(0) == 'Y') {mirrored = true;} 277 else if (s.charAt(0) == 'N') {mirrored = false;} 278 else {throw new Exception("Cannot parse mirrored property.");} 279 } 280 else { throw new Exception("Cannot parse mirrored property.");} 281 return mirrored; 282 } 283 parseOldName(String s)284 public static String parseOldName(String s) throws Exception { 285 if (s == null) throw new Exception("Cannot parse old name"); 286 return s; 287 } 288 289 /** 290 * Parse the uppercase mapping attribute for a Unicode character. If there is a uppercase 291 * mapping attribute and the parse succeeds, then the hasUpperMap field is set to true, 292 * the upperMap field of this UnicodeSpec object is updated, and false is returned. 293 * If the uppercase mapping attribute is an empty string, the parse succeeds but the 294 * hasUpperMap field is set to false. (and false is returned). 295 * 296 * The uppercase mapping attribute should be a four-digit hexadecimal integer. 297 * 298 * @param s the uppercase mapping attribute extracted from a line of the Unicode data file 299 * @return uppercase char if defined, \uffff otherwise 300 * @exception NumberFormatException if parse fails 301 */ parseUpperMap(String s)302 public static int parseUpperMap(String s) throws NumberFormatException { 303 int upperCase = 0xFFFF; 304 305 if (s.length() >= 4) { 306 upperCase = Integer.parseInt(s, 16); 307 } 308 else if (s.length() != 0) { 309 throw new NumberFormatException(); 310 } 311 return upperCase; 312 } 313 314 /** 315 * Parse the lowercase mapping attribute for a Unicode character. If there is a lowercase 316 * mapping attribute and the parse succeeds, then the hasLowerMap field is set to true, 317 * the lowerMap field of this UnicodeSpec object is updated, and false is returned. 318 * If the lowercase mapping attribute is an empty string, the parse succeeds but the 319 * hasLowerMap field is set to false. (and false is returned). 320 * 321 * The lowercase mapping attribute should be a four-digit hexadecimal integer. 322 * 323 * @param s the lowercase mapping attribute extracted from a line of the Unicode data file 324 * @return lowercase char mapping if defined, \uFFFF otherwise 325 * @exception NumberFormatException if parse fails 326 */ parseLowerMap(String s)327 public static int parseLowerMap(String s) throws NumberFormatException { 328 int lowerCase = 0xFFFF; 329 330 if (s.length() >= 4) { 331 lowerCase = Integer.parseInt(s, 16); 332 } 333 else if (s.length() != 0) { 334 throw new NumberFormatException(); 335 } 336 return lowerCase; 337 } 338 339 /** 340 * Parse the titlecase mapping attribute for a Unicode character. If there is a titlecase 341 * mapping attribute and the parse succeeds, then the hasTitleMap field is set to true, 342 * the titleMap field of this UnicodeSpec object is updated, and false is returned. 343 * If the titlecase mapping attribute is an empty string, the parse succeeds but the 344 * hasTitleMap field is set to false. (and false is returned). 345 * 346 * The titlecase mapping attribute should be a four-digit hexadecimal integer. 347 * 348 * @param s the titlecase mapping attribute extracted from a line of the Unicode data file 349 * @return title case char mapping if defined, \uFFFF otherwise 350 * @exception NumberFormatException if parse fails 351 */ parseTitleMap(String s)352 public static int parseTitleMap(String s) throws NumberFormatException { 353 int titleCase = 0xFFFF; 354 355 if (s.length() >= 4) { 356 titleCase = Integer.parseInt(s, 16); 357 } 358 else if (s.length() != 0) { 359 throw new NumberFormatException(); 360 } 361 return titleCase; 362 } 363 364 /** 365 * Read and parse a Unicode data file. 366 * 367 * @param file a file specifying the Unicode data file to be read 368 * @return an array of UnicodeSpec objects, one for each line of the 369 * Unicode data file that could be successfully parsed as 370 * specifying Unicode character attributes 371 */ 372 readSpecFile(File file, int plane)373 public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException { 374 ArrayList<UnicodeSpec> list = new ArrayList<>(3000); 375 UnicodeSpec[] result = null; 376 int count = 0; 377 BufferedReader f = new BufferedReader(new FileReader(file)); 378 String line = null; 379 loop: 380 while(true) { 381 try { 382 line = f.readLine(); 383 } 384 catch (IOException e) { 385 break loop; 386 } 387 if (line == null) break loop; 388 UnicodeSpec item = parse(line.trim()); 389 int specPlane = item.getCodePoint() >>> 16; 390 if (specPlane < plane) continue; 391 if (specPlane > plane) break; 392 393 if (item != null) { 394 list.add(item); 395 } 396 } 397 result = new UnicodeSpec[list.size()]; 398 list.toArray(result); 399 return result; 400 } 401 setCodePoint(int value)402 void setCodePoint(int value) { 403 codePoint = value; 404 } 405 406 /** 407 * Return the code point in this Unicode specification 408 * @return the char code point representing by the specification 409 */ getCodePoint()410 public int getCodePoint() { 411 return codePoint; 412 } 413 setName(String name)414 void setName(String name) { 415 this.name = name; 416 } 417 getName()418 public String getName() { 419 return name; 420 } 421 setGeneralCategory(byte category)422 void setGeneralCategory(byte category) { 423 generalCategory = category; 424 } 425 getGeneralCategory()426 public byte getGeneralCategory() { 427 return generalCategory; 428 } 429 setBidiCategory(byte category)430 void setBidiCategory(byte category) { 431 bidiCategory = category; 432 } 433 getBidiCategory()434 public byte getBidiCategory() { 435 return bidiCategory; 436 } 437 setCombiningClass(int combiningClass)438 void setCombiningClass(int combiningClass) { 439 this.combiningClass = combiningClass; 440 } 441 getCombiningClass()442 public int getCombiningClass() { 443 return combiningClass; 444 } 445 setDecomposition(String decomposition)446 void setDecomposition(String decomposition) { 447 this.decomposition = decomposition; 448 } 449 getDecomposition()450 public String getDecomposition() { 451 return decomposition; 452 } 453 setDecimalValue(int value)454 void setDecimalValue(int value) { 455 decimalValue = value; 456 } 457 getDecimalValue()458 public int getDecimalValue() { 459 return decimalValue; 460 } 461 isDecimalValue()462 public boolean isDecimalValue() { 463 return decimalValue != -1; 464 } 465 setDigitValue(int value)466 void setDigitValue(int value) { 467 digitValue = value; 468 } 469 getDigitValue()470 public int getDigitValue() { 471 return digitValue; 472 } 473 isDigitValue()474 public boolean isDigitValue() { 475 return digitValue != -1; 476 } 477 setNumericValue(String value)478 void setNumericValue(String value) { 479 numericValue = value; 480 } 481 getNumericValue()482 public String getNumericValue() { 483 return numericValue; 484 } 485 isNumericValue()486 public boolean isNumericValue() { 487 return numericValue.length() > 0; 488 } 489 setMirrored(boolean value)490 void setMirrored(boolean value) { 491 mirrored = value; 492 } 493 isMirrored()494 public boolean isMirrored() { 495 return mirrored; 496 } 497 setOldName(String name)498 void setOldName(String name) { 499 oldName = name; 500 } 501 getOldName()502 public String getOldName() { 503 return oldName; 504 } 505 setComment(String comment)506 void setComment(String comment) { 507 this.comment = comment; 508 } 509 getComment()510 public String getComment() { 511 return comment; 512 } 513 setUpperMap(int ch)514 void setUpperMap(int ch) { 515 upperMap = ch; 516 }; 517 getUpperMap()518 public int getUpperMap() { 519 return upperMap; 520 } 521 hasUpperMap()522 public boolean hasUpperMap() { 523 return upperMap != 0xffff; 524 } 525 setLowerMap(int ch)526 void setLowerMap(int ch) { 527 lowerMap = ch; 528 } 529 getLowerMap()530 public int getLowerMap() { 531 return lowerMap; 532 } 533 hasLowerMap()534 public boolean hasLowerMap() { 535 return lowerMap != 0xffff; 536 } 537 setTitleMap(int ch)538 void setTitleMap(int ch) { 539 titleMap = ch; 540 } 541 getTitleMap()542 public int getTitleMap() { 543 return titleMap; 544 } 545 hasTitleMap()546 public boolean hasTitleMap() { 547 return titleMap != 0xffff; 548 } 549 550 int codePoint; // the characters UTF-32 code value 551 String name; // the ASCII name 552 byte generalCategory; // general category, available via Characte.getType() 553 byte bidiCategory; // available via Character.getBidiType() 554 int combiningClass; // not used in Character 555 String decomposition; // not used in Character 556 int decimalValue; // decimal digit value 557 int digitValue; // not all digits are decimal 558 String numericValue; // numeric value if digit or non-digit 559 boolean mirrored; // 560 String oldName; 561 String comment; 562 int upperMap; 563 int lowerMap; 564 int titleMap; 565 566 // this is the number of fields in one line of the UnicodeData.txt file 567 // each field is separated by a semicolon (a token) 568 static final int REQUIRED_FIELDS = 15; 569 570 /** 571 * General category types 572 * To preserve compatibility, these values cannot be changed 573 */ 574 public static final byte 575 UNASSIGNED = 0, // Cn normative 576 UPPERCASE_LETTER = 1, // Lu normative 577 LOWERCASE_LETTER = 2, // Ll normative 578 TITLECASE_LETTER = 3, // Lt normative 579 MODIFIER_LETTER = 4, // Lm normative 580 OTHER_LETTER = 5, // Lo normative 581 NON_SPACING_MARK = 6, // Mn informative 582 ENCLOSING_MARK = 7, // Me informative 583 COMBINING_SPACING_MARK = 8, // Mc normative 584 DECIMAL_DIGIT_NUMBER = 9, // Nd normative 585 LETTER_NUMBER = 10, // Nl normative 586 OTHER_NUMBER = 11, // No normative 587 SPACE_SEPARATOR = 12, // Zs normative 588 LINE_SEPARATOR = 13, // Zl normative 589 PARAGRAPH_SEPARATOR = 14, // Zp normative 590 CONTROL = 15, // Cc normative 591 FORMAT = 16, // Cf normative 592 // 17 is unused for no apparent reason, 593 // but must preserve forward compatibility 594 PRIVATE_USE = 18, // Co normative 595 SURROGATE = 19, // Cs normative 596 DASH_PUNCTUATION = 20, // Pd informative 597 START_PUNCTUATION = 21, // Ps informative 598 END_PUNCTUATION = 22, // Pe informative 599 CONNECTOR_PUNCTUATION = 23, // Pc informative 600 OTHER_PUNCTUATION = 24, // Po informative 601 MATH_SYMBOL = 25, // Sm informative 602 CURRENCY_SYMBOL = 26, // Sc informative 603 MODIFIER_SYMBOL = 27, // Sk informative 604 OTHER_SYMBOL = 28, // So informative 605 INITIAL_QUOTE_PUNCTUATION = 29, // Pi informative 606 FINAL_QUOTE_PUNCTUATION = 30, // Pf informative 607 608 // this value is only used in the character generation tool 609 // it can change to accommodate the addition of new categories. 610 GENERAL_CATEGORY_COUNT = 31; // sentinel value 611 612 static final byte SHORT = 0, LONG = 1; 613 // general category type strings 614 // NOTE: The order of this category array is dependent on the assignment of 615 // category constants above. We want to access this array using constants above. 616 // [][SHORT] is the SHORT name, [][LONG] is the LONG name 617 static final String[][] generalCategoryList = { 618 {"Cn", "UNASSIGNED"}, 619 {"Lu", "UPPERCASE_LETTER"}, 620 {"Ll", "LOWERCASE_LETTER"}, 621 {"Lt", "TITLECASE_LETTER"}, 622 {"Lm", "MODIFIER_LETTER"}, 623 {"Lo", "OTHER_LETTER"}, 624 {"Mn", "NON_SPACING_MARK"}, 625 {"Me", "ENCLOSING_MARK"}, 626 {"Mc", "COMBINING_SPACING_MARK"}, 627 {"Nd", "DECIMAL_DIGIT_NUMBER"}, 628 {"Nl", "LETTER_NUMBER"}, 629 {"No", "OTHER_NUMBER"}, 630 {"Zs", "SPACE_SEPARATOR"}, 631 {"Zl", "LINE_SEPARATOR"}, 632 {"Zp", "PARAGRAPH_SEPARATOR"}, 633 {"Cc", "CONTROL"}, 634 {"Cf", "FORMAT"}, 635 {"xx", "unused"}, 636 {"Co", "PRIVATE_USE"}, 637 {"Cs", "SURROGATE"}, 638 {"Pd", "DASH_PUNCTUATION"}, 639 {"Ps", "START_PUNCTUATION"}, 640 {"Pe", "END_PUNCTUATION"}, 641 {"Pc", "CONNECTOR_PUNCTUATION"}, 642 {"Po", "OTHER_PUNCTUATION"}, 643 {"Sm", "MATH_SYMBOL"}, 644 {"Sc", "CURRENCY_SYMBOL"}, 645 {"Sk", "MODIFIER_SYMBOL"}, 646 {"So", "OTHER_SYMBOL"}, 647 {"Pi", "INITIAL_QUOTE_PUNCTUATION"}, 648 {"Pf", "FINAL_QUOTE_PUNCTUATION"} 649 }; 650 651 /** 652 * Bidirectional categories 653 */ 654 public static final byte 655 DIRECTIONALITY_UNDEFINED = -1, 656 // Strong category 657 DIRECTIONALITY_LEFT_TO_RIGHT = 0, // L 658 DIRECTIONALITY_RIGHT_TO_LEFT = 1, // R 659 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2, // AL 660 // Weak category 661 DIRECTIONALITY_EUROPEAN_NUMBER = 3, // EN 662 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4, // ES 663 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5, // ET 664 DIRECTIONALITY_ARABIC_NUMBER = 6, // AN 665 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7, // CS 666 DIRECTIONALITY_NONSPACING_MARK = 8, // NSM 667 DIRECTIONALITY_BOUNDARY_NEUTRAL = 9, // BN 668 // Neutral category 669 DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10, // B 670 DIRECTIONALITY_SEGMENT_SEPARATOR = 11, // S 671 DIRECTIONALITY_WHITESPACE = 12, // WS 672 DIRECTIONALITY_OTHER_NEUTRALS = 13, // ON 673 674 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14, // LRE 675 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15, // LRO 676 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16, // RLE 677 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17, // RLO 678 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18, // PDF 679 680 DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19, // LRI 681 DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20, // RLI 682 DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21, // FSI 683 DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22, // PDI 684 685 DIRECTIONALITY_CATEGORY_COUNT = 23; // sentinel value 686 687 // If changes are made to the above bidi category assignments, this 688 // list of bidi category names must be changed to keep their order in synch. 689 // Access this list using the bidi category constants above. 690 static final String[][] bidiCategoryList = { 691 {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"}, 692 {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"}, 693 {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"}, 694 {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"}, 695 {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"}, 696 {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"}, 697 {"AN", "DIRECTIONALITY_ARABIC_NUMBER"}, 698 {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"}, 699 {"NSM", "DIRECTIONALITY_NONSPACING_MARK"}, 700 {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"}, 701 {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"}, 702 {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"}, 703 {"WS", "DIRECTIONALITY_WHITESPACE"}, 704 {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"}, 705 {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"}, 706 {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"}, 707 {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"}, 708 {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"}, 709 {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"}, 710 {"LRI", "DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE"}, 711 {"RLI", "DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE"}, 712 {"FSI", "DIRECTIONALITY_FIRST_STRONG_ISOLATE"}, 713 {"PDI", "DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE"}, 714 715 }; 716 717 // Unicode specification lines have fields in this order. 718 static final byte 719 FIELD_VALUE = 0, 720 FIELD_NAME = 1, 721 FIELD_CATEGORY = 2, 722 FIELD_CLASS = 3, 723 FIELD_BIDI = 4, 724 FIELD_DECOMPOSITION = 5, 725 FIELD_DECIMAL = 6, 726 FIELD_DIGIT = 7, 727 FIELD_NUMERIC = 8, 728 FIELD_MIRRORED = 9, 729 FIELD_OLDNAME = 10, 730 FIELD_COMMENT = 11, 731 FIELD_UPPERCASE = 12, 732 FIELD_LOWERCASE = 13, 733 FIELD_TITLECASE = 14; 734 735 static final Pattern tokenSeparator = Pattern.compile(";"); 736 main(String[] args)737 public static void main(String[] args) { 738 UnicodeSpec[] spec = null; 739 if (args.length == 2 ) { 740 try { 741 File file = new File(args[0]); 742 int plane = Integer.parseInt(args[1]); 743 spec = UnicodeSpec.readSpecFile(file, plane); 744 System.out.println("UnicodeSpec[" + spec.length + "]:"); 745 for (int x=0; x<spec.length; x++) { 746 System.out.println(spec[x].toString()); 747 } 748 } 749 catch(Exception e) { 750 e.printStackTrace(); 751 } 752 } 753 754 } 755 756 } 757