/* java.lang.Character -- Wrapper class for char, and Unicode subsets
   Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */

/*
 * Note: This class must not be merged with Classpath.
Gcj uses C-style 40 * arrays (see include/java-chartables.h) to store the Unicode character 41 * database, whereas Classpath uses Java objects (char[] extracted from 42 * String constants) in gnu.java.lang.CharData. Gcj's approach is more 43 * efficient, because there is no vtable or data relocation to worry about. 44 * However, despite the difference in the database interface, the two 45 * versions share identical algorithms. 46 */ 47 48 package java.lang; 49 50 import java.io.Serializable; 51 52 /** 53 * Wrapper class for the primitive char data type. In addition, this class 54 * allows one to retrieve property information and perform transformations 55 * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0. 56 * java.lang.Character is designed to be very dynamic, and as such, it 57 * retrieves information on the Unicode character set from a separate 58 * database, gnu.java.lang.CharData, which can be easily upgraded. 59 * 60 * <p>For predicates, boundaries are used to describe 61 * the set of characters for which the method will return true. 62 * This syntax uses fairly normal regular expression notation. 63 * See 5.13 of the Unicode Standard, Version 3.0, for the 64 * boundary specification. 65 * 66 * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a> 67 * for more information on the Unicode Standard. 68 * 69 * @author Tom Tromey <tromey@cygnus.com> 70 * @author Paul N. Fisher 71 * @author Jochen Hoenicke 72 * @author Eric Blake <ebb9@email.byu.edu> 73 * @since 1.0 74 * @status updated to 1.4 75 */ 76 public final class Character implements Serializable, Comparable 77 { 78 /** 79 * A subset of Unicode blocks. 80 * 81 * @author Paul N. Fisher 82 * @author Eric Blake <ebb9@email.byu.edu> 83 * @since 1.2 84 */ 85 public static class Subset 86 { 87 /** The name of the subset. */ 88 private final String name; 89 90 /** 91 * Construct a new subset of characters. 
92 * 93 * @param name the name of the subset 94 * @throws NullPointerException if name is null 95 */ Subset(String name)96 protected Subset(String name) 97 { 98 // Note that name.toString() is name, unless name was null. 99 this.name = name.toString(); 100 } 101 102 /** 103 * Compares two Subsets for equality. This is <code>final</code>, and 104 * restricts the comparison on the <code>==</code> operator, so it returns 105 * true only for the same object. 106 * 107 * @param o the object to compare 108 * @return true if o is this 109 */ equals(Object o)110 public final boolean equals(Object o) 111 { 112 return o == this; 113 } 114 115 /** 116 * Makes the original hashCode of Object final, to be consistent with 117 * equals. 118 * 119 * @return the hash code for this object 120 */ hashCode()121 public final int hashCode() 122 { 123 return super.hashCode(); 124 } 125 126 /** 127 * Returns the name of the subset. 128 * 129 * @return the name 130 */ toString()131 public final String toString() 132 { 133 return name; 134 } 135 } // class Subset 136 137 /** 138 * A family of character subsets in the Unicode specification. A character 139 * is in at most one of these blocks. 140 * 141 * This inner class was generated automatically from 142 * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts. 143 * This Unicode definition file can be found on the 144 * <a href="http://www.unicode.org">http://www.unicode.org</a> website. 145 * JDK 1.4 uses Unicode version 3.0.0. 146 * 147 * @author scripts/unicode-blocks.pl (written by Eric Blake) 148 * @since 1.2 149 */ 150 public static final class UnicodeBlock extends Subset 151 { 152 /** The start of the subset. */ 153 private final char start; 154 155 /** The end of the subset. */ 156 private final char end; 157 158 /** 159 * Constructor for strictly defined blocks. 
160 * 161 * @param start the start character of the range 162 * @param end the end character of the range 163 * @param name the block name 164 */ UnicodeBlock(char start, char end, String name)165 private UnicodeBlock(char start, char end, String name) 166 { 167 super(name); 168 this.start = start; 169 this.end = end; 170 } 171 172 /** 173 * Returns the Unicode character block which a character belongs to. 174 * 175 * @param ch the character to look up 176 * @return the set it belongs to, or null if it is not in one 177 */ of(char ch)178 public static UnicodeBlock of(char ch) 179 { 180 // Special case, since SPECIALS contains two ranges. 181 if (ch == '\uFEFF') 182 return SPECIALS; 183 // Simple binary search for the correct block. 184 int low = 0; 185 int hi = sets.length - 1; 186 while (low <= hi) 187 { 188 int mid = (low + hi) >> 1; 189 UnicodeBlock b = sets[mid]; 190 if (ch < b.start) 191 hi = mid - 1; 192 else if (ch > b.end) 193 low = mid + 1; 194 else 195 return b; 196 } 197 return null; 198 } 199 200 /** 201 * Basic Latin. 202 * '\u0000' - '\u007F'. 203 */ 204 public final static UnicodeBlock BASIC_LATIN 205 = new UnicodeBlock('\u0000', '\u007F', 206 "BASIC_LATIN"); 207 208 /** 209 * Latin-1 Supplement. 210 * '\u0080' - '\u00FF'. 211 */ 212 public final static UnicodeBlock LATIN_1_SUPPLEMENT 213 = new UnicodeBlock('\u0080', '\u00FF', 214 "LATIN_1_SUPPLEMENT"); 215 216 /** 217 * Latin Extended-A. 218 * '\u0100' - '\u017F'. 219 */ 220 public final static UnicodeBlock LATIN_EXTENDED_A 221 = new UnicodeBlock('\u0100', '\u017F', 222 "LATIN_EXTENDED_A"); 223 224 /** 225 * Latin Extended-B. 226 * '\u0180' - '\u024F'. 227 */ 228 public final static UnicodeBlock LATIN_EXTENDED_B 229 = new UnicodeBlock('\u0180', '\u024F', 230 "LATIN_EXTENDED_B"); 231 232 /** 233 * IPA Extensions. 234 * '\u0250' - '\u02AF'. 
235 */ 236 public final static UnicodeBlock IPA_EXTENSIONS 237 = new UnicodeBlock('\u0250', '\u02AF', 238 "IPA_EXTENSIONS"); 239 240 /** 241 * Spacing Modifier Letters. 242 * '\u02B0' - '\u02FF'. 243 */ 244 public final static UnicodeBlock SPACING_MODIFIER_LETTERS 245 = new UnicodeBlock('\u02B0', '\u02FF', 246 "SPACING_MODIFIER_LETTERS"); 247 248 /** 249 * Combining Diacritical Marks. 250 * '\u0300' - '\u036F'. 251 */ 252 public final static UnicodeBlock COMBINING_DIACRITICAL_MARKS 253 = new UnicodeBlock('\u0300', '\u036F', 254 "COMBINING_DIACRITICAL_MARKS"); 255 256 /** 257 * Greek. 258 * '\u0370' - '\u03FF'. 259 */ 260 public final static UnicodeBlock GREEK 261 = new UnicodeBlock('\u0370', '\u03FF', 262 "GREEK"); 263 264 /** 265 * Cyrillic. 266 * '\u0400' - '\u04FF'. 267 */ 268 public final static UnicodeBlock CYRILLIC 269 = new UnicodeBlock('\u0400', '\u04FF', 270 "CYRILLIC"); 271 272 /** 273 * Armenian. 274 * '\u0530' - '\u058F'. 275 */ 276 public final static UnicodeBlock ARMENIAN 277 = new UnicodeBlock('\u0530', '\u058F', 278 "ARMENIAN"); 279 280 /** 281 * Hebrew. 282 * '\u0590' - '\u05FF'. 283 */ 284 public final static UnicodeBlock HEBREW 285 = new UnicodeBlock('\u0590', '\u05FF', 286 "HEBREW"); 287 288 /** 289 * Arabic. 290 * '\u0600' - '\u06FF'. 291 */ 292 public final static UnicodeBlock ARABIC 293 = new UnicodeBlock('\u0600', '\u06FF', 294 "ARABIC"); 295 296 /** 297 * Syriac. 298 * '\u0700' - '\u074F'. 299 * @since 1.4 300 */ 301 public final static UnicodeBlock SYRIAC 302 = new UnicodeBlock('\u0700', '\u074F', 303 "SYRIAC"); 304 305 /** 306 * Thaana. 307 * '\u0780' - '\u07BF'. 308 * @since 1.4 309 */ 310 public final static UnicodeBlock THAANA 311 = new UnicodeBlock('\u0780', '\u07BF', 312 "THAANA"); 313 314 /** 315 * Devanagari. 316 * '\u0900' - '\u097F'. 317 */ 318 public final static UnicodeBlock DEVANAGARI 319 = new UnicodeBlock('\u0900', '\u097F', 320 "DEVANAGARI"); 321 322 /** 323 * Bengali. 324 * '\u0980' - '\u09FF'. 
325 */ 326 public final static UnicodeBlock BENGALI 327 = new UnicodeBlock('\u0980', '\u09FF', 328 "BENGALI"); 329 330 /** 331 * Gurmukhi. 332 * '\u0A00' - '\u0A7F'. 333 */ 334 public final static UnicodeBlock GURMUKHI 335 = new UnicodeBlock('\u0A00', '\u0A7F', 336 "GURMUKHI"); 337 338 /** 339 * Gujarati. 340 * '\u0A80' - '\u0AFF'. 341 */ 342 public final static UnicodeBlock GUJARATI 343 = new UnicodeBlock('\u0A80', '\u0AFF', 344 "GUJARATI"); 345 346 /** 347 * Oriya. 348 * '\u0B00' - '\u0B7F'. 349 */ 350 public final static UnicodeBlock ORIYA 351 = new UnicodeBlock('\u0B00', '\u0B7F', 352 "ORIYA"); 353 354 /** 355 * Tamil. 356 * '\u0B80' - '\u0BFF'. 357 */ 358 public final static UnicodeBlock TAMIL 359 = new UnicodeBlock('\u0B80', '\u0BFF', 360 "TAMIL"); 361 362 /** 363 * Telugu. 364 * '\u0C00' - '\u0C7F'. 365 */ 366 public final static UnicodeBlock TELUGU 367 = new UnicodeBlock('\u0C00', '\u0C7F', 368 "TELUGU"); 369 370 /** 371 * Kannada. 372 * '\u0C80' - '\u0CFF'. 373 */ 374 public final static UnicodeBlock KANNADA 375 = new UnicodeBlock('\u0C80', '\u0CFF', 376 "KANNADA"); 377 378 /** 379 * Malayalam. 380 * '\u0D00' - '\u0D7F'. 381 */ 382 public final static UnicodeBlock MALAYALAM 383 = new UnicodeBlock('\u0D00', '\u0D7F', 384 "MALAYALAM"); 385 386 /** 387 * Sinhala. 388 * '\u0D80' - '\u0DFF'. 389 * @since 1.4 390 */ 391 public final static UnicodeBlock SINHALA 392 = new UnicodeBlock('\u0D80', '\u0DFF', 393 "SINHALA"); 394 395 /** 396 * Thai. 397 * '\u0E00' - '\u0E7F'. 398 */ 399 public final static UnicodeBlock THAI 400 = new UnicodeBlock('\u0E00', '\u0E7F', 401 "THAI"); 402 403 /** 404 * Lao. 405 * '\u0E80' - '\u0EFF'. 406 */ 407 public final static UnicodeBlock LAO 408 = new UnicodeBlock('\u0E80', '\u0EFF', 409 "LAO"); 410 411 /** 412 * Tibetan. 413 * '\u0F00' - '\u0FFF'. 414 */ 415 public final static UnicodeBlock TIBETAN 416 = new UnicodeBlock('\u0F00', '\u0FFF', 417 "TIBETAN"); 418 419 /** 420 * Myanmar. 421 * '\u1000' - '\u109F'. 
422 * @since 1.4 423 */ 424 public final static UnicodeBlock MYANMAR 425 = new UnicodeBlock('\u1000', '\u109F', 426 "MYANMAR"); 427 428 /** 429 * Georgian. 430 * '\u10A0' - '\u10FF'. 431 */ 432 public final static UnicodeBlock GEORGIAN 433 = new UnicodeBlock('\u10A0', '\u10FF', 434 "GEORGIAN"); 435 436 /** 437 * Hangul Jamo. 438 * '\u1100' - '\u11FF'. 439 */ 440 public final static UnicodeBlock HANGUL_JAMO 441 = new UnicodeBlock('\u1100', '\u11FF', 442 "HANGUL_JAMO"); 443 444 /** 445 * Ethiopic. 446 * '\u1200' - '\u137F'. 447 * @since 1.4 448 */ 449 public final static UnicodeBlock ETHIOPIC 450 = new UnicodeBlock('\u1200', '\u137F', 451 "ETHIOPIC"); 452 453 /** 454 * Cherokee. 455 * '\u13A0' - '\u13FF'. 456 * @since 1.4 457 */ 458 public final static UnicodeBlock CHEROKEE 459 = new UnicodeBlock('\u13A0', '\u13FF', 460 "CHEROKEE"); 461 462 /** 463 * Unified Canadian Aboriginal Syllabics. 464 * '\u1400' - '\u167F'. 465 * @since 1.4 466 */ 467 public final static UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 468 = new UnicodeBlock('\u1400', '\u167F', 469 "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 470 471 /** 472 * Ogham. 473 * '\u1680' - '\u169F'. 474 * @since 1.4 475 */ 476 public final static UnicodeBlock OGHAM 477 = new UnicodeBlock('\u1680', '\u169F', 478 "OGHAM"); 479 480 /** 481 * Runic. 482 * '\u16A0' - '\u16FF'. 483 * @since 1.4 484 */ 485 public final static UnicodeBlock RUNIC 486 = new UnicodeBlock('\u16A0', '\u16FF', 487 "RUNIC"); 488 489 /** 490 * Khmer. 491 * '\u1780' - '\u17FF'. 492 * @since 1.4 493 */ 494 public final static UnicodeBlock KHMER 495 = new UnicodeBlock('\u1780', '\u17FF', 496 "KHMER"); 497 498 /** 499 * Mongolian. 500 * '\u1800' - '\u18AF'. 501 * @since 1.4 502 */ 503 public final static UnicodeBlock MONGOLIAN 504 = new UnicodeBlock('\u1800', '\u18AF', 505 "MONGOLIAN"); 506 507 /** 508 * Latin Extended Additional. 509 * '\u1E00' - '\u1EFF'. 
510 */ 511 public final static UnicodeBlock LATIN_EXTENDED_ADDITIONAL 512 = new UnicodeBlock('\u1E00', '\u1EFF', 513 "LATIN_EXTENDED_ADDITIONAL"); 514 515 /** 516 * Greek Extended. 517 * '\u1F00' - '\u1FFF'. 518 */ 519 public final static UnicodeBlock GREEK_EXTENDED 520 = new UnicodeBlock('\u1F00', '\u1FFF', 521 "GREEK_EXTENDED"); 522 523 /** 524 * General Punctuation. 525 * '\u2000' - '\u206F'. 526 */ 527 public final static UnicodeBlock GENERAL_PUNCTUATION 528 = new UnicodeBlock('\u2000', '\u206F', 529 "GENERAL_PUNCTUATION"); 530 531 /** 532 * Superscripts and Subscripts. 533 * '\u2070' - '\u209F'. 534 */ 535 public final static UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 536 = new UnicodeBlock('\u2070', '\u209F', 537 "SUPERSCRIPTS_AND_SUBSCRIPTS"); 538 539 /** 540 * Currency Symbols. 541 * '\u20A0' - '\u20CF'. 542 */ 543 public final static UnicodeBlock CURRENCY_SYMBOLS 544 = new UnicodeBlock('\u20A0', '\u20CF', 545 "CURRENCY_SYMBOLS"); 546 547 /** 548 * Combining Marks for Symbols. 549 * '\u20D0' - '\u20FF'. 550 */ 551 public final static UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 552 = new UnicodeBlock('\u20D0', '\u20FF', 553 "COMBINING_MARKS_FOR_SYMBOLS"); 554 555 /** 556 * Letterlike Symbols. 557 * '\u2100' - '\u214F'. 558 */ 559 public final static UnicodeBlock LETTERLIKE_SYMBOLS 560 = new UnicodeBlock('\u2100', '\u214F', 561 "LETTERLIKE_SYMBOLS"); 562 563 /** 564 * Number Forms. 565 * '\u2150' - '\u218F'. 566 */ 567 public final static UnicodeBlock NUMBER_FORMS 568 = new UnicodeBlock('\u2150', '\u218F', 569 "NUMBER_FORMS"); 570 571 /** 572 * Arrows. 573 * '\u2190' - '\u21FF'. 574 */ 575 public final static UnicodeBlock ARROWS 576 = new UnicodeBlock('\u2190', '\u21FF', 577 "ARROWS"); 578 579 /** 580 * Mathematical Operators. 581 * '\u2200' - '\u22FF'. 582 */ 583 public final static UnicodeBlock MATHEMATICAL_OPERATORS 584 = new UnicodeBlock('\u2200', '\u22FF', 585 "MATHEMATICAL_OPERATORS"); 586 587 /** 588 * Miscellaneous Technical. 589 * '\u2300' - '\u23FF'. 
590 */ 591 public final static UnicodeBlock MISCELLANEOUS_TECHNICAL 592 = new UnicodeBlock('\u2300', '\u23FF', 593 "MISCELLANEOUS_TECHNICAL"); 594 595 /** 596 * Control Pictures. 597 * '\u2400' - '\u243F'. 598 */ 599 public final static UnicodeBlock CONTROL_PICTURES 600 = new UnicodeBlock('\u2400', '\u243F', 601 "CONTROL_PICTURES"); 602 603 /** 604 * Optical Character Recognition. 605 * '\u2440' - '\u245F'. 606 */ 607 public final static UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 608 = new UnicodeBlock('\u2440', '\u245F', 609 "OPTICAL_CHARACTER_RECOGNITION"); 610 611 /** 612 * Enclosed Alphanumerics. 613 * '\u2460' - '\u24FF'. 614 */ 615 public final static UnicodeBlock ENCLOSED_ALPHANUMERICS 616 = new UnicodeBlock('\u2460', '\u24FF', 617 "ENCLOSED_ALPHANUMERICS"); 618 619 /** 620 * Box Drawing. 621 * '\u2500' - '\u257F'. 622 */ 623 public final static UnicodeBlock BOX_DRAWING 624 = new UnicodeBlock('\u2500', '\u257F', 625 "BOX_DRAWING"); 626 627 /** 628 * Block Elements. 629 * '\u2580' - '\u259F'. 630 */ 631 public final static UnicodeBlock BLOCK_ELEMENTS 632 = new UnicodeBlock('\u2580', '\u259F', 633 "BLOCK_ELEMENTS"); 634 635 /** 636 * Geometric Shapes. 637 * '\u25A0' - '\u25FF'. 638 */ 639 public final static UnicodeBlock GEOMETRIC_SHAPES 640 = new UnicodeBlock('\u25A0', '\u25FF', 641 "GEOMETRIC_SHAPES"); 642 643 /** 644 * Miscellaneous Symbols. 645 * '\u2600' - '\u26FF'. 646 */ 647 public final static UnicodeBlock MISCELLANEOUS_SYMBOLS 648 = new UnicodeBlock('\u2600', '\u26FF', 649 "MISCELLANEOUS_SYMBOLS"); 650 651 /** 652 * Dingbats. 653 * '\u2700' - '\u27BF'. 654 */ 655 public final static UnicodeBlock DINGBATS 656 = new UnicodeBlock('\u2700', '\u27BF', 657 "DINGBATS"); 658 659 /** 660 * Braille Patterns. 661 * '\u2800' - '\u28FF'. 662 * @since 1.4 663 */ 664 public final static UnicodeBlock BRAILLE_PATTERNS 665 = new UnicodeBlock('\u2800', '\u28FF', 666 "BRAILLE_PATTERNS"); 667 668 /** 669 * CJK Radicals Supplement. 670 * '\u2E80' - '\u2EFF'. 
671 * @since 1.4 672 */ 673 public final static UnicodeBlock CJK_RADICALS_SUPPLEMENT 674 = new UnicodeBlock('\u2E80', '\u2EFF', 675 "CJK_RADICALS_SUPPLEMENT"); 676 677 /** 678 * Kangxi Radicals. 679 * '\u2F00' - '\u2FDF'. 680 * @since 1.4 681 */ 682 public final static UnicodeBlock KANGXI_RADICALS 683 = new UnicodeBlock('\u2F00', '\u2FDF', 684 "KANGXI_RADICALS"); 685 686 /** 687 * Ideographic Description Characters. 688 * '\u2FF0' - '\u2FFF'. 689 * @since 1.4 690 */ 691 public final static UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 692 = new UnicodeBlock('\u2FF0', '\u2FFF', 693 "IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 694 695 /** 696 * CJK Symbols and Punctuation. 697 * '\u3000' - '\u303F'. 698 */ 699 public final static UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 700 = new UnicodeBlock('\u3000', '\u303F', 701 "CJK_SYMBOLS_AND_PUNCTUATION"); 702 703 /** 704 * Hiragana. 705 * '\u3040' - '\u309F'. 706 */ 707 public final static UnicodeBlock HIRAGANA 708 = new UnicodeBlock('\u3040', '\u309F', 709 "HIRAGANA"); 710 711 /** 712 * Katakana. 713 * '\u30A0' - '\u30FF'. 714 */ 715 public final static UnicodeBlock KATAKANA 716 = new UnicodeBlock('\u30A0', '\u30FF', 717 "KATAKANA"); 718 719 /** 720 * Bopomofo. 721 * '\u3100' - '\u312F'. 722 */ 723 public final static UnicodeBlock BOPOMOFO 724 = new UnicodeBlock('\u3100', '\u312F', 725 "BOPOMOFO"); 726 727 /** 728 * Hangul Compatibility Jamo. 729 * '\u3130' - '\u318F'. 730 */ 731 public final static UnicodeBlock HANGUL_COMPATIBILITY_JAMO 732 = new UnicodeBlock('\u3130', '\u318F', 733 "HANGUL_COMPATIBILITY_JAMO"); 734 735 /** 736 * Kanbun. 737 * '\u3190' - '\u319F'. 738 */ 739 public final static UnicodeBlock KANBUN 740 = new UnicodeBlock('\u3190', '\u319F', 741 "KANBUN"); 742 743 /** 744 * Bopomofo Extended. 745 * '\u31A0' - '\u31BF'. 
746 * @since 1.4 747 */ 748 public final static UnicodeBlock BOPOMOFO_EXTENDED 749 = new UnicodeBlock('\u31A0', '\u31BF', 750 "BOPOMOFO_EXTENDED"); 751 752 /** 753 * Enclosed CJK Letters and Months. 754 * '\u3200' - '\u32FF'. 755 */ 756 public final static UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 757 = new UnicodeBlock('\u3200', '\u32FF', 758 "ENCLOSED_CJK_LETTERS_AND_MONTHS"); 759 760 /** 761 * CJK Compatibility. 762 * '\u3300' - '\u33FF'. 763 */ 764 public final static UnicodeBlock CJK_COMPATIBILITY 765 = new UnicodeBlock('\u3300', '\u33FF', 766 "CJK_COMPATIBILITY"); 767 768 /** 769 * CJK Unified Ideographs Extension A. 770 * '\u3400' - '\u4DB5'. 771 * @since 1.4 772 */ 773 public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 774 = new UnicodeBlock('\u3400', '\u4DB5', 775 "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 776 777 /** 778 * CJK Unified Ideographs. 779 * '\u4E00' - '\u9FFF'. 780 */ 781 public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 782 = new UnicodeBlock('\u4E00', '\u9FFF', 783 "CJK_UNIFIED_IDEOGRAPHS"); 784 785 /** 786 * Yi Syllables. 787 * '\uA000' - '\uA48F'. 788 * @since 1.4 789 */ 790 public final static UnicodeBlock YI_SYLLABLES 791 = new UnicodeBlock('\uA000', '\uA48F', 792 "YI_SYLLABLES"); 793 794 /** 795 * Yi Radicals. 796 * '\uA490' - '\uA4CF'. 797 * @since 1.4 798 */ 799 public final static UnicodeBlock YI_RADICALS 800 = new UnicodeBlock('\uA490', '\uA4CF', 801 "YI_RADICALS"); 802 803 /** 804 * Hangul Syllables. 805 * '\uAC00' - '\uD7A3'. 806 */ 807 public final static UnicodeBlock HANGUL_SYLLABLES 808 = new UnicodeBlock('\uAC00', '\uD7A3', 809 "HANGUL_SYLLABLES"); 810 811 /** 812 * Surrogates Area. 813 * '\uD800' - '\uDFFF'. 814 */ 815 public final static UnicodeBlock SURROGATES_AREA 816 = new UnicodeBlock('\uD800', '\uDFFF', 817 "SURROGATES_AREA"); 818 819 /** 820 * Private Use Area. 821 * '\uE000' - '\uF8FF'. 
822 */ 823 public final static UnicodeBlock PRIVATE_USE_AREA 824 = new UnicodeBlock('\uE000', '\uF8FF', 825 "PRIVATE_USE_AREA"); 826 827 /** 828 * CJK Compatibility Ideographs. 829 * '\uF900' - '\uFAFF'. 830 */ 831 public final static UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 832 = new UnicodeBlock('\uF900', '\uFAFF', 833 "CJK_COMPATIBILITY_IDEOGRAPHS"); 834 835 /** 836 * Alphabetic Presentation Forms. 837 * '\uFB00' - '\uFB4F'. 838 */ 839 public final static UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 840 = new UnicodeBlock('\uFB00', '\uFB4F', 841 "ALPHABETIC_PRESENTATION_FORMS"); 842 843 /** 844 * Arabic Presentation Forms-A. 845 * '\uFB50' - '\uFDFF'. 846 */ 847 public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_A 848 = new UnicodeBlock('\uFB50', '\uFDFF', 849 "ARABIC_PRESENTATION_FORMS_A"); 850 851 /** 852 * Combining Half Marks. 853 * '\uFE20' - '\uFE2F'. 854 */ 855 public final static UnicodeBlock COMBINING_HALF_MARKS 856 = new UnicodeBlock('\uFE20', '\uFE2F', 857 "COMBINING_HALF_MARKS"); 858 859 /** 860 * CJK Compatibility Forms. 861 * '\uFE30' - '\uFE4F'. 862 */ 863 public final static UnicodeBlock CJK_COMPATIBILITY_FORMS 864 = new UnicodeBlock('\uFE30', '\uFE4F', 865 "CJK_COMPATIBILITY_FORMS"); 866 867 /** 868 * Small Form Variants. 869 * '\uFE50' - '\uFE6F'. 870 */ 871 public final static UnicodeBlock SMALL_FORM_VARIANTS 872 = new UnicodeBlock('\uFE50', '\uFE6F', 873 "SMALL_FORM_VARIANTS"); 874 875 /** 876 * Arabic Presentation Forms-B. 877 * '\uFE70' - '\uFEFE'. 878 */ 879 public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_B 880 = new UnicodeBlock('\uFE70', '\uFEFE', 881 "ARABIC_PRESENTATION_FORMS_B"); 882 883 /** 884 * Halfwidth and Fullwidth Forms. 885 * '\uFF00' - '\uFFEF'. 886 */ 887 public final static UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 888 = new UnicodeBlock('\uFF00', '\uFFEF', 889 "HALFWIDTH_AND_FULLWIDTH_FORMS"); 890 891 /** 892 * Specials. 893 * '\uFEFF', '\uFFF0' - '\uFFFD'. 
894 */ 895 public final static UnicodeBlock SPECIALS 896 = new UnicodeBlock('\uFFF0', '\uFFFD', 897 "SPECIALS"); 898 899 /** 900 * The defined subsets. 901 */ 902 private static final UnicodeBlock sets[] = { 903 BASIC_LATIN, 904 LATIN_1_SUPPLEMENT, 905 LATIN_EXTENDED_A, 906 LATIN_EXTENDED_B, 907 IPA_EXTENSIONS, 908 SPACING_MODIFIER_LETTERS, 909 COMBINING_DIACRITICAL_MARKS, 910 GREEK, 911 CYRILLIC, 912 ARMENIAN, 913 HEBREW, 914 ARABIC, 915 SYRIAC, 916 THAANA, 917 DEVANAGARI, 918 BENGALI, 919 GURMUKHI, 920 GUJARATI, 921 ORIYA, 922 TAMIL, 923 TELUGU, 924 KANNADA, 925 MALAYALAM, 926 SINHALA, 927 THAI, 928 LAO, 929 TIBETAN, 930 MYANMAR, 931 GEORGIAN, 932 HANGUL_JAMO, 933 ETHIOPIC, 934 CHEROKEE, 935 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 936 OGHAM, 937 RUNIC, 938 KHMER, 939 MONGOLIAN, 940 LATIN_EXTENDED_ADDITIONAL, 941 GREEK_EXTENDED, 942 GENERAL_PUNCTUATION, 943 SUPERSCRIPTS_AND_SUBSCRIPTS, 944 CURRENCY_SYMBOLS, 945 COMBINING_MARKS_FOR_SYMBOLS, 946 LETTERLIKE_SYMBOLS, 947 NUMBER_FORMS, 948 ARROWS, 949 MATHEMATICAL_OPERATORS, 950 MISCELLANEOUS_TECHNICAL, 951 CONTROL_PICTURES, 952 OPTICAL_CHARACTER_RECOGNITION, 953 ENCLOSED_ALPHANUMERICS, 954 BOX_DRAWING, 955 BLOCK_ELEMENTS, 956 GEOMETRIC_SHAPES, 957 MISCELLANEOUS_SYMBOLS, 958 DINGBATS, 959 BRAILLE_PATTERNS, 960 CJK_RADICALS_SUPPLEMENT, 961 KANGXI_RADICALS, 962 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 963 CJK_SYMBOLS_AND_PUNCTUATION, 964 HIRAGANA, 965 KATAKANA, 966 BOPOMOFO, 967 HANGUL_COMPATIBILITY_JAMO, 968 KANBUN, 969 BOPOMOFO_EXTENDED, 970 ENCLOSED_CJK_LETTERS_AND_MONTHS, 971 CJK_COMPATIBILITY, 972 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 973 CJK_UNIFIED_IDEOGRAPHS, 974 YI_SYLLABLES, 975 YI_RADICALS, 976 HANGUL_SYLLABLES, 977 SURROGATES_AREA, 978 PRIVATE_USE_AREA, 979 CJK_COMPATIBILITY_IDEOGRAPHS, 980 ALPHABETIC_PRESENTATION_FORMS, 981 ARABIC_PRESENTATION_FORMS_A, 982 COMBINING_HALF_MARKS, 983 CJK_COMPATIBILITY_FORMS, 984 SMALL_FORM_VARIANTS, 985 ARABIC_PRESENTATION_FORMS_B, 986 HALFWIDTH_AND_FULLWIDTH_FORMS, 987 SPECIALS, 988 
}; 989 } // class UnicodeBlock 990 991 /** 992 * The immutable value of this Character. 993 * 994 * @serial the value of this Character 995 */ 996 private final char value; 997 998 /** 999 * Compatible with JDK 1.0+. 1000 */ 1001 private static final long serialVersionUID = 3786198910865385080L; 1002 1003 /** 1004 * Smallest value allowed for radix arguments in Java. This value is 2. 1005 * 1006 * @see #digit(char, int) 1007 * @see #forDigit(int, int) 1008 * @see Integer#toString(int, int) 1009 * @see Integer#valueOf(String) 1010 */ 1011 public static final int MIN_RADIX = 2; 1012 1013 /** 1014 * Largest value allowed for radix arguments in Java. This value is 36. 1015 * 1016 * @see #digit(char, int) 1017 * @see #forDigit(int, int) 1018 * @see Integer#toString(int, int) 1019 * @see Integer#valueOf(String) 1020 */ 1021 public static final int MAX_RADIX = 36; 1022 1023 /** 1024 * The minimum value the char data type can hold. 1025 * This value is <code>'\\u0000'</code>. 1026 */ 1027 public static final char MIN_VALUE = '\u0000'; 1028 1029 /** 1030 * The maximum value the char data type can hold. 1031 * This value is <code>'\\uFFFF'</code>. 1032 */ 1033 public static final char MAX_VALUE = '\uFFFF'; 1034 1035 /** 1036 * Class object representing the primitive char data type. 1037 * 1038 * @since 1.1 1039 */ 1040 public static final Class TYPE = VMClassLoader.getPrimitiveClass('C'); 1041 1042 /** 1043 * Lu = Letter, Uppercase (Informative). 1044 * 1045 * @since 1.1 1046 */ 1047 public static final byte UPPERCASE_LETTER = 1; 1048 1049 /** 1050 * Ll = Letter, Lowercase (Informative). 1051 * 1052 * @since 1.1 1053 */ 1054 public static final byte LOWERCASE_LETTER = 2; 1055 1056 /** 1057 * Lt = Letter, Titlecase (Informative). 1058 * 1059 * @since 1.1 1060 */ 1061 public static final byte TITLECASE_LETTER = 3; 1062 1063 /** 1064 * Mn = Mark, Non-Spacing (Normative). 
1065 * 1066 * @since 1.1 1067 */ 1068 public static final byte NON_SPACING_MARK = 6; 1069 1070 /** 1071 * Mc = Mark, Spacing Combining (Normative). 1072 * 1073 * @since 1.1 1074 */ 1075 public static final byte COMBINING_SPACING_MARK = 8; 1076 1077 /** 1078 * Me = Mark, Enclosing (Normative). 1079 * 1080 * @since 1.1 1081 */ 1082 public static final byte ENCLOSING_MARK = 7; 1083 1084 /** 1085 * Nd = Number, Decimal Digit (Normative). 1086 * 1087 * @since 1.1 1088 */ 1089 public static final byte DECIMAL_DIGIT_NUMBER = 9; 1090 1091 /** 1092 * Nl = Number, Letter (Normative). 1093 * 1094 * @since 1.1 1095 */ 1096 public static final byte LETTER_NUMBER = 10; 1097 1098 /** 1099 * No = Number, Other (Normative). 1100 * 1101 * @since 1.1 1102 */ 1103 public static final byte OTHER_NUMBER = 11; 1104 1105 /** 1106 * Zs = Separator, Space (Normative). 1107 * 1108 * @since 1.1 1109 */ 1110 public static final byte SPACE_SEPARATOR = 12; 1111 1112 /** 1113 * Zl = Separator, Line (Normative). 1114 * 1115 * @since 1.1 1116 */ 1117 public static final byte LINE_SEPARATOR = 13; 1118 1119 /** 1120 * Zp = Separator, Paragraph (Normative). 1121 * 1122 * @since 1.1 1123 */ 1124 public static final byte PARAGRAPH_SEPARATOR = 14; 1125 1126 /** 1127 * Cc = Other, Control (Normative). 1128 * 1129 * @since 1.1 1130 */ 1131 public static final byte CONTROL = 15; 1132 1133 /** 1134 * Cf = Other, Format (Normative). 1135 * 1136 * @since 1.1 1137 */ 1138 public static final byte FORMAT = 16; 1139 1140 /** 1141 * Cs = Other, Surrogate (Normative). 1142 * 1143 * @since 1.1 1144 */ 1145 public static final byte SURROGATE = 19; 1146 1147 /** 1148 * Co = Other, Private Use (Normative). 1149 * 1150 * @since 1.1 1151 */ 1152 public static final byte PRIVATE_USE = 18; 1153 1154 /** 1155 * Cn = Other, Not Assigned (Normative). 1156 * 1157 * @since 1.1 1158 */ 1159 public static final byte UNASSIGNED = 0; 1160 1161 /** 1162 * Lm = Letter, Modifier (Informative). 
1163 * 1164 * @since 1.1 1165 */ 1166 public static final byte MODIFIER_LETTER = 4; 1167 1168 /** 1169 * Lo = Letter, Other (Informative). 1170 * 1171 * @since 1.1 1172 */ 1173 public static final byte OTHER_LETTER = 5; 1174 1175 /** 1176 * Pc = Punctuation, Connector (Informative). 1177 * 1178 * @since 1.1 1179 */ 1180 public static final byte CONNECTOR_PUNCTUATION = 23; 1181 1182 /** 1183 * Pd = Punctuation, Dash (Informative). 1184 * 1185 * @since 1.1 1186 */ 1187 public static final byte DASH_PUNCTUATION = 20; 1188 1189 /** 1190 * Ps = Punctuation, Open (Informative). 1191 * 1192 * @since 1.1 1193 */ 1194 public static final byte START_PUNCTUATION = 21; 1195 1196 /** 1197 * Pe = Punctuation, Close (Informative). 1198 * 1199 * @since 1.1 1200 */ 1201 public static final byte END_PUNCTUATION = 22; 1202 1203 /** 1204 * Pi = Punctuation, Initial Quote (Informative). 1205 * 1206 * @since 1.4 1207 */ 1208 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 1209 1210 /** 1211 * Pf = Punctuation, Final Quote (Informative). 1212 * 1213 * @since 1.4 1214 */ 1215 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 1216 1217 /** 1218 * Po = Punctuation, Other (Informative). 1219 * 1220 * @since 1.1 1221 */ 1222 public static final byte OTHER_PUNCTUATION = 24; 1223 1224 /** 1225 * Sm = Symbol, Math (Informative). 1226 * 1227 * @since 1.1 1228 */ 1229 public static final byte MATH_SYMBOL = 25; 1230 1231 /** 1232 * Sc = Symbol, Currency (Informative). 1233 * 1234 * @since 1.1 1235 */ 1236 public static final byte CURRENCY_SYMBOL = 26; 1237 1238 /** 1239 * Sk = Symbol, Modifier (Informative). 1240 * 1241 * @since 1.1 1242 */ 1243 public static final byte MODIFIER_SYMBOL = 27; 1244 1245 /** 1246 * So = Symbol, Other (Informative). 1247 * 1248 * @since 1.1 1249 */ 1250 public static final byte OTHER_SYMBOL = 28; 1251 1252 /** 1253 * Undefined bidirectional character type. Undefined char values have 1254 * undefined directionality in the Unicode specification. 
1255 * 1256 * @since 1.4 1257 */ 1258 public static final byte DIRECTIONALITY_UNDEFINED = -1; 1259 1260 /** 1261 * Strong bidirectional character type "L". 1262 * 1263 * @since 1.4 1264 */ 1265 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 1266 1267 /** 1268 * Strong bidirectional character type "R". 1269 * 1270 * @since 1.4 1271 */ 1272 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 1273 1274 /** 1275 * Strong bidirectional character type "AL". 1276 * 1277 * @since 1.4 1278 */ 1279 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 1280 1281 /** 1282 * Weak bidirectional character type "EN". 1283 * 1284 * @since 1.4 1285 */ 1286 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 1287 1288 /** 1289 * Weak bidirectional character type "ES". 1290 * 1291 * @since 1.4 1292 */ 1293 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 1294 1295 /** 1296 * Weak bidirectional character type "ET". 1297 * 1298 * @since 1.4 1299 */ 1300 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 1301 1302 /** 1303 * Weak bidirectional character type "AN". 1304 * 1305 * @since 1.4 1306 */ 1307 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 1308 1309 /** 1310 * Weak bidirectional character type "CS". 1311 * 1312 * @since 1.4 1313 */ 1314 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 1315 1316 /** 1317 * Weak bidirectional character type "NSM". 1318 * 1319 * @since 1.4 1320 */ 1321 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 1322 1323 /** 1324 * Weak bidirectional character type "BN". 1325 * 1326 * @since 1.4 1327 */ 1328 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 1329 1330 /** 1331 * Neutral bidirectional character type "B". 1332 * 1333 * @since 1.4 1334 */ 1335 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 1336 1337 /** 1338 * Neutral bidirectional character type "S". 
1339 * 1340 * @since 1.4 1341 */ 1342 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 1343 1344 /** 1345 * Strong bidirectional character type "WS". 1346 * 1347 * @since 1.4 1348 */ 1349 public static final byte DIRECTIONALITY_WHITESPACE = 12; 1350 1351 /** 1352 * Neutral bidirectional character type "ON". 1353 * 1354 * @since 1.4 1355 */ 1356 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 1357 1358 /** 1359 * Strong bidirectional character type "LRE". 1360 * 1361 * @since 1.4 1362 */ 1363 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 1364 1365 /** 1366 * Strong bidirectional character type "LRO". 1367 * 1368 * @since 1.4 1369 */ 1370 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 1371 1372 /** 1373 * Strong bidirectional character type "RLE". 1374 * 1375 * @since 1.4 1376 */ 1377 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 1378 1379 /** 1380 * Strong bidirectional character type "RLO". 1381 * 1382 * @since 1.4 1383 */ 1384 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 1385 1386 /** 1387 * Weak bidirectional character type "PDF". 1388 * 1389 * @since 1.4 1390 */ 1391 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 1392 1393 /** 1394 * Mask for grabbing the type out of the result of readChar. 1395 * @see #readChar(char) 1396 */ 1397 private static final int TYPE_MASK = 0x1F; 1398 1399 /** 1400 * Mask for grabbing the non-breaking space flag out of the result of 1401 * readChar. 1402 * @see #readChar(char) 1403 */ 1404 private static final int NO_BREAK_MASK = 0x20; 1405 1406 /** 1407 * Mask for grabbing the mirrored directionality flag out of the result 1408 * of readChar. 1409 * @see #readChar(char) 1410 */ 1411 private static final int MIRROR_MASK = 0x40; 1412 1413 /** 1414 * Grabs an attribute offset from the Unicode attribute database. 
The lower 1415 * 5 bits are the character type, the next 2 bits are flags, and the top 1416 * 9 bits are the offset into the attribute tables. Note that the top 9 1417 * bits are meaningless in this context; they are useful only in the native 1418 * code. 1419 * 1420 * @param ch the character to look up 1421 * @return the character's attribute offset and type 1422 * @see #TYPE_MASK 1423 * @see #NO_BREAK_MASK 1424 * @see #MIRROR_MASK 1425 */ readChar(char ch)1426 private static native char readChar(char ch); 1427 1428 /** 1429 * Wraps up a character. 1430 * 1431 * @param value the character to wrap 1432 */ Character(char value)1433 public Character(char value) 1434 { 1435 this.value = value; 1436 } 1437 1438 /** 1439 * Returns the character which has been wrapped by this class. 1440 * 1441 * @return the character wrapped 1442 */ charValue()1443 public char charValue() 1444 { 1445 return value; 1446 } 1447 1448 /** 1449 * Returns the numerical value (unsigned) of the wrapped character. 1450 * Range of returned values: 0x0000-0xFFFF. 1451 * 1452 * @return the value of the wrapped character 1453 */ hashCode()1454 public int hashCode() 1455 { 1456 return value; 1457 } 1458 1459 /** 1460 * Determines if an object is equal to this object. This is only true for 1461 * another Character object wrapping the same value. 1462 * 1463 * @param o object to compare 1464 * @return true if o is a Character with the same value 1465 */ equals(Object o)1466 public boolean equals(Object o) 1467 { 1468 return o instanceof Character && value == ((Character) o).value; 1469 } 1470 1471 /** 1472 * Converts the wrapped character into a String. 1473 * 1474 * @return a String containing one character -- the wrapped character 1475 * of this instance 1476 */ toString()1477 public String toString() 1478 { 1479 // This assumes that String.valueOf(char) can create a single-character 1480 // String more efficiently than through the public API. 
1481 return String.valueOf(value); 1482 } 1483 1484 /** 1485 * Returns a String of length 1 representing the specified character. 1486 * 1487 * @param ch the character to convert 1488 * @return a String containing the character 1489 * @since 1.4 1490 */ toString(char ch)1491 public static String toString(char ch) 1492 { 1493 // This assumes that String.valueOf(char) can create a single-character 1494 // String more efficiently than through the public API. 1495 return String.valueOf(ch); 1496 } 1497 1498 /** 1499 * Determines if a character is a Unicode lowercase letter. For example, 1500 * <code>'a'</code> is lowercase. 1501 * <br> 1502 * lowercase = [Ll] 1503 * 1504 * @param ch character to test 1505 * @return true if ch is a Unicode lowercase letter, else false 1506 * @see #isUpperCase(char) 1507 * @see #isTitleCase(char) 1508 * @see #toLowerCase(char) 1509 * @see #getType(char) 1510 */ isLowerCase(char ch)1511 public static boolean isLowerCase(char ch) 1512 { 1513 return getType(ch) == LOWERCASE_LETTER; 1514 } 1515 1516 /** 1517 * Determines if a character is a Unicode uppercase letter. For example, 1518 * <code>'A'</code> is uppercase. 1519 * <br> 1520 * uppercase = [Lu] 1521 * 1522 * @param ch character to test 1523 * @return true if ch is a Unicode uppercase letter, else false 1524 * @see #isLowerCase(char) 1525 * @see #isTitleCase(char) 1526 * @see #toUpperCase(char) 1527 * @see #getType(char) 1528 */ isUpperCase(char ch)1529 public static boolean isUpperCase(char ch) 1530 { 1531 return getType(ch) == UPPERCASE_LETTER; 1532 } 1533 1534 /** 1535 * Determines if a character is a Unicode titlecase letter. For example, 1536 * the character "Lj" (Latin capital L with small letter j) is titlecase. 
1537 * <br> 1538 * titlecase = [Lt] 1539 * 1540 * @param ch character to test 1541 * @return true if ch is a Unicode titlecase letter, else false 1542 * @see #isLowerCase(char) 1543 * @see #isUpperCase(char) 1544 * @see #toTitleCase(char) 1545 * @see #getType(char) 1546 */ isTitleCase(char ch)1547 public static boolean isTitleCase(char ch) 1548 { 1549 return getType(ch) == TITLECASE_LETTER; 1550 } 1551 1552 /** 1553 * Determines if a character is a Unicode decimal digit. For example, 1554 * <code>'0'</code> is a digit. 1555 * <br> 1556 * Unicode decimal digit = [Nd] 1557 * 1558 * @param ch character to test 1559 * @return true if ch is a Unicode decimal digit, else false 1560 * @see #digit(char, int) 1561 * @see #forDigit(int, int) 1562 * @see #getType(char) 1563 */ isDigit(char ch)1564 public static boolean isDigit(char ch) 1565 { 1566 return getType(ch) == DECIMAL_DIGIT_NUMBER; 1567 } 1568 1569 /** 1570 * Determines if a character is part of the Unicode Standard. This is an 1571 * evolving standard, but covers every character in the data file. 1572 * <br> 1573 * defined = not [Cn] 1574 * 1575 * @param ch character to test 1576 * @return true if ch is a Unicode character, else false 1577 * @see #isDigit(char) 1578 * @see #isLetter(char) 1579 * @see #isLetterOrDigit(char) 1580 * @see #isLowerCase(char) 1581 * @see #isTitleCase(char) 1582 * @see #isUpperCase(char) 1583 */ isDefined(char ch)1584 public static boolean isDefined(char ch) 1585 { 1586 return getType(ch) != UNASSIGNED; 1587 } 1588 1589 /** 1590 * Determines if a character is a Unicode letter. Not all letters have case, 1591 * so this may return true when isLowerCase and isUpperCase return false. 
1592 * <br> 1593 * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] 1594 * 1595 * @param ch character to test 1596 * @return true if ch is a Unicode letter, else false 1597 * @see #isDigit(char) 1598 * @see #isJavaIdentifierStart(char) 1599 * @see #isJavaLetter(char) 1600 * @see #isJavaLetterOrDigit(char) 1601 * @see #isLetterOrDigit(char) 1602 * @see #isLowerCase(char) 1603 * @see #isTitleCase(char) 1604 * @see #isUnicodeIdentifierStart(char) 1605 * @see #isUpperCase(char) 1606 */ isLetter(char ch)1607 public static boolean isLetter(char ch) 1608 { 1609 return ((1 << getType(ch)) 1610 & ((1 << UPPERCASE_LETTER) 1611 | (1 << LOWERCASE_LETTER) 1612 | (1 << TITLECASE_LETTER) 1613 | (1 << MODIFIER_LETTER) 1614 | (1 << OTHER_LETTER))) != 0; 1615 } 1616 1617 /** 1618 * Determines if a character is a Unicode letter or a Unicode digit. This 1619 * is the combination of isLetter and isDigit. 1620 * <br> 1621 * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd] 1622 * 1623 * @param ch character to test 1624 * @return true if ch is a Unicode letter or a Unicode digit, else false 1625 * @see #isDigit(char) 1626 * @see #isJavaIdentifierPart(char) 1627 * @see #isJavaLetter(char) 1628 * @see #isJavaLetterOrDigit(char) 1629 * @see #isLetter(char) 1630 * @see #isUnicodeIdentifierPart(char) 1631 */ isLetterOrDigit(char ch)1632 public static boolean isLetterOrDigit(char ch) 1633 { 1634 return ((1 << getType(ch)) 1635 & ((1 << UPPERCASE_LETTER) 1636 | (1 << LOWERCASE_LETTER) 1637 | (1 << TITLECASE_LETTER) 1638 | (1 << MODIFIER_LETTER) 1639 | (1 << OTHER_LETTER) 1640 | (1 << DECIMAL_DIGIT_NUMBER))) != 0; 1641 } 1642 1643 /** 1644 * Determines if a character can start a Java identifier. This is the 1645 * combination of isLetter, any character where getType returns 1646 * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation 1647 * (like '_'). 
1648 * 1649 * @param ch character to test 1650 * @return true if ch can start a Java identifier, else false 1651 * @deprecated Replaced by {@link #isJavaIdentifierStart(char)} 1652 * @see #isJavaLetterOrDigit(char) 1653 * @see #isJavaIdentifierStart(char) 1654 * @see #isJavaIdentifierPart(char) 1655 * @see #isLetter(char) 1656 * @see #isLetterOrDigit(char) 1657 * @see #isUnicodeIdentifierStart(char) 1658 */ isJavaLetter(char ch)1659 public static boolean isJavaLetter(char ch) 1660 { 1661 return isJavaIdentifierStart(ch); 1662 } 1663 1664 /** 1665 * Determines if a character can follow the first letter in 1666 * a Java identifier. This is the combination of isJavaLetter (isLetter, 1667 * type of LETTER_NUMBER, currency, connecting punctuation) and digit, 1668 * numeric letter (like Roman numerals), combining marks, non-spacing marks, 1669 * or isIdentifierIgnorable. 1670 * 1671 * @param ch character to test 1672 * @return true if ch can follow the first letter in a Java identifier 1673 * @deprecated Replaced by {@link #isJavaIdentifierPart(char)} 1674 * @see #isJavaLetter(char) 1675 * @see #isJavaIdentifierStart(char) 1676 * @see #isJavaIdentifierPart(char) 1677 * @see #isLetter(char) 1678 * @see #isLetterOrDigit(char) 1679 * @see #isUnicodeIdentifierPart(char) 1680 * @see #isIdentifierIgnorable(char) 1681 */ isJavaLetterOrDigit(char ch)1682 public static boolean isJavaLetterOrDigit(char ch) 1683 { 1684 return isJavaIdentifierPart(ch); 1685 } 1686 1687 /** 1688 * Determines if a character can start a Java identifier. This is the 1689 * combination of isLetter, any character where getType returns 1690 * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation 1691 * (like '_'). 
1692 * <br> 1693 * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] 1694 * 1695 * @param ch character to test 1696 * @return true if ch can start a Java identifier, else false 1697 * @see #isJavaIdentifierPart(char) 1698 * @see #isLetter(char) 1699 * @see #isUnicodeIdentifierStart(char) 1700 * @since 1.1 1701 */ isJavaIdentifierStart(char ch)1702 public static boolean isJavaIdentifierStart(char ch) 1703 { 1704 return ((1 << getType(ch)) 1705 & ((1 << UPPERCASE_LETTER) 1706 | (1 << LOWERCASE_LETTER) 1707 | (1 << TITLECASE_LETTER) 1708 | (1 << MODIFIER_LETTER) 1709 | (1 << OTHER_LETTER) 1710 | (1 << LETTER_NUMBER) 1711 | (1 << CURRENCY_SYMBOL) 1712 | (1 << CONNECTOR_PUNCTUATION))) != 0; 1713 } 1714 1715 /** 1716 * Determines if a character can follow the first letter in 1717 * a Java identifier. This is the combination of isJavaLetter (isLetter, 1718 * type of LETTER_NUMBER, currency, connecting punctuation) and digit, 1719 * numeric letter (like Roman numerals), combining marks, non-spacing marks, 1720 * or isIdentifierIgnorable. 
1721 * <br> 1722 * Java identifier extender = 1723 * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf] 1724 * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F 1725 * 1726 * @param ch character to test 1727 * @return true if ch can follow the first letter in a Java identifier 1728 * @see #isIdentifierIgnorable(char) 1729 * @see #isJavaIdentifierStart(char) 1730 * @see #isLetterOrDigit(char) 1731 * @see #isUnicodeIdentifierPart(char) 1732 * @since 1.1 1733 */ isJavaIdentifierPart(char ch)1734 public static boolean isJavaIdentifierPart(char ch) 1735 { 1736 int category = getType(ch); 1737 return ((1 << category) 1738 & ((1 << UPPERCASE_LETTER) 1739 | (1 << LOWERCASE_LETTER) 1740 | (1 << TITLECASE_LETTER) 1741 | (1 << MODIFIER_LETTER) 1742 | (1 << OTHER_LETTER) 1743 | (1 << NON_SPACING_MARK) 1744 | (1 << COMBINING_SPACING_MARK) 1745 | (1 << DECIMAL_DIGIT_NUMBER) 1746 | (1 << LETTER_NUMBER) 1747 | (1 << CURRENCY_SYMBOL) 1748 | (1 << CONNECTOR_PUNCTUATION) 1749 | (1 << FORMAT))) != 0 1750 || (category == CONTROL && isIdentifierIgnorable(ch)); 1751 } 1752 1753 /** 1754 * Determines if a character can start a Unicode identifier. Only 1755 * letters can start a Unicode identifier, but this includes characters 1756 * in LETTER_NUMBER. 
1757 * <br> 1758 * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl] 1759 * 1760 * @param ch character to test 1761 * @return true if ch can start a Unicode identifier, else false 1762 * @see #isJavaIdentifierStart(char) 1763 * @see #isLetter(char) 1764 * @see #isUnicodeIdentifierPart(char) 1765 * @since 1.1 1766 */ isUnicodeIdentifierStart(char ch)1767 public static boolean isUnicodeIdentifierStart(char ch) 1768 { 1769 return ((1 << getType(ch)) 1770 & ((1 << UPPERCASE_LETTER) 1771 | (1 << LOWERCASE_LETTER) 1772 | (1 << TITLECASE_LETTER) 1773 | (1 << MODIFIER_LETTER) 1774 | (1 << OTHER_LETTER) 1775 | (1 << LETTER_NUMBER))) != 0; 1776 } 1777 1778 /** 1779 * Determines if a character can follow the first letter in 1780 * a Unicode identifier. This includes letters, connecting punctuation, 1781 * digits, numeric letters, combining marks, non-spacing marks, and 1782 * isIdentifierIgnorable. 1783 * <br> 1784 * Unicode identifier extender = 1785 * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]| 1786 * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F 1787 * 1788 * @param ch character to test 1789 * @return true if ch can follow the first letter in a Unicode identifier 1790 * @see #isIdentifierIgnorable(char) 1791 * @see #isJavaIdentifierPart(char) 1792 * @see #isLetterOrDigit(char) 1793 * @see #isUnicodeIdentifierStart(char) 1794 * @since 1.1 1795 */ isUnicodeIdentifierPart(char ch)1796 public static boolean isUnicodeIdentifierPart(char ch) 1797 { 1798 int category = getType(ch); 1799 return ((1 << category) 1800 & ((1 << UPPERCASE_LETTER) 1801 | (1 << LOWERCASE_LETTER) 1802 | (1 << TITLECASE_LETTER) 1803 | (1 << MODIFIER_LETTER) 1804 | (1 << OTHER_LETTER) 1805 | (1 << NON_SPACING_MARK) 1806 | (1 << COMBINING_SPACING_MARK) 1807 | (1 << DECIMAL_DIGIT_NUMBER) 1808 | (1 << LETTER_NUMBER) 1809 | (1 << CONNECTOR_PUNCTUATION) 1810 | (1 << FORMAT))) != 0 1811 || (category == CONTROL && isIdentifierIgnorable(ch)); 1812 } 1813 1814 /** 1815 * Determines if a character is 
ignorable in a Unicode identifier. This 1816 * includes the non-whitespace ISO control characters (<code>'\u0000'</code> 1817 * through <code>'\u0008'</code>, <code>'\u000E'</code> through 1818 * <code>'\u001B'</code>, and <code>'\u007F'</code> through 1819 * <code>'\u009F'</code>), and FORMAT characters. 1820 * <br> 1821 * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B 1822 * |U+007F-U+009F 1823 * 1824 * @param ch character to test 1825 * @return true if ch is ignorable in a Unicode or Java identifier 1826 * @see #isJavaIdentifierPart(char) 1827 * @see #isUnicodeIdentifierPart(char) 1828 * @since 1.1 1829 */ isIdentifierIgnorable(char ch)1830 public static boolean isIdentifierIgnorable(char ch) 1831 { 1832 return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F' 1833 || (ch <= '\u001B' && ch >= '\u000E'))) 1834 || getType(ch) == FORMAT; 1835 } 1836 1837 /** 1838 * Converts a Unicode character into its lowercase equivalent mapping. 1839 * If a mapping does not exist, then the character passed is returned. 1840 * Note that isLowerCase(toLowerCase(ch)) does not always return true. 1841 * 1842 * @param ch character to convert to lowercase 1843 * @return lowercase mapping of ch, or ch if lowercase mapping does 1844 * not exist 1845 * @see #isLowerCase(char) 1846 * @see #isUpperCase(char) 1847 * @see #toTitleCase(char) 1848 * @see #toUpperCase(char) 1849 */ toLowerCase(char ch)1850 public static native char toLowerCase(char ch); 1851 1852 /** 1853 * Converts a Unicode character into its uppercase equivalent mapping. 1854 * If a mapping does not exist, then the character passed is returned. 1855 * Note that isUpperCase(toUpperCase(ch)) does not always return true. 
1856 * 1857 * @param ch character to convert to uppercase 1858 * @return uppercase mapping of ch, or ch if uppercase mapping does 1859 * not exist 1860 * @see #isLowerCase(char) 1861 * @see #isUpperCase(char) 1862 * @see #toLowerCase(char) 1863 * @see #toTitleCase(char) 1864 */ toUpperCase(char ch)1865 public static native char toUpperCase(char ch); 1866 1867 /** 1868 * Converts a Unicode character into its titlecase equivalent mapping. 1869 * If a mapping does not exist, then the character passed is returned. 1870 * Note that isTitleCase(toTitleCase(ch)) does not always return true. 1871 * 1872 * @param ch character to convert to titlecase 1873 * @return titlecase mapping of ch, or ch if titlecase mapping does 1874 * not exist 1875 * @see #isTitleCase(char) 1876 * @see #toLowerCase(char) 1877 * @see #toUpperCase(char) 1878 */ toTitleCase(char ch)1879 public static native char toTitleCase(char ch); 1880 1881 /** 1882 * Converts a character into a digit of the specified radix. If the radix 1883 * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch) 1884 * exceeds the radix, or if ch is not a decimal digit or in the case 1885 * insensitive set of 'a'-'z', the result is -1. 1886 * <br> 1887 * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A 1888 * |U+FF21-U+FF3A|U+FF41-U+FF5A 1889 * 1890 * @param ch character to convert into a digit 1891 * @param radix radix in which ch is a digit 1892 * @return digit which ch represents in radix, or -1 not a valid digit 1893 * @see #MIN_RADIX 1894 * @see #MAX_RADIX 1895 * @see #forDigit(int, int) 1896 * @see #isDigit(char) 1897 * @see #getNumericValue(char) 1898 */ digit(char ch, int radix)1899 public static native int digit(char ch, int radix); 1900 1901 /** 1902 * Returns the Unicode numeric value property of a character. For example, 1903 * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. 
1904 * 1905 * <p>This method also returns values for the letters A through Z, (not 1906 * specified by Unicode), in these ranges: <code>'\u0041'</code> 1907 * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code> 1908 * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code> 1909 * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through 1910 * <code>'\uFF5A'</code> (full width variants). 1911 * 1912 * <p>If the character lacks a numeric value property, -1 is returned. 1913 * If the character has a numeric value property which is not representable 1914 * as a nonnegative integer, such as a fraction, -2 is returned. 1915 * 1916 * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A 1917 * |U+FF21-U+FF3A|U+FF41-U+FF5A 1918 * 1919 * @param ch character from which the numeric value property will 1920 * be retrieved 1921 * @return the numeric value property of ch, or -1 if it does not exist, or 1922 * -2 if it is not representable as a nonnegative integer 1923 * @see #forDigit(int, int) 1924 * @see #digit(char, int) 1925 * @see #isDigit(char) 1926 * @since 1.1 1927 */ getNumericValue(char ch)1928 public static native int getNumericValue(char ch); 1929 1930 /** 1931 * Determines if a character is a ISO-LATIN-1 space. This is only the five 1932 * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>, 1933 * <code>'\r'</code>, and <code>' '</code>. 1934 * <br> 1935 * Java space = U+0020|U+0009|U+000A|U+000C|U+000D 1936 * 1937 * @param ch character to test 1938 * @return true if ch is a space, else false 1939 * @deprecated Replaced by {@link #isWhitespace(char)} 1940 * @see #isSpaceChar(char) 1941 * @see #isWhitespace(char) 1942 */ isSpace(char ch)1943 public static boolean isSpace(char ch) 1944 { 1945 // Performing the subtraction up front alleviates need to compare longs. 
1946 return ch-- <= ' ' && ((1 << ch) 1947 & ((1 << (' ' - 1)) 1948 | (1 << ('\t' - 1)) 1949 | (1 << ('\n' - 1)) 1950 | (1 << ('\r' - 1)) 1951 | (1 << ('\f' - 1)))) != 0; 1952 } 1953 1954 /** 1955 * Determines if a character is a Unicode space character. This includes 1956 * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR. 1957 * <br> 1958 * Unicode space = [Zs]|[Zp]|[Zl] 1959 * 1960 * @param ch character to test 1961 * @return true if ch is a Unicode space, else false 1962 * @see #isWhitespace(char) 1963 * @since 1.1 1964 */ isSpaceChar(char ch)1965 public static boolean isSpaceChar(char ch) 1966 { 1967 return ((1 << getType(ch)) 1968 & ((1 << SPACE_SEPARATOR) 1969 | (1 << LINE_SEPARATOR) 1970 | (1 << PARAGRAPH_SEPARATOR))) != 0; 1971 } 1972 1973 /** 1974 * Determines if a character is Java whitespace. This includes Unicode 1975 * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and 1976 * PARAGRAPH_SEPARATOR) except the non-breaking spaces 1977 * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>); 1978 * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>, 1979 * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>, 1980 * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>, 1981 * and <code>'\u001F'</code>. 
1982 * <br> 1983 * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F 1984 * 1985 * @param ch character to test 1986 * @return true if ch is Java whitespace, else false 1987 * @see #isSpaceChar(char) 1988 * @since 1.1 1989 */ isWhitespace(char ch)1990 public static boolean isWhitespace(char ch) 1991 { 1992 int attr = readChar(ch); 1993 return ((((1 << (attr & TYPE_MASK)) 1994 & ((1 << SPACE_SEPARATOR) 1995 | (1 << LINE_SEPARATOR) 1996 | (1 << PARAGRAPH_SEPARATOR))) != 0) 1997 && (attr & NO_BREAK_MASK) == 0) 1998 || (ch <= '\u001F' && ((1 << ch) 1999 & ((1 << '\t') 2000 | (1 << '\n') 2001 | (1 << '\u000B') 2002 | (1 << '\u000C') 2003 | (1 << '\r') 2004 | (1 << '\u001C') 2005 | (1 << '\u001D') 2006 | (1 << '\u001E') 2007 | (1 << '\u001F'))) != 0); 2008 } 2009 2010 /** 2011 * Determines if a character has the ISO Control property. 2012 * <br> 2013 * ISO Control = [Cc] 2014 * 2015 * @param ch character to test 2016 * @return true if ch is an ISO Control character, else false 2017 * @see #isSpaceChar(char) 2018 * @see #isWhitespace(char) 2019 * @since 1.1 2020 */ isISOControl(char ch)2021 public static boolean isISOControl(char ch) 2022 { 2023 return getType(ch) == CONTROL; 2024 } 2025 2026 /** 2027 * Returns the Unicode general category property of a character. 
2028 * 2029 * @param ch character from which the general category property will 2030 * be retrieved 2031 * @return the character category property of ch as an integer 2032 * @see #UNASSIGNED 2033 * @see #UPPERCASE_LETTER 2034 * @see #LOWERCASE_LETTER 2035 * @see #TITLECASE_LETTER 2036 * @see #MODIFIER_LETTER 2037 * @see #OTHER_LETTER 2038 * @see #NON_SPACING_MARK 2039 * @see #ENCLOSING_MARK 2040 * @see #COMBINING_SPACING_MARK 2041 * @see #DECIMAL_DIGIT_NUMBER 2042 * @see #LETTER_NUMBER 2043 * @see #OTHER_NUMBER 2044 * @see #SPACE_SEPARATOR 2045 * @see #LINE_SEPARATOR 2046 * @see #PARAGRAPH_SEPARATOR 2047 * @see #CONTROL 2048 * @see #FORMAT 2049 * @see #PRIVATE_USE 2050 * @see #SURROGATE 2051 * @see #DASH_PUNCTUATION 2052 * @see #START_PUNCTUATION 2053 * @see #END_PUNCTUATION 2054 * @see #CONNECTOR_PUNCTUATION 2055 * @see #OTHER_PUNCTUATION 2056 * @see #MATH_SYMBOL 2057 * @see #CURRENCY_SYMBOL 2058 * @see #MODIFIER_SYMBOL 2059 * @see #INITIAL_QUOTE_PUNCTUATION 2060 * @see #FINAL_QUOTE_PUNCTUATION 2061 * @since 1.1 2062 */ getType(char ch)2063 public static native int getType(char ch); 2064 2065 /** 2066 * Converts a digit into a character which represents that digit 2067 * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX, 2068 * or the digit exceeds the radix, then the null character <code>'\0'</code> 2069 * is returned. Otherwise the return value is in '0'-'9' and 'a'-'z'. 2070 * <br> 2071 * return value boundary = U+0030-U+0039|U+0061-U+007A 2072 * 2073 * @param digit digit to be converted into a character 2074 * @param radix radix of digit 2075 * @return character representing digit in radix, or '\0' 2076 * @see #MIN_RADIX 2077 * @see #MAX_RADIX 2078 * @see #digit(char, int) 2079 */ forDigit(int digit, int radix)2080 public static char forDigit(int digit, int radix) 2081 { 2082 if (radix < MIN_RADIX || radix > MAX_RADIX 2083 || digit < 0 || digit >= radix) 2084 return '\0'; 2085 return (char) (digit < 10 ? 
('0' + digit) : ('a' - 10 + digit)); 2086 } 2087 2088 /** 2089 * Returns the Unicode directionality property of the character. This 2090 * is used in the visual ordering of text. 2091 * 2092 * @param ch the character to look up 2093 * @return the directionality constant, or DIRECTIONALITY_UNDEFINED 2094 * @see #DIRECTIONALITY_UNDEFINED 2095 * @see #DIRECTIONALITY_LEFT_TO_RIGHT 2096 * @see #DIRECTIONALITY_RIGHT_TO_LEFT 2097 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 2098 * @see #DIRECTIONALITY_EUROPEAN_NUMBER 2099 * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 2100 * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 2101 * @see #DIRECTIONALITY_ARABIC_NUMBER 2102 * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 2103 * @see #DIRECTIONALITY_NONSPACING_MARK 2104 * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL 2105 * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR 2106 * @see #DIRECTIONALITY_SEGMENT_SEPARATOR 2107 * @see #DIRECTIONALITY_WHITESPACE 2108 * @see #DIRECTIONALITY_OTHER_NEUTRALS 2109 * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 2110 * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 2111 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 2112 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 2113 * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 2114 * @since 1.4 2115 */ getDirectionality(char ch)2116 public static native byte getDirectionality(char ch); 2117 2118 /** 2119 * Determines whether the character is mirrored according to Unicode. For 2120 * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in 2121 * left-to-right text, but ')' in right-to-left text. 2122 * 2123 * @param ch the character to look up 2124 * @return true if the character is mirrored 2125 * @since 1.4 2126 */ isMirrored(char ch)2127 public static boolean isMirrored(char ch) 2128 { 2129 return (readChar(ch) & MIRROR_MASK) != 0; 2130 } 2131 2132 /** 2133 * Compares another Character to this Character, numerically. 
2134 * 2135 * @param anotherCharacter Character to compare with this Character 2136 * @return a negative integer if this Character is less than 2137 * anotherCharacter, zero if this Character is equal, and 2138 * a positive integer if this Character is greater 2139 * @throws NullPointerException if anotherCharacter is null 2140 * @since 1.2 2141 */ compareTo(Character anotherCharacter)2142 public int compareTo(Character anotherCharacter) 2143 { 2144 return value - anotherCharacter.value; 2145 } 2146 2147 /** 2148 * Compares an object to this Character. Assuming the object is a 2149 * Character object, this method performs the same comparison as 2150 * compareTo(Character). 2151 * 2152 * @param o object to compare 2153 * @return the comparison value 2154 * @throws ClassCastException if o is not a Character object 2155 * @throws NullPointerException if o is null 2156 * @see #compareTo(Character) 2157 * @since 1.2 2158 */ compareTo(Object o)2159 public int compareTo(Object o) 2160 { 2161 return compareTo((Character) o); 2162 } 2163 } // class Character 2164