1 /* java.lang.Character -- Wrapper class for char, and Unicode subsets
2    Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 /*
39  * Note: This class must not be merged with Classpath.  Gcj uses C-style
40  * arrays (see include/java-chartables.h) to store the Unicode character
41  * database, whereas Classpath uses Java objects (char[] extracted from
42  * String constants) in gnu.java.lang.CharData.  Gcj's approach is more
43  * efficient, because there is no vtable or data relocation to worry about.
44  * However, despite the difference in the database interface, the two
45  * versions share identical algorithms.
46  */
47 
48 package java.lang;
49 
50 import java.io.Serializable;
51 
52 /**
53  * Wrapper class for the primitive char data type.  In addition, this class
54  * allows one to retrieve property information and perform transformations
55  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
56  * java.lang.Character is designed to be very dynamic, and as such, it
57  * retrieves information on the Unicode character set from a separate
58  * database, gnu.java.lang.CharData, which can be easily upgraded.
59  *
60  * <p>For predicates, boundaries are used to describe
61  * the set of characters for which the method will return true.
62  * This syntax uses fairly normal regular expression notation.
63  * See 5.13 of the Unicode Standard, Version 3.0, for the
64  * boundary specification.
65  *
66  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
67  * for more information on the Unicode Standard.
68  *
69  * @author Tom Tromey <tromey@cygnus.com>
70  * @author Paul N. Fisher
71  * @author Jochen Hoenicke
72  * @author Eric Blake <ebb9@email.byu.edu>
73  * @since 1.0
74  * @status updated to 1.4
75  */
76 public final class Character implements Serializable, Comparable
77 {
78   /**
79    * A subset of Unicode blocks.
80    *
81    * @author Paul N. Fisher
82    * @author Eric Blake <ebb9@email.byu.edu>
83    * @since 1.2
84    */
85   public static class Subset
86   {
87     /** The name of the subset. */
88     private final String name;
89 
90     /**
91      * Construct a new subset of characters.
92      *
93      * @param name the name of the subset
94      * @throws NullPointerException if name is null
95      */
Subset(String name)96     protected Subset(String name)
97     {
98       // Note that name.toString() is name, unless name was null.
99       this.name = name.toString();
100     }
101 
102     /**
103      * Compares two Subsets for equality. This is <code>final</code>, and
104      * restricts the comparison on the <code>==</code> operator, so it returns
105      * true only for the same object.
106      *
107      * @param o the object to compare
108      * @return true if o is this
109      */
equals(Object o)110     public final boolean equals(Object o)
111     {
112       return o == this;
113     }
114 
115     /**
116      * Makes the original hashCode of Object final, to be consistent with
117      * equals.
118      *
119      * @return the hash code for this object
120      */
hashCode()121     public final int hashCode()
122     {
123       return super.hashCode();
124     }
125 
126     /**
127      * Returns the name of the subset.
128      *
129      * @return the name
130      */
toString()131     public final String toString()
132     {
133       return name;
134     }
135   } // class Subset
136 
137   /**
138    * A family of character subsets in the Unicode specification. A character
139    * is in at most one of these blocks.
140    *
141    * This inner class was generated automatically from
142    * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts.
143    * This Unicode definition file can be found on the
144    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
145    * JDK 1.4 uses Unicode version 3.0.0.
146    *
147    * @author scripts/unicode-blocks.pl (written by Eric Blake)
148    * @since 1.2
149    */
150   public static final class UnicodeBlock extends Subset
151   {
152     /** The start of the subset. */
153     private final char start;
154 
155     /** The end of the subset. */
156     private final char end;
157 
158     /**
159      * Constructor for strictly defined blocks.
160      *
161      * @param start the start character of the range
162      * @param end the end character of the range
163      * @param name the block name
164      */
UnicodeBlock(char start, char end, String name)165     private UnicodeBlock(char start, char end, String name)
166     {
167       super(name);
168       this.start = start;
169       this.end = end;
170     }
171 
172     /**
173      * Returns the Unicode character block which a character belongs to.
174      *
175      * @param ch the character to look up
176      * @return the set it belongs to, or null if it is not in one
177      */
of(char ch)178     public static UnicodeBlock of(char ch)
179     {
180       // Special case, since SPECIALS contains two ranges.
181       if (ch == '\uFEFF')
182         return SPECIALS;
183       // Simple binary search for the correct block.
184       int low = 0;
185       int hi = sets.length - 1;
186       while (low <= hi)
187         {
188           int mid = (low + hi) >> 1;
189           UnicodeBlock b = sets[mid];
190           if (ch < b.start)
191             hi = mid - 1;
192           else if (ch > b.end)
193             low = mid + 1;
194           else
195             return b;
196         }
197       return null;
198     }
199 
200     /**
201      * Basic Latin.
202      * '\u0000' - '\u007F'.
203      */
204     public final static UnicodeBlock BASIC_LATIN
205       = new UnicodeBlock('\u0000', '\u007F',
206                          "BASIC_LATIN");
207 
208     /**
209      * Latin-1 Supplement.
210      * '\u0080' - '\u00FF'.
211      */
212     public final static UnicodeBlock LATIN_1_SUPPLEMENT
213       = new UnicodeBlock('\u0080', '\u00FF',
214                          "LATIN_1_SUPPLEMENT");
215 
216     /**
217      * Latin Extended-A.
218      * '\u0100' - '\u017F'.
219      */
220     public final static UnicodeBlock LATIN_EXTENDED_A
221       = new UnicodeBlock('\u0100', '\u017F',
222                          "LATIN_EXTENDED_A");
223 
224     /**
225      * Latin Extended-B.
226      * '\u0180' - '\u024F'.
227      */
228     public final static UnicodeBlock LATIN_EXTENDED_B
229       = new UnicodeBlock('\u0180', '\u024F',
230                          "LATIN_EXTENDED_B");
231 
232     /**
233      * IPA Extensions.
234      * '\u0250' - '\u02AF'.
235      */
236     public final static UnicodeBlock IPA_EXTENSIONS
237       = new UnicodeBlock('\u0250', '\u02AF',
238                          "IPA_EXTENSIONS");
239 
240     /**
241      * Spacing Modifier Letters.
242      * '\u02B0' - '\u02FF'.
243      */
244     public final static UnicodeBlock SPACING_MODIFIER_LETTERS
245       = new UnicodeBlock('\u02B0', '\u02FF',
246                          "SPACING_MODIFIER_LETTERS");
247 
248     /**
249      * Combining Diacritical Marks.
250      * '\u0300' - '\u036F'.
251      */
252     public final static UnicodeBlock COMBINING_DIACRITICAL_MARKS
253       = new UnicodeBlock('\u0300', '\u036F',
254                          "COMBINING_DIACRITICAL_MARKS");
255 
256     /**
257      * Greek.
258      * '\u0370' - '\u03FF'.
259      */
260     public final static UnicodeBlock GREEK
261       = new UnicodeBlock('\u0370', '\u03FF',
262                          "GREEK");
263 
264     /**
265      * Cyrillic.
266      * '\u0400' - '\u04FF'.
267      */
268     public final static UnicodeBlock CYRILLIC
269       = new UnicodeBlock('\u0400', '\u04FF',
270                          "CYRILLIC");
271 
272     /**
273      * Armenian.
274      * '\u0530' - '\u058F'.
275      */
276     public final static UnicodeBlock ARMENIAN
277       = new UnicodeBlock('\u0530', '\u058F',
278                          "ARMENIAN");
279 
280     /**
281      * Hebrew.
282      * '\u0590' - '\u05FF'.
283      */
284     public final static UnicodeBlock HEBREW
285       = new UnicodeBlock('\u0590', '\u05FF',
286                          "HEBREW");
287 
288     /**
289      * Arabic.
290      * '\u0600' - '\u06FF'.
291      */
292     public final static UnicodeBlock ARABIC
293       = new UnicodeBlock('\u0600', '\u06FF',
294                          "ARABIC");
295 
296     /**
297      * Syriac.
298      * '\u0700' - '\u074F'.
299      * @since 1.4
300      */
301     public final static UnicodeBlock SYRIAC
302       = new UnicodeBlock('\u0700', '\u074F',
303                          "SYRIAC");
304 
305     /**
306      * Thaana.
307      * '\u0780' - '\u07BF'.
308      * @since 1.4
309      */
310     public final static UnicodeBlock THAANA
311       = new UnicodeBlock('\u0780', '\u07BF',
312                          "THAANA");
313 
314     /**
315      * Devanagari.
316      * '\u0900' - '\u097F'.
317      */
318     public final static UnicodeBlock DEVANAGARI
319       = new UnicodeBlock('\u0900', '\u097F',
320                          "DEVANAGARI");
321 
322     /**
323      * Bengali.
324      * '\u0980' - '\u09FF'.
325      */
326     public final static UnicodeBlock BENGALI
327       = new UnicodeBlock('\u0980', '\u09FF',
328                          "BENGALI");
329 
330     /**
331      * Gurmukhi.
332      * '\u0A00' - '\u0A7F'.
333      */
334     public final static UnicodeBlock GURMUKHI
335       = new UnicodeBlock('\u0A00', '\u0A7F',
336                          "GURMUKHI");
337 
338     /**
339      * Gujarati.
340      * '\u0A80' - '\u0AFF'.
341      */
342     public final static UnicodeBlock GUJARATI
343       = new UnicodeBlock('\u0A80', '\u0AFF',
344                          "GUJARATI");
345 
346     /**
347      * Oriya.
348      * '\u0B00' - '\u0B7F'.
349      */
350     public final static UnicodeBlock ORIYA
351       = new UnicodeBlock('\u0B00', '\u0B7F',
352                          "ORIYA");
353 
354     /**
355      * Tamil.
356      * '\u0B80' - '\u0BFF'.
357      */
358     public final static UnicodeBlock TAMIL
359       = new UnicodeBlock('\u0B80', '\u0BFF',
360                          "TAMIL");
361 
362     /**
363      * Telugu.
364      * '\u0C00' - '\u0C7F'.
365      */
366     public final static UnicodeBlock TELUGU
367       = new UnicodeBlock('\u0C00', '\u0C7F',
368                          "TELUGU");
369 
370     /**
371      * Kannada.
372      * '\u0C80' - '\u0CFF'.
373      */
374     public final static UnicodeBlock KANNADA
375       = new UnicodeBlock('\u0C80', '\u0CFF',
376                          "KANNADA");
377 
378     /**
379      * Malayalam.
380      * '\u0D00' - '\u0D7F'.
381      */
382     public final static UnicodeBlock MALAYALAM
383       = new UnicodeBlock('\u0D00', '\u0D7F',
384                          "MALAYALAM");
385 
386     /**
387      * Sinhala.
388      * '\u0D80' - '\u0DFF'.
389      * @since 1.4
390      */
391     public final static UnicodeBlock SINHALA
392       = new UnicodeBlock('\u0D80', '\u0DFF',
393                          "SINHALA");
394 
395     /**
396      * Thai.
397      * '\u0E00' - '\u0E7F'.
398      */
399     public final static UnicodeBlock THAI
400       = new UnicodeBlock('\u0E00', '\u0E7F',
401                          "THAI");
402 
403     /**
404      * Lao.
405      * '\u0E80' - '\u0EFF'.
406      */
407     public final static UnicodeBlock LAO
408       = new UnicodeBlock('\u0E80', '\u0EFF',
409                          "LAO");
410 
411     /**
412      * Tibetan.
413      * '\u0F00' - '\u0FFF'.
414      */
415     public final static UnicodeBlock TIBETAN
416       = new UnicodeBlock('\u0F00', '\u0FFF',
417                          "TIBETAN");
418 
419     /**
420      * Myanmar.
421      * '\u1000' - '\u109F'.
422      * @since 1.4
423      */
424     public final static UnicodeBlock MYANMAR
425       = new UnicodeBlock('\u1000', '\u109F',
426                          "MYANMAR");
427 
428     /**
429      * Georgian.
430      * '\u10A0' - '\u10FF'.
431      */
432     public final static UnicodeBlock GEORGIAN
433       = new UnicodeBlock('\u10A0', '\u10FF',
434                          "GEORGIAN");
435 
436     /**
437      * Hangul Jamo.
438      * '\u1100' - '\u11FF'.
439      */
440     public final static UnicodeBlock HANGUL_JAMO
441       = new UnicodeBlock('\u1100', '\u11FF',
442                          "HANGUL_JAMO");
443 
444     /**
445      * Ethiopic.
446      * '\u1200' - '\u137F'.
447      * @since 1.4
448      */
449     public final static UnicodeBlock ETHIOPIC
450       = new UnicodeBlock('\u1200', '\u137F',
451                          "ETHIOPIC");
452 
453     /**
454      * Cherokee.
455      * '\u13A0' - '\u13FF'.
456      * @since 1.4
457      */
458     public final static UnicodeBlock CHEROKEE
459       = new UnicodeBlock('\u13A0', '\u13FF',
460                          "CHEROKEE");
461 
462     /**
463      * Unified Canadian Aboriginal Syllabics.
464      * '\u1400' - '\u167F'.
465      * @since 1.4
466      */
467     public final static UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
468       = new UnicodeBlock('\u1400', '\u167F',
469                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
470 
471     /**
472      * Ogham.
473      * '\u1680' - '\u169F'.
474      * @since 1.4
475      */
476     public final static UnicodeBlock OGHAM
477       = new UnicodeBlock('\u1680', '\u169F',
478                          "OGHAM");
479 
480     /**
481      * Runic.
482      * '\u16A0' - '\u16FF'.
483      * @since 1.4
484      */
485     public final static UnicodeBlock RUNIC
486       = new UnicodeBlock('\u16A0', '\u16FF',
487                          "RUNIC");
488 
489     /**
490      * Khmer.
491      * '\u1780' - '\u17FF'.
492      * @since 1.4
493      */
494     public final static UnicodeBlock KHMER
495       = new UnicodeBlock('\u1780', '\u17FF',
496                          "KHMER");
497 
498     /**
499      * Mongolian.
500      * '\u1800' - '\u18AF'.
501      * @since 1.4
502      */
503     public final static UnicodeBlock MONGOLIAN
504       = new UnicodeBlock('\u1800', '\u18AF',
505                          "MONGOLIAN");
506 
507     /**
508      * Latin Extended Additional.
509      * '\u1E00' - '\u1EFF'.
510      */
511     public final static UnicodeBlock LATIN_EXTENDED_ADDITIONAL
512       = new UnicodeBlock('\u1E00', '\u1EFF',
513                          "LATIN_EXTENDED_ADDITIONAL");
514 
515     /**
516      * Greek Extended.
517      * '\u1F00' - '\u1FFF'.
518      */
519     public final static UnicodeBlock GREEK_EXTENDED
520       = new UnicodeBlock('\u1F00', '\u1FFF',
521                          "GREEK_EXTENDED");
522 
523     /**
524      * General Punctuation.
525      * '\u2000' - '\u206F'.
526      */
527     public final static UnicodeBlock GENERAL_PUNCTUATION
528       = new UnicodeBlock('\u2000', '\u206F',
529                          "GENERAL_PUNCTUATION");
530 
531     /**
532      * Superscripts and Subscripts.
533      * '\u2070' - '\u209F'.
534      */
535     public final static UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
536       = new UnicodeBlock('\u2070', '\u209F',
537                          "SUPERSCRIPTS_AND_SUBSCRIPTS");
538 
539     /**
540      * Currency Symbols.
541      * '\u20A0' - '\u20CF'.
542      */
543     public final static UnicodeBlock CURRENCY_SYMBOLS
544       = new UnicodeBlock('\u20A0', '\u20CF',
545                          "CURRENCY_SYMBOLS");
546 
547     /**
548      * Combining Marks for Symbols.
549      * '\u20D0' - '\u20FF'.
550      */
551     public final static UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
552       = new UnicodeBlock('\u20D0', '\u20FF',
553                          "COMBINING_MARKS_FOR_SYMBOLS");
554 
555     /**
556      * Letterlike Symbols.
557      * '\u2100' - '\u214F'.
558      */
559     public final static UnicodeBlock LETTERLIKE_SYMBOLS
560       = new UnicodeBlock('\u2100', '\u214F',
561                          "LETTERLIKE_SYMBOLS");
562 
563     /**
564      * Number Forms.
565      * '\u2150' - '\u218F'.
566      */
567     public final static UnicodeBlock NUMBER_FORMS
568       = new UnicodeBlock('\u2150', '\u218F',
569                          "NUMBER_FORMS");
570 
571     /**
572      * Arrows.
573      * '\u2190' - '\u21FF'.
574      */
575     public final static UnicodeBlock ARROWS
576       = new UnicodeBlock('\u2190', '\u21FF',
577                          "ARROWS");
578 
579     /**
580      * Mathematical Operators.
581      * '\u2200' - '\u22FF'.
582      */
583     public final static UnicodeBlock MATHEMATICAL_OPERATORS
584       = new UnicodeBlock('\u2200', '\u22FF',
585                          "MATHEMATICAL_OPERATORS");
586 
587     /**
588      * Miscellaneous Technical.
589      * '\u2300' - '\u23FF'.
590      */
591     public final static UnicodeBlock MISCELLANEOUS_TECHNICAL
592       = new UnicodeBlock('\u2300', '\u23FF',
593                          "MISCELLANEOUS_TECHNICAL");
594 
595     /**
596      * Control Pictures.
597      * '\u2400' - '\u243F'.
598      */
599     public final static UnicodeBlock CONTROL_PICTURES
600       = new UnicodeBlock('\u2400', '\u243F',
601                          "CONTROL_PICTURES");
602 
603     /**
604      * Optical Character Recognition.
605      * '\u2440' - '\u245F'.
606      */
607     public final static UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
608       = new UnicodeBlock('\u2440', '\u245F',
609                          "OPTICAL_CHARACTER_RECOGNITION");
610 
611     /**
612      * Enclosed Alphanumerics.
613      * '\u2460' - '\u24FF'.
614      */
615     public final static UnicodeBlock ENCLOSED_ALPHANUMERICS
616       = new UnicodeBlock('\u2460', '\u24FF',
617                          "ENCLOSED_ALPHANUMERICS");
618 
619     /**
620      * Box Drawing.
621      * '\u2500' - '\u257F'.
622      */
623     public final static UnicodeBlock BOX_DRAWING
624       = new UnicodeBlock('\u2500', '\u257F',
625                          "BOX_DRAWING");
626 
627     /**
628      * Block Elements.
629      * '\u2580' - '\u259F'.
630      */
631     public final static UnicodeBlock BLOCK_ELEMENTS
632       = new UnicodeBlock('\u2580', '\u259F',
633                          "BLOCK_ELEMENTS");
634 
635     /**
636      * Geometric Shapes.
637      * '\u25A0' - '\u25FF'.
638      */
639     public final static UnicodeBlock GEOMETRIC_SHAPES
640       = new UnicodeBlock('\u25A0', '\u25FF',
641                          "GEOMETRIC_SHAPES");
642 
643     /**
644      * Miscellaneous Symbols.
645      * '\u2600' - '\u26FF'.
646      */
647     public final static UnicodeBlock MISCELLANEOUS_SYMBOLS
648       = new UnicodeBlock('\u2600', '\u26FF',
649                          "MISCELLANEOUS_SYMBOLS");
650 
651     /**
652      * Dingbats.
653      * '\u2700' - '\u27BF'.
654      */
655     public final static UnicodeBlock DINGBATS
656       = new UnicodeBlock('\u2700', '\u27BF',
657                          "DINGBATS");
658 
659     /**
660      * Braille Patterns.
661      * '\u2800' - '\u28FF'.
662      * @since 1.4
663      */
664     public final static UnicodeBlock BRAILLE_PATTERNS
665       = new UnicodeBlock('\u2800', '\u28FF',
666                          "BRAILLE_PATTERNS");
667 
668     /**
669      * CJK Radicals Supplement.
670      * '\u2E80' - '\u2EFF'.
671      * @since 1.4
672      */
673     public final static UnicodeBlock CJK_RADICALS_SUPPLEMENT
674       = new UnicodeBlock('\u2E80', '\u2EFF',
675                          "CJK_RADICALS_SUPPLEMENT");
676 
677     /**
678      * Kangxi Radicals.
679      * '\u2F00' - '\u2FDF'.
680      * @since 1.4
681      */
682     public final static UnicodeBlock KANGXI_RADICALS
683       = new UnicodeBlock('\u2F00', '\u2FDF',
684                          "KANGXI_RADICALS");
685 
686     /**
687      * Ideographic Description Characters.
688      * '\u2FF0' - '\u2FFF'.
689      * @since 1.4
690      */
691     public final static UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
692       = new UnicodeBlock('\u2FF0', '\u2FFF',
693                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
694 
695     /**
696      * CJK Symbols and Punctuation.
697      * '\u3000' - '\u303F'.
698      */
699     public final static UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
700       = new UnicodeBlock('\u3000', '\u303F',
701                          "CJK_SYMBOLS_AND_PUNCTUATION");
702 
703     /**
704      * Hiragana.
705      * '\u3040' - '\u309F'.
706      */
707     public final static UnicodeBlock HIRAGANA
708       = new UnicodeBlock('\u3040', '\u309F',
709                          "HIRAGANA");
710 
711     /**
712      * Katakana.
713      * '\u30A0' - '\u30FF'.
714      */
715     public final static UnicodeBlock KATAKANA
716       = new UnicodeBlock('\u30A0', '\u30FF',
717                          "KATAKANA");
718 
719     /**
720      * Bopomofo.
721      * '\u3100' - '\u312F'.
722      */
723     public final static UnicodeBlock BOPOMOFO
724       = new UnicodeBlock('\u3100', '\u312F',
725                          "BOPOMOFO");
726 
727     /**
728      * Hangul Compatibility Jamo.
729      * '\u3130' - '\u318F'.
730      */
731     public final static UnicodeBlock HANGUL_COMPATIBILITY_JAMO
732       = new UnicodeBlock('\u3130', '\u318F',
733                          "HANGUL_COMPATIBILITY_JAMO");
734 
735     /**
736      * Kanbun.
737      * '\u3190' - '\u319F'.
738      */
739     public final static UnicodeBlock KANBUN
740       = new UnicodeBlock('\u3190', '\u319F',
741                          "KANBUN");
742 
743     /**
744      * Bopomofo Extended.
745      * '\u31A0' - '\u31BF'.
746      * @since 1.4
747      */
748     public final static UnicodeBlock BOPOMOFO_EXTENDED
749       = new UnicodeBlock('\u31A0', '\u31BF',
750                          "BOPOMOFO_EXTENDED");
751 
752     /**
753      * Enclosed CJK Letters and Months.
754      * '\u3200' - '\u32FF'.
755      */
756     public final static UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
757       = new UnicodeBlock('\u3200', '\u32FF',
758                          "ENCLOSED_CJK_LETTERS_AND_MONTHS");
759 
760     /**
761      * CJK Compatibility.
762      * '\u3300' - '\u33FF'.
763      */
764     public final static UnicodeBlock CJK_COMPATIBILITY
765       = new UnicodeBlock('\u3300', '\u33FF',
766                          "CJK_COMPATIBILITY");
767 
768     /**
769      * CJK Unified Ideographs Extension A.
770      * '\u3400' - '\u4DB5'.
771      * @since 1.4
772      */
773     public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
774       = new UnicodeBlock('\u3400', '\u4DB5',
775                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
776 
777     /**
778      * CJK Unified Ideographs.
779      * '\u4E00' - '\u9FFF'.
780      */
781     public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
782       = new UnicodeBlock('\u4E00', '\u9FFF',
783                          "CJK_UNIFIED_IDEOGRAPHS");
784 
785     /**
786      * Yi Syllables.
787      * '\uA000' - '\uA48F'.
788      * @since 1.4
789      */
790     public final static UnicodeBlock YI_SYLLABLES
791       = new UnicodeBlock('\uA000', '\uA48F',
792                          "YI_SYLLABLES");
793 
794     /**
795      * Yi Radicals.
796      * '\uA490' - '\uA4CF'.
797      * @since 1.4
798      */
799     public final static UnicodeBlock YI_RADICALS
800       = new UnicodeBlock('\uA490', '\uA4CF',
801                          "YI_RADICALS");
802 
803     /**
804      * Hangul Syllables.
805      * '\uAC00' - '\uD7A3'.
806      */
807     public final static UnicodeBlock HANGUL_SYLLABLES
808       = new UnicodeBlock('\uAC00', '\uD7A3',
809                          "HANGUL_SYLLABLES");
810 
811     /**
812      * Surrogates Area.
813      * '\uD800' - '\uDFFF'.
814      */
815     public final static UnicodeBlock SURROGATES_AREA
816       = new UnicodeBlock('\uD800', '\uDFFF',
817                          "SURROGATES_AREA");
818 
819     /**
820      * Private Use Area.
821      * '\uE000' - '\uF8FF'.
822      */
823     public final static UnicodeBlock PRIVATE_USE_AREA
824       = new UnicodeBlock('\uE000', '\uF8FF',
825                          "PRIVATE_USE_AREA");
826 
827     /**
828      * CJK Compatibility Ideographs.
829      * '\uF900' - '\uFAFF'.
830      */
831     public final static UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
832       = new UnicodeBlock('\uF900', '\uFAFF',
833                          "CJK_COMPATIBILITY_IDEOGRAPHS");
834 
835     /**
836      * Alphabetic Presentation Forms.
837      * '\uFB00' - '\uFB4F'.
838      */
839     public final static UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
840       = new UnicodeBlock('\uFB00', '\uFB4F',
841                          "ALPHABETIC_PRESENTATION_FORMS");
842 
843     /**
844      * Arabic Presentation Forms-A.
845      * '\uFB50' - '\uFDFF'.
846      */
847     public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_A
848       = new UnicodeBlock('\uFB50', '\uFDFF',
849                          "ARABIC_PRESENTATION_FORMS_A");
850 
851     /**
852      * Combining Half Marks.
853      * '\uFE20' - '\uFE2F'.
854      */
855     public final static UnicodeBlock COMBINING_HALF_MARKS
856       = new UnicodeBlock('\uFE20', '\uFE2F',
857                          "COMBINING_HALF_MARKS");
858 
859     /**
860      * CJK Compatibility Forms.
861      * '\uFE30' - '\uFE4F'.
862      */
863     public final static UnicodeBlock CJK_COMPATIBILITY_FORMS
864       = new UnicodeBlock('\uFE30', '\uFE4F',
865                          "CJK_COMPATIBILITY_FORMS");
866 
867     /**
868      * Small Form Variants.
869      * '\uFE50' - '\uFE6F'.
870      */
871     public final static UnicodeBlock SMALL_FORM_VARIANTS
872       = new UnicodeBlock('\uFE50', '\uFE6F',
873                          "SMALL_FORM_VARIANTS");
874 
875     /**
876      * Arabic Presentation Forms-B.
877      * '\uFE70' - '\uFEFE'.
878      */
879     public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_B
880       = new UnicodeBlock('\uFE70', '\uFEFE',
881                          "ARABIC_PRESENTATION_FORMS_B");
882 
883     /**
884      * Halfwidth and Fullwidth Forms.
885      * '\uFF00' - '\uFFEF'.
886      */
887     public final static UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
888       = new UnicodeBlock('\uFF00', '\uFFEF',
889                          "HALFWIDTH_AND_FULLWIDTH_FORMS");
890 
891     /**
892      * Specials.
893      * '\uFEFF', '\uFFF0' - '\uFFFD'.
894      */
895     public final static UnicodeBlock SPECIALS
896       = new UnicodeBlock('\uFFF0', '\uFFFD',
897                          "SPECIALS");
898 
899     /**
900      * The defined subsets.
901      */
902     private static final UnicodeBlock sets[] = {
903       BASIC_LATIN,
904       LATIN_1_SUPPLEMENT,
905       LATIN_EXTENDED_A,
906       LATIN_EXTENDED_B,
907       IPA_EXTENSIONS,
908       SPACING_MODIFIER_LETTERS,
909       COMBINING_DIACRITICAL_MARKS,
910       GREEK,
911       CYRILLIC,
912       ARMENIAN,
913       HEBREW,
914       ARABIC,
915       SYRIAC,
916       THAANA,
917       DEVANAGARI,
918       BENGALI,
919       GURMUKHI,
920       GUJARATI,
921       ORIYA,
922       TAMIL,
923       TELUGU,
924       KANNADA,
925       MALAYALAM,
926       SINHALA,
927       THAI,
928       LAO,
929       TIBETAN,
930       MYANMAR,
931       GEORGIAN,
932       HANGUL_JAMO,
933       ETHIOPIC,
934       CHEROKEE,
935       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
936       OGHAM,
937       RUNIC,
938       KHMER,
939       MONGOLIAN,
940       LATIN_EXTENDED_ADDITIONAL,
941       GREEK_EXTENDED,
942       GENERAL_PUNCTUATION,
943       SUPERSCRIPTS_AND_SUBSCRIPTS,
944       CURRENCY_SYMBOLS,
945       COMBINING_MARKS_FOR_SYMBOLS,
946       LETTERLIKE_SYMBOLS,
947       NUMBER_FORMS,
948       ARROWS,
949       MATHEMATICAL_OPERATORS,
950       MISCELLANEOUS_TECHNICAL,
951       CONTROL_PICTURES,
952       OPTICAL_CHARACTER_RECOGNITION,
953       ENCLOSED_ALPHANUMERICS,
954       BOX_DRAWING,
955       BLOCK_ELEMENTS,
956       GEOMETRIC_SHAPES,
957       MISCELLANEOUS_SYMBOLS,
958       DINGBATS,
959       BRAILLE_PATTERNS,
960       CJK_RADICALS_SUPPLEMENT,
961       KANGXI_RADICALS,
962       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
963       CJK_SYMBOLS_AND_PUNCTUATION,
964       HIRAGANA,
965       KATAKANA,
966       BOPOMOFO,
967       HANGUL_COMPATIBILITY_JAMO,
968       KANBUN,
969       BOPOMOFO_EXTENDED,
970       ENCLOSED_CJK_LETTERS_AND_MONTHS,
971       CJK_COMPATIBILITY,
972       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
973       CJK_UNIFIED_IDEOGRAPHS,
974       YI_SYLLABLES,
975       YI_RADICALS,
976       HANGUL_SYLLABLES,
977       SURROGATES_AREA,
978       PRIVATE_USE_AREA,
979       CJK_COMPATIBILITY_IDEOGRAPHS,
980       ALPHABETIC_PRESENTATION_FORMS,
981       ARABIC_PRESENTATION_FORMS_A,
982       COMBINING_HALF_MARKS,
983       CJK_COMPATIBILITY_FORMS,
984       SMALL_FORM_VARIANTS,
985       ARABIC_PRESENTATION_FORMS_B,
986       HALFWIDTH_AND_FULLWIDTH_FORMS,
987       SPECIALS,
988     };
989   } // class UnicodeBlock
990 
991   /**
992    * The immutable value of this Character.
993    *
994    * @serial the value of this Character
995    */
996   private final char value;
997 
998   /**
999    * Compatible with JDK 1.0+.
1000    */
1001   private static final long serialVersionUID = 3786198910865385080L;
1002 
1003   /**
1004    * Smallest value allowed for radix arguments in Java. This value is 2.
1005    *
1006    * @see #digit(char, int)
1007    * @see #forDigit(int, int)
1008    * @see Integer#toString(int, int)
1009    * @see Integer#valueOf(String)
1010    */
1011   public static final int MIN_RADIX = 2;
1012 
1013   /**
1014    * Largest value allowed for radix arguments in Java. This value is 36.
1015    *
1016    * @see #digit(char, int)
1017    * @see #forDigit(int, int)
1018    * @see Integer#toString(int, int)
1019    * @see Integer#valueOf(String)
1020    */
1021   public static final int MAX_RADIX = 36;
1022 
1023   /**
1024    * The minimum value the char data type can hold.
1025    * This value is <code>'\\u0000'</code>.
1026    */
1027   public static final char MIN_VALUE = '\u0000';
1028 
1029   /**
1030    * The maximum value the char data type can hold.
1031    * This value is <code>'\\uFFFF'</code>.
1032    */
1033   public static final char MAX_VALUE = '\uFFFF';
1034 
1035   /**
1036    * Class object representing the primitive char data type.
1037    *
1038    * @since 1.1
1039    */
1040   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1041 
1042   /**
1043    * Lu = Letter, Uppercase (Informative).
1044    *
1045    * @since 1.1
1046    */
1047   public static final byte UPPERCASE_LETTER = 1;
1048 
1049   /**
1050    * Ll = Letter, Lowercase (Informative).
1051    *
1052    * @since 1.1
1053    */
1054   public static final byte LOWERCASE_LETTER = 2;
1055 
1056   /**
1057    * Lt = Letter, Titlecase (Informative).
1058    *
1059    * @since 1.1
1060    */
1061   public static final byte TITLECASE_LETTER = 3;
1062 
1063   /**
1064    * Mn = Mark, Non-Spacing (Normative).
1065    *
1066    * @since 1.1
1067    */
1068   public static final byte NON_SPACING_MARK = 6;
1069 
1070   /**
1071    * Mc = Mark, Spacing Combining (Normative).
1072    *
1073    * @since 1.1
1074    */
1075   public static final byte COMBINING_SPACING_MARK = 8;
1076 
1077   /**
1078    * Me = Mark, Enclosing (Normative).
1079    *
1080    * @since 1.1
1081    */
1082   public static final byte ENCLOSING_MARK = 7;
1083 
1084   /**
1085    * Nd = Number, Decimal Digit (Normative).
1086    *
1087    * @since 1.1
1088    */
1089   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1090 
1091   /**
1092    * Nl = Number, Letter (Normative).
1093    *
1094    * @since 1.1
1095    */
1096   public static final byte LETTER_NUMBER = 10;
1097 
1098   /**
1099    * No = Number, Other (Normative).
1100    *
1101    * @since 1.1
1102    */
1103   public static final byte OTHER_NUMBER = 11;
1104 
1105   /**
1106    * Zs = Separator, Space (Normative).
1107    *
1108    * @since 1.1
1109    */
1110   public static final byte SPACE_SEPARATOR = 12;
1111 
1112   /**
1113    * Zl = Separator, Line (Normative).
1114    *
1115    * @since 1.1
1116    */
1117   public static final byte LINE_SEPARATOR = 13;
1118 
1119   /**
1120    * Zp = Separator, Paragraph (Normative).
1121    *
1122    * @since 1.1
1123    */
1124   public static final byte PARAGRAPH_SEPARATOR = 14;
1125 
1126   /**
1127    * Cc = Other, Control (Normative).
1128    *
1129    * @since 1.1
1130    */
1131   public static final byte CONTROL = 15;
1132 
1133   /**
1134    * Cf = Other, Format (Normative).
1135    *
1136    * @since 1.1
1137    */
1138   public static final byte FORMAT = 16;
1139 
1140   /**
1141    * Cs = Other, Surrogate (Normative).
1142    *
1143    * @since 1.1
1144    */
1145   public static final byte SURROGATE = 19;
1146 
1147   /**
1148    * Co = Other, Private Use (Normative).
1149    *
1150    * @since 1.1
1151    */
1152   public static final byte PRIVATE_USE = 18;
1153 
1154   /**
1155    * Cn = Other, Not Assigned (Normative).
1156    *
1157    * @since 1.1
1158    */
1159   public static final byte UNASSIGNED = 0;
1160 
1161   /**
1162    * Lm = Letter, Modifier (Informative).
1163    *
1164    * @since 1.1
1165    */
1166   public static final byte MODIFIER_LETTER = 4;
1167 
1168   /**
1169    * Lo = Letter, Other (Informative).
1170    *
1171    * @since 1.1
1172    */
1173   public static final byte OTHER_LETTER = 5;
1174 
1175   /**
1176    * Pc = Punctuation, Connector (Informative).
1177    *
1178    * @since 1.1
1179    */
1180   public static final byte CONNECTOR_PUNCTUATION = 23;
1181 
1182   /**
1183    * Pd = Punctuation, Dash (Informative).
1184    *
1185    * @since 1.1
1186    */
1187   public static final byte DASH_PUNCTUATION = 20;
1188 
1189   /**
1190    * Ps = Punctuation, Open (Informative).
1191    *
1192    * @since 1.1
1193    */
1194   public static final byte START_PUNCTUATION = 21;
1195 
1196   /**
1197    * Pe = Punctuation, Close (Informative).
1198    *
1199    * @since 1.1
1200    */
1201   public static final byte END_PUNCTUATION = 22;
1202 
1203   /**
1204    * Pi = Punctuation, Initial Quote (Informative).
1205    *
1206    * @since 1.4
1207    */
1208   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1209 
1210   /**
1211    * Pf = Punctuation, Final Quote (Informative).
1212    *
1213    * @since 1.4
1214    */
1215   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1216 
1217   /**
1218    * Po = Punctuation, Other (Informative).
1219    *
1220    * @since 1.1
1221    */
1222   public static final byte OTHER_PUNCTUATION = 24;
1223 
1224   /**
1225    * Sm = Symbol, Math (Informative).
1226    *
1227    * @since 1.1
1228    */
1229   public static final byte MATH_SYMBOL = 25;
1230 
1231   /**
1232    * Sc = Symbol, Currency (Informative).
1233    *
1234    * @since 1.1
1235    */
1236   public static final byte CURRENCY_SYMBOL = 26;
1237 
1238   /**
1239    * Sk = Symbol, Modifier (Informative).
1240    *
1241    * @since 1.1
1242    */
1243   public static final byte MODIFIER_SYMBOL = 27;
1244 
1245   /**
1246    * So = Symbol, Other (Informative).
1247    *
1248    * @since 1.1
1249    */
1250   public static final byte OTHER_SYMBOL = 28;
1251 
1252   /**
1253    * Undefined bidirectional character type. Undefined char values have
1254    * undefined directionality in the Unicode specification.
1255    *
1256    * @since 1.4
1257    */
1258   public static final byte DIRECTIONALITY_UNDEFINED = -1;
1259 
1260   /**
1261    * Strong bidirectional character type "L".
1262    *
1263    * @since 1.4
1264    */
1265   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1266 
1267   /**
1268    * Strong bidirectional character type "R".
1269    *
1270    * @since 1.4
1271    */
1272   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1273 
1274   /**
1275    * Strong bidirectional character type "AL".
1276    *
1277    * @since 1.4
1278    */
1279   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1280 
1281   /**
1282    * Weak bidirectional character type "EN".
1283    *
1284    * @since 1.4
1285    */
1286   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1287 
1288   /**
1289    * Weak bidirectional character type "ES".
1290    *
1291    * @since 1.4
1292    */
1293   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1294 
1295   /**
1296    * Weak bidirectional character type "ET".
1297    *
1298    * @since 1.4
1299    */
1300   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1301 
1302   /**
1303    * Weak bidirectional character type "AN".
1304    *
1305    * @since 1.4
1306    */
1307   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1308 
1309   /**
1310    * Weak bidirectional character type "CS".
1311    *
1312    * @since 1.4
1313    */
1314   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1315 
1316   /**
1317    * Weak bidirectional character type "NSM".
1318    *
1319    * @since 1.4
1320    */
1321   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1322 
1323   /**
1324    * Weak bidirectional character type "BN".
1325    *
1326    * @since 1.4
1327    */
1328   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1329 
1330   /**
1331    * Neutral bidirectional character type "B".
1332    *
1333    * @since 1.4
1334    */
1335   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1336 
1337   /**
1338    * Neutral bidirectional character type "S".
1339    *
1340    * @since 1.4
1341    */
1342   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1343 
1344   /**
   * Neutral bidirectional character type "WS".
1346    *
1347    * @since 1.4
1348    */
1349   public static final byte DIRECTIONALITY_WHITESPACE = 12;
1350 
1351   /**
1352    * Neutral bidirectional character type "ON".
1353    *
1354    * @since 1.4
1355    */
1356   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1357 
1358   /**
1359    * Strong bidirectional character type "LRE".
1360    *
1361    * @since 1.4
1362    */
1363   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1364 
1365   /**
1366    * Strong bidirectional character type "LRO".
1367    *
1368    * @since 1.4
1369    */
1370   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1371 
1372   /**
1373    * Strong bidirectional character type "RLE".
1374    *
1375    * @since 1.4
1376    */
1377   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1378 
1379   /**
1380    * Strong bidirectional character type "RLO".
1381    *
1382    * @since 1.4
1383    */
1384   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1385 
1386   /**
1387    * Weak bidirectional character type "PDF".
1388    *
1389    * @since 1.4
1390    */
1391   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1392 
1393   /**
1394    * Mask for grabbing the type out of the result of readChar.
1395    * @see #readChar(char)
1396    */
1397   private static final int TYPE_MASK = 0x1F;
1398 
1399   /**
1400    * Mask for grabbing the non-breaking space flag out of the result of
1401    * readChar.
1402    * @see #readChar(char)
1403    */
1404   private static final int NO_BREAK_MASK = 0x20;
1405 
1406   /**
1407    * Mask for grabbing the mirrored directionality flag out of the result
1408    * of readChar.
1409    * @see #readChar(char)
1410    */
1411   private static final int MIRROR_MASK = 0x40;
1412 
  /**
   * Looks up the packed attribute word of a character in the Unicode
   * attribute database. The result is laid out as follows: the lower 5 bits
   * (selected by {@link #TYPE_MASK}) are the character type, the next 2 bits
   * are flags (selected by {@link #NO_BREAK_MASK} and {@link #MIRROR_MASK}),
   * and the top 9 bits are the offset into the attribute tables. Note that
   * the top 9 bits are meaningless in this context; they are useful only in
   * the native code.
   *
   * @param ch the character to look up
   * @return the character's attribute offset, flags and type, packed as
   *         described above
   * @see #TYPE_MASK
   * @see #NO_BREAK_MASK
   * @see #MIRROR_MASK
   */
  private static native char readChar(char ch);
1427 
1428   /**
1429    * Wraps up a character.
1430    *
1431    * @param value the character to wrap
1432    */
Character(char value)1433   public Character(char value)
1434   {
1435     this.value = value;
1436   }
1437 
1438   /**
1439    * Returns the character which has been wrapped by this class.
1440    *
1441    * @return the character wrapped
1442    */
charValue()1443   public char charValue()
1444   {
1445     return value;
1446   }
1447 
1448   /**
1449    * Returns the numerical value (unsigned) of the wrapped character.
1450    * Range of returned values: 0x0000-0xFFFF.
1451    *
1452    * @return the value of the wrapped character
1453    */
hashCode()1454   public int hashCode()
1455   {
1456     return value;
1457   }
1458 
1459   /**
1460    * Determines if an object is equal to this object. This is only true for
1461    * another Character object wrapping the same value.
1462    *
1463    * @param o object to compare
1464    * @return true if o is a Character with the same value
1465    */
equals(Object o)1466   public boolean equals(Object o)
1467   {
1468     return o instanceof Character && value == ((Character) o).value;
1469   }
1470 
1471   /**
1472    * Converts the wrapped character into a String.
1473    *
1474    * @return a String containing one character -- the wrapped character
1475    *         of this instance
1476    */
toString()1477   public String toString()
1478   {
1479     // This assumes that String.valueOf(char) can create a single-character
1480     // String more efficiently than through the public API.
1481     return String.valueOf(value);
1482   }
1483 
1484   /**
1485    * Returns a String of length 1 representing the specified character.
1486    *
1487    * @param ch the character to convert
1488    * @return a String containing the character
1489    * @since 1.4
1490    */
toString(char ch)1491   public static String toString(char ch)
1492   {
1493     // This assumes that String.valueOf(char) can create a single-character
1494     // String more efficiently than through the public API.
1495     return String.valueOf(ch);
1496   }
1497 
1498   /**
1499    * Determines if a character is a Unicode lowercase letter. For example,
1500    * <code>'a'</code> is lowercase.
1501    * <br>
1502    * lowercase = [Ll]
1503    *
1504    * @param ch character to test
1505    * @return true if ch is a Unicode lowercase letter, else false
1506    * @see #isUpperCase(char)
1507    * @see #isTitleCase(char)
1508    * @see #toLowerCase(char)
1509    * @see #getType(char)
1510    */
isLowerCase(char ch)1511   public static boolean isLowerCase(char ch)
1512   {
1513     return getType(ch) == LOWERCASE_LETTER;
1514   }
1515 
1516   /**
1517    * Determines if a character is a Unicode uppercase letter. For example,
1518    * <code>'A'</code> is uppercase.
1519    * <br>
1520    * uppercase = [Lu]
1521    *
1522    * @param ch character to test
1523    * @return true if ch is a Unicode uppercase letter, else false
1524    * @see #isLowerCase(char)
1525    * @see #isTitleCase(char)
1526    * @see #toUpperCase(char)
1527    * @see #getType(char)
1528    */
isUpperCase(char ch)1529   public static boolean isUpperCase(char ch)
1530   {
1531     return getType(ch) == UPPERCASE_LETTER;
1532   }
1533 
1534   /**
1535    * Determines if a character is a Unicode titlecase letter. For example,
1536    * the character "Lj" (Latin capital L with small letter j) is titlecase.
1537    * <br>
1538    * titlecase = [Lt]
1539    *
1540    * @param ch character to test
1541    * @return true if ch is a Unicode titlecase letter, else false
1542    * @see #isLowerCase(char)
1543    * @see #isUpperCase(char)
1544    * @see #toTitleCase(char)
1545    * @see #getType(char)
1546    */
isTitleCase(char ch)1547   public static boolean isTitleCase(char ch)
1548   {
1549     return getType(ch) == TITLECASE_LETTER;
1550   }
1551 
1552   /**
1553    * Determines if a character is a Unicode decimal digit. For example,
1554    * <code>'0'</code> is a digit.
1555    * <br>
1556    * Unicode decimal digit = [Nd]
1557    *
1558    * @param ch character to test
1559    * @return true if ch is a Unicode decimal digit, else false
1560    * @see #digit(char, int)
1561    * @see #forDigit(int, int)
1562    * @see #getType(char)
1563    */
isDigit(char ch)1564   public static boolean isDigit(char ch)
1565   {
1566     return getType(ch) == DECIMAL_DIGIT_NUMBER;
1567   }
1568 
1569   /**
1570    * Determines if a character is part of the Unicode Standard. This is an
1571    * evolving standard, but covers every character in the data file.
1572    * <br>
1573    * defined = not [Cn]
1574    *
1575    * @param ch character to test
1576    * @return true if ch is a Unicode character, else false
1577    * @see #isDigit(char)
1578    * @see #isLetter(char)
1579    * @see #isLetterOrDigit(char)
1580    * @see #isLowerCase(char)
1581    * @see #isTitleCase(char)
1582    * @see #isUpperCase(char)
1583    */
isDefined(char ch)1584   public static boolean isDefined(char ch)
1585   {
1586     return getType(ch) != UNASSIGNED;
1587   }
1588 
1589   /**
1590    * Determines if a character is a Unicode letter. Not all letters have case,
1591    * so this may return true when isLowerCase and isUpperCase return false.
1592    * <br>
1593    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
1594    *
1595    * @param ch character to test
1596    * @return true if ch is a Unicode letter, else false
1597    * @see #isDigit(char)
1598    * @see #isJavaIdentifierStart(char)
1599    * @see #isJavaLetter(char)
1600    * @see #isJavaLetterOrDigit(char)
1601    * @see #isLetterOrDigit(char)
1602    * @see #isLowerCase(char)
1603    * @see #isTitleCase(char)
1604    * @see #isUnicodeIdentifierStart(char)
1605    * @see #isUpperCase(char)
1606    */
isLetter(char ch)1607   public static boolean isLetter(char ch)
1608   {
1609     return ((1 << getType(ch))
1610             & ((1 << UPPERCASE_LETTER)
1611                | (1 << LOWERCASE_LETTER)
1612                | (1 << TITLECASE_LETTER)
1613                | (1 << MODIFIER_LETTER)
1614                | (1 << OTHER_LETTER))) != 0;
1615   }
1616 
1617   /**
1618    * Determines if a character is a Unicode letter or a Unicode digit. This
1619    * is the combination of isLetter and isDigit.
1620    * <br>
1621    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
1622    *
1623    * @param ch character to test
1624    * @return true if ch is a Unicode letter or a Unicode digit, else false
1625    * @see #isDigit(char)
1626    * @see #isJavaIdentifierPart(char)
1627    * @see #isJavaLetter(char)
1628    * @see #isJavaLetterOrDigit(char)
1629    * @see #isLetter(char)
1630    * @see #isUnicodeIdentifierPart(char)
1631    */
isLetterOrDigit(char ch)1632   public static boolean isLetterOrDigit(char ch)
1633   {
1634     return ((1 << getType(ch))
1635             & ((1 << UPPERCASE_LETTER)
1636                | (1 << LOWERCASE_LETTER)
1637                | (1 << TITLECASE_LETTER)
1638                | (1 << MODIFIER_LETTER)
1639                | (1 << OTHER_LETTER)
1640                | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
1641   }
1642 
1643   /**
1644    * Determines if a character can start a Java identifier. This is the
1645    * combination of isLetter, any character where getType returns
1646    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1647    * (like '_').
1648    *
1649    * @param ch character to test
1650    * @return true if ch can start a Java identifier, else false
1651    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
1652    * @see #isJavaLetterOrDigit(char)
1653    * @see #isJavaIdentifierStart(char)
1654    * @see #isJavaIdentifierPart(char)
1655    * @see #isLetter(char)
1656    * @see #isLetterOrDigit(char)
1657    * @see #isUnicodeIdentifierStart(char)
1658    */
isJavaLetter(char ch)1659   public static boolean isJavaLetter(char ch)
1660   {
1661     return isJavaIdentifierStart(ch);
1662   }
1663 
1664   /**
1665    * Determines if a character can follow the first letter in
1666    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1667    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1668    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1669    * or isIdentifierIgnorable.
1670    *
1671    * @param ch character to test
1672    * @return true if ch can follow the first letter in a Java identifier
1673    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
1674    * @see #isJavaLetter(char)
1675    * @see #isJavaIdentifierStart(char)
1676    * @see #isJavaIdentifierPart(char)
1677    * @see #isLetter(char)
1678    * @see #isLetterOrDigit(char)
1679    * @see #isUnicodeIdentifierPart(char)
1680    * @see #isIdentifierIgnorable(char)
1681    */
isJavaLetterOrDigit(char ch)1682   public static boolean isJavaLetterOrDigit(char ch)
1683   {
1684     return isJavaIdentifierPart(ch);
1685   }
1686 
1687   /**
1688    * Determines if a character can start a Java identifier. This is the
1689    * combination of isLetter, any character where getType returns
1690    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1691    * (like '_').
1692    * <br>
1693    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
1694    *
1695    * @param ch character to test
1696    * @return true if ch can start a Java identifier, else false
1697    * @see #isJavaIdentifierPart(char)
1698    * @see #isLetter(char)
1699    * @see #isUnicodeIdentifierStart(char)
1700    * @since 1.1
1701    */
isJavaIdentifierStart(char ch)1702   public static boolean isJavaIdentifierStart(char ch)
1703   {
1704     return ((1 << getType(ch))
1705             & ((1 << UPPERCASE_LETTER)
1706                | (1 << LOWERCASE_LETTER)
1707                | (1 << TITLECASE_LETTER)
1708                | (1 << MODIFIER_LETTER)
1709                | (1 << OTHER_LETTER)
1710                | (1 << LETTER_NUMBER)
1711                | (1 << CURRENCY_SYMBOL)
1712                | (1 << CONNECTOR_PUNCTUATION))) != 0;
1713   }
1714 
1715   /**
1716    * Determines if a character can follow the first letter in
1717    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1718    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1719    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1720    * or isIdentifierIgnorable.
1721    * <br>
1722    * Java identifier extender =
1723    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
1724    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1725    *
1726    * @param ch character to test
1727    * @return true if ch can follow the first letter in a Java identifier
1728    * @see #isIdentifierIgnorable(char)
1729    * @see #isJavaIdentifierStart(char)
1730    * @see #isLetterOrDigit(char)
1731    * @see #isUnicodeIdentifierPart(char)
1732    * @since 1.1
1733    */
isJavaIdentifierPart(char ch)1734   public static boolean isJavaIdentifierPart(char ch)
1735   {
1736     int category = getType(ch);
1737     return ((1 << category)
1738             & ((1 << UPPERCASE_LETTER)
1739                | (1 << LOWERCASE_LETTER)
1740                | (1 << TITLECASE_LETTER)
1741                | (1 << MODIFIER_LETTER)
1742                | (1 << OTHER_LETTER)
1743                | (1 << NON_SPACING_MARK)
1744                | (1 << COMBINING_SPACING_MARK)
1745                | (1 << DECIMAL_DIGIT_NUMBER)
1746                | (1 << LETTER_NUMBER)
1747                | (1 << CURRENCY_SYMBOL)
1748                | (1 << CONNECTOR_PUNCTUATION)
1749                | (1 << FORMAT))) != 0
1750       || (category == CONTROL && isIdentifierIgnorable(ch));
1751   }
1752 
1753   /**
1754    * Determines if a character can start a Unicode identifier.  Only
1755    * letters can start a Unicode identifier, but this includes characters
1756    * in LETTER_NUMBER.
1757    * <br>
1758    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
1759    *
1760    * @param ch character to test
1761    * @return true if ch can start a Unicode identifier, else false
1762    * @see #isJavaIdentifierStart(char)
1763    * @see #isLetter(char)
1764    * @see #isUnicodeIdentifierPart(char)
1765    * @since 1.1
1766    */
isUnicodeIdentifierStart(char ch)1767   public static boolean isUnicodeIdentifierStart(char ch)
1768   {
1769     return ((1 << getType(ch))
1770             & ((1 << UPPERCASE_LETTER)
1771                | (1 << LOWERCASE_LETTER)
1772                | (1 << TITLECASE_LETTER)
1773                | (1 << MODIFIER_LETTER)
1774                | (1 << OTHER_LETTER)
1775                | (1 << LETTER_NUMBER))) != 0;
1776   }
1777 
1778   /**
1779    * Determines if a character can follow the first letter in
1780    * a Unicode identifier. This includes letters, connecting punctuation,
1781    * digits, numeric letters, combining marks, non-spacing marks, and
1782    * isIdentifierIgnorable.
1783    * <br>
1784    * Unicode identifier extender =
1785    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
1786    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1787    *
1788    * @param ch character to test
1789    * @return true if ch can follow the first letter in a Unicode identifier
1790    * @see #isIdentifierIgnorable(char)
1791    * @see #isJavaIdentifierPart(char)
1792    * @see #isLetterOrDigit(char)
1793    * @see #isUnicodeIdentifierStart(char)
1794    * @since 1.1
1795    */
isUnicodeIdentifierPart(char ch)1796   public static boolean isUnicodeIdentifierPart(char ch)
1797   {
1798     int category = getType(ch);
1799     return ((1 << category)
1800             & ((1 << UPPERCASE_LETTER)
1801                | (1 << LOWERCASE_LETTER)
1802                | (1 << TITLECASE_LETTER)
1803                | (1 << MODIFIER_LETTER)
1804                | (1 << OTHER_LETTER)
1805                | (1 << NON_SPACING_MARK)
1806                | (1 << COMBINING_SPACING_MARK)
1807                | (1 << DECIMAL_DIGIT_NUMBER)
1808                | (1 << LETTER_NUMBER)
1809                | (1 << CONNECTOR_PUNCTUATION)
1810                | (1 << FORMAT))) != 0
1811       || (category == CONTROL && isIdentifierIgnorable(ch));
1812   }
1813 
1814   /**
1815    * Determines if a character is ignorable in a Unicode identifier. This
1816    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
1817    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
1818    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
1819    * <code>'\u009F'</code>), and FORMAT characters.
1820    * <br>
1821    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
1822    *    |U+007F-U+009F
1823    *
1824    * @param ch character to test
1825    * @return true if ch is ignorable in a Unicode or Java identifier
1826    * @see #isJavaIdentifierPart(char)
1827    * @see #isUnicodeIdentifierPart(char)
1828    * @since 1.1
1829    */
isIdentifierIgnorable(char ch)1830   public static boolean isIdentifierIgnorable(char ch)
1831   {
1832     return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
1833                                || (ch <= '\u001B' && ch >= '\u000E')))
1834       || getType(ch) == FORMAT;
1835   }
1836 
  /**
   * Maps a Unicode character to its lowercase equivalent. A character
   * that has no lowercase mapping is returned unchanged. Note that
   * <code>isLowerCase(toLowerCase(ch))</code> does not always return true.
   *
   * @param ch character to convert to lowercase
   * @return lowercase mapping of ch, or ch itself if no lowercase mapping
   *         exists
   * @see #isLowerCase(char)
   * @see #isUpperCase(char)
   * @see #toTitleCase(char)
   * @see #toUpperCase(char)
   */
  public static native char toLowerCase(char ch);
1851 
  /**
   * Maps a Unicode character to its uppercase equivalent. A character
   * that has no uppercase mapping is returned unchanged. Note that
   * <code>isUpperCase(toUpperCase(ch))</code> does not always return true.
   *
   * @param ch character to convert to uppercase
   * @return uppercase mapping of ch, or ch itself if no uppercase mapping
   *         exists
   * @see #isLowerCase(char)
   * @see #isUpperCase(char)
   * @see #toLowerCase(char)
   * @see #toTitleCase(char)
   */
  public static native char toUpperCase(char ch);
1866 
  /**
   * Maps a Unicode character to its titlecase equivalent. A character
   * that has no titlecase mapping is returned unchanged. Note that
   * <code>isTitleCase(toTitleCase(ch))</code> does not always return true.
   *
   * @param ch character to convert to titlecase
   * @return titlecase mapping of ch, or ch itself if no titlecase mapping
   *         exists
   * @see #isTitleCase(char)
   * @see #toLowerCase(char)
   * @see #toUpperCase(char)
   */
  public static native char toTitleCase(char ch);
1880 
  /**
   * Converts a character into a digit of the specified radix. The result
   * is -1 if the radix is smaller than MIN_RADIX or larger than MAX_RADIX,
   * or if the value of ch is not a valid digit in that radix (a valid
   * digit is smaller than the radix), or if ch is neither a decimal digit
   * nor in the case insensitive set of 'a'-'z'.
   * <br>
   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
   *
   * @param ch character to convert into a digit
   * @param radix radix in which ch is a digit
   * @return digit which ch represents in radix, or -1 if ch is not a valid
   *         digit
   * @see #MIN_RADIX
   * @see #MAX_RADIX
   * @see #forDigit(int, int)
   * @see #isDigit(char)
   * @see #getNumericValue(char)
   */
  public static native int digit(char ch, int radix);
1900 
  /**
   * Returns the Unicode numeric value property of a character. For example,
   * U+216C (the Roman numeral fifty) returns 50.
   *
   * <p>This method also returns values for the letters A through Z, (not
   * specified by Unicode), in these ranges: U+0041 through U+005A
   * (uppercase); U+0061 through U+007A (lowercase); and U+FF21 through
   * U+FF3A, U+FF41 through U+FF5A (full width variants).
   *
   * <p>If the character lacks a numeric value property, -1 is returned.
   * If the character has a numeric value property which is not representable
   * as a nonnegative integer, such as a fraction, -2 is returned.
   *
   * <p>character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A
   *    |U+0061-U+007A|U+FF21-U+FF3A|U+FF41-U+FF5A
   *
   * @param ch character from which the numeric value property will
   *        be retrieved
   * @return the numeric value property of ch, or -1 if it does not exist, or
   *         -2 if it is not representable as a nonnegative integer
   * @see #forDigit(int, int)
   * @see #digit(char, int)
   * @see #isDigit(char)
   * @since 1.1
   */
  public static native int getNumericValue(char ch);
1929 
1930   /**
1931    * Determines if a character is a ISO-LATIN-1 space. This is only the five
1932    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
1933    * <code>'\r'</code>, and <code>' '</code>.
1934    * <br>
1935    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
1936    *
1937    * @param ch character to test
1938    * @return true if ch is a space, else false
1939    * @deprecated Replaced by {@link #isWhitespace(char)}
1940    * @see #isSpaceChar(char)
1941    * @see #isWhitespace(char)
1942    */
isSpace(char ch)1943   public static boolean isSpace(char ch)
1944   {
1945     // Performing the subtraction up front alleviates need to compare longs.
1946     return ch-- <= ' ' && ((1 << ch)
1947                            & ((1 << (' ' - 1))
1948                               | (1 << ('\t' - 1))
1949                               | (1 << ('\n' - 1))
1950                               | (1 << ('\r' - 1))
1951                               | (1 << ('\f' - 1)))) != 0;
1952   }
1953 
1954   /**
1955    * Determines if a character is a Unicode space character. This includes
1956    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
1957    * <br>
1958    * Unicode space = [Zs]|[Zp]|[Zl]
1959    *
1960    * @param ch character to test
1961    * @return true if ch is a Unicode space, else false
1962    * @see #isWhitespace(char)
1963    * @since 1.1
1964    */
isSpaceChar(char ch)1965   public static boolean isSpaceChar(char ch)
1966   {
1967     return ((1 << getType(ch))
1968             & ((1 << SPACE_SEPARATOR)
1969                | (1 << LINE_SEPARATOR)
1970                | (1 << PARAGRAPH_SEPARATOR))) != 0;
1971   }
1972 
1973   /**
1974    * Determines if a character is Java whitespace. This includes Unicode
1975    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
1976    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
1977    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
1978    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
1979    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
1980    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
1981    * and <code>'\u001F'</code>.
1982    * <br>
1983    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
1984    *
1985    * @param ch character to test
1986    * @return true if ch is Java whitespace, else false
1987    * @see #isSpaceChar(char)
1988    * @since 1.1
1989    */
isWhitespace(char ch)1990   public static boolean isWhitespace(char ch)
1991   {
1992     int attr = readChar(ch);
1993     return ((((1 << (attr & TYPE_MASK))
1994               & ((1 << SPACE_SEPARATOR)
1995                  | (1 << LINE_SEPARATOR)
1996                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
1997             && (attr & NO_BREAK_MASK) == 0)
1998       || (ch <= '\u001F' && ((1 << ch)
1999                              & ((1 << '\t')
2000                                 | (1 << '\n')
2001                                 | (1 << '\u000B')
2002                                 | (1 << '\u000C')
2003                                 | (1 << '\r')
2004                                 | (1 << '\u001C')
2005                                 | (1 << '\u001D')
2006                                 | (1 << '\u001E')
2007                                 | (1 << '\u001F'))) != 0);
2008   }
2009 
2010   /**
2011    * Determines if a character has the ISO Control property.
2012    * <br>
2013    * ISO Control = [Cc]
2014    *
2015    * @param ch character to test
2016    * @return true if ch is an ISO Control character, else false
2017    * @see #isSpaceChar(char)
2018    * @see #isWhitespace(char)
2019    * @since 1.1
2020    */
isISOControl(char ch)2021   public static boolean isISOControl(char ch)
2022   {
2023     return getType(ch) == CONTROL;
2024   }
2025 
  /**
   * Returns the Unicode general category property of a character.
   * The result is one of the category constants listed below. This
   * method is declared native, so its implementation is not part of
   * this source file.
   *
   * @param ch character from which the general category property will
   *        be retrieved
   * @return the character category property of ch as an integer
   * @see #UNASSIGNED
   * @see #UPPERCASE_LETTER
   * @see #LOWERCASE_LETTER
   * @see #TITLECASE_LETTER
   * @see #MODIFIER_LETTER
   * @see #OTHER_LETTER
   * @see #NON_SPACING_MARK
   * @see #ENCLOSING_MARK
   * @see #COMBINING_SPACING_MARK
   * @see #DECIMAL_DIGIT_NUMBER
   * @see #LETTER_NUMBER
   * @see #OTHER_NUMBER
   * @see #SPACE_SEPARATOR
   * @see #LINE_SEPARATOR
   * @see #PARAGRAPH_SEPARATOR
   * @see #CONTROL
   * @see #FORMAT
   * @see #PRIVATE_USE
   * @see #SURROGATE
   * @see #DASH_PUNCTUATION
   * @see #START_PUNCTUATION
   * @see #END_PUNCTUATION
   * @see #CONNECTOR_PUNCTUATION
   * @see #OTHER_PUNCTUATION
   * @see #MATH_SYMBOL
   * @see #CURRENCY_SYMBOL
   * @see #MODIFIER_SYMBOL
   * @see #INITIAL_QUOTE_PUNCTUATION
   * @see #FINAL_QUOTE_PUNCTUATION
   * @since 1.1
   */
  public static native int getType(char ch);
2064 
2065   /**
2066    * Converts a digit into a character which represents that digit
2067    * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
2068    * or the digit exceeds the radix, then the null character <code>'\0'</code>
2069    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2070    * <br>
2071    * return value boundary = U+0030-U+0039|U+0061-U+007A
2072    *
2073    * @param digit digit to be converted into a character
2074    * @param radix radix of digit
2075    * @return character representing digit in radix, or '\0'
2076    * @see #MIN_RADIX
2077    * @see #MAX_RADIX
2078    * @see #digit(char, int)
2079    */
forDigit(int digit, int radix)2080   public static char forDigit(int digit, int radix)
2081   {
2082     if (radix < MIN_RADIX || radix > MAX_RADIX
2083 	|| digit < 0 || digit >= radix)
2084       return '\0';
2085     return (char) (digit < 10 ? ('0' + digit) : ('a' - 10 + digit));
2086   }
2087 
  /**
   * Returns the Unicode directionality property of the character. This
   * is used in the visual ordering of text. This method is declared
   * native, so its implementation is not part of this source file.
   *
   * @param ch the character to look up
   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
   * @see #DIRECTIONALITY_UNDEFINED
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
   * @see #DIRECTIONALITY_ARABIC_NUMBER
   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
   * @see #DIRECTIONALITY_NONSPACING_MARK
   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
   * @see #DIRECTIONALITY_WHITESPACE
   * @see #DIRECTIONALITY_OTHER_NEUTRALS
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
   * @since 1.4
   */
  public static native byte getDirectionality(char ch);
2117 
2118   /**
2119    * Determines whether the character is mirrored according to Unicode. For
2120    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2121    * left-to-right text, but ')' in right-to-left text.
2122    *
2123    * @param ch the character to look up
2124    * @return true if the character is mirrored
2125    * @since 1.4
2126    */
isMirrored(char ch)2127   public static boolean isMirrored(char ch)
2128   {
2129     return (readChar(ch) & MIRROR_MASK) != 0;
2130   }
2131 
2132   /**
2133    * Compares another Character to this Character, numerically.
2134    *
2135    * @param anotherCharacter Character to compare with this Character
2136    * @return a negative integer if this Character is less than
2137    *         anotherCharacter, zero if this Character is equal, and
2138    *         a positive integer if this Character is greater
2139    * @throws NullPointerException if anotherCharacter is null
2140    * @since 1.2
2141    */
compareTo(Character anotherCharacter)2142   public int compareTo(Character anotherCharacter)
2143   {
2144     return value - anotherCharacter.value;
2145   }
2146 
2147   /**
2148    * Compares an object to this Character.  Assuming the object is a
2149    * Character object, this method performs the same comparison as
2150    * compareTo(Character).
2151    *
2152    * @param o object to compare
2153    * @return the comparison value
2154    * @throws ClassCastException if o is not a Character object
2155    * @throws NullPointerException if o is null
2156    * @see #compareTo(Character)
2157    * @since 1.2
2158    */
compareTo(Object o)2159   public int compareTo(Object o)
2160   {
2161     return compareTo((Character) o);
2162   }
2163 } // class Character
2164