1/*
2 * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
/** The CharacterData class encapsulates the large tables found in
    java.lang.Character. */
30
31class CharacterData02 extends CharacterData {
32    /* The character properties are currently encoded into 32 bits in the following manner:
33        1 bit   mirrored property
34        4 bits  directionality property
35        9 bits  signed offset used for converting case
36        1 bit   if 1, adding the signed offset converts the character to lowercase
37        1 bit   if 1, subtracting the signed offset converts the character to uppercase
38        1 bit   if 1, this character has a titlecase equivalent (possibly itself)
39        3 bits  0  may not be part of an identifier
40                1  ignorable control; may continue a Unicode identifier or Java identifier
41                2  may continue a Java identifier but not a Unicode identifier (unused)
42                3  may continue a Unicode identifier or Java identifier
43                4  is a Java whitespace character
44                5  may start or continue a Java identifier;
45                   may continue but not start a Unicode identifier (underscores)
46                6  may start or continue a Java identifier but not a Unicode identifier ($)
47                7  may start or continue a Unicode identifier or Java identifier
48                Thus:
49                   5, 6, 7 may start a Java identifier
50                   1, 2, 3, 5, 6, 7 may continue a Java identifier
51                   7 may start a Unicode identifier
52                   1, 3, 5, 7 may continue a Unicode identifier
53                   1 is ignorable within an identifier
54                   4 is Java whitespace
55        2 bits  0  this character has no numeric property
56                1  adding the digit offset to the character code and then
57                   masking with 0x1F will produce the desired numeric value
58                2  this character has a "strange" numeric value
59                3  a Java supradecimal digit: adding the digit offset to the
60                   character code, then masking with 0x1F, then adding 10
61                   will produce the desired numeric value
62        5 bits  digit offset
63        5 bits  character type
64
65        The encoding of character properties is subject to change at any time.
66     */
67
68    int getProperties(int ch) {
69	char offset = (char)ch;
70        int props = $$Lookup(offset);
71        return props;
72    }
73
74    int getPropertiesEx(int ch) {
75        char offset = (char)ch;
76        int props = $$LookupEx(offset);
77        return props;
78    }
79
80    boolean isOtherAlphabetic(int ch) {
81        int props = getPropertiesEx(ch);
82        return (props & $$maskOtherAlphabetic) != 0;
83    }
84
85    boolean isIdeographic(int ch) {
86        int props = getPropertiesEx(ch);
87        return (props & $$maskIdeographic) != 0;
88    }
89
90    int getType(int ch) {
91        int props = getProperties(ch);
92        return (props & $$maskType);
93    }
94
95    boolean isJavaIdentifierStart(int ch) {
96        int props = getProperties(ch);
97        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
98    }
99
100    boolean isJavaIdentifierPart(int ch) {
101        int props = getProperties(ch);
102        return ((props & $$nonzeroJavaPart) != 0);
103    }
104
105    boolean isUnicodeIdentifierStart(int ch) {
106        return (getPropertiesEx(ch) & $$maskIDStart) != 0;
107    }
108
109    boolean isUnicodeIdentifierPart(int ch) {
110        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
111               isIdentifierIgnorable(ch);
112    }
113
114    boolean isIdentifierIgnorable(int ch) {
115        int props = getProperties(ch);
116        return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
117    }
118
119    int toLowerCase(int ch) {
120        int mapChar = ch;
121        int val = getProperties(ch);
122
123        if ((val & $$maskLowerCase) != 0) {
124            int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
125            mapChar = ch + offset;
126        }
127        return mapChar;
128    }
129
130    int toUpperCase(int ch) {
131        int mapChar = ch;
132        int val = getProperties(ch);
133
134        if ((val & $$maskUpperCase) != 0) {
135            int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
136            mapChar =  ch - offset;
137        }
138        return mapChar;
139    }
140
141    int toTitleCase(int ch) {
142        int mapChar = ch;
143        int val = getProperties(ch);
144
145        if ((val & $$maskTitleCase) != 0) {
146            // There is a titlecase equivalent.  Perform further checks:
147            if ((val & $$maskUpperCase) == 0) {
148                // The character does not have an uppercase equivalent, so it must
149                // already be uppercase; so add 1 to get the titlecase form.
150                mapChar = ch + 1;
151            }
152            else if ((val & $$maskLowerCase) == 0) {
153                // The character does not have a lowercase equivalent, so it must
154                // already be lowercase; so subtract 1 to get the titlecase form.
155                mapChar = ch - 1;
156            }
157            // else {
158            // The character has both an uppercase equivalent and a lowercase
159            // equivalent, so it must itself be a titlecase form; return it.
160            // return ch;
161            //}
162        }
163        else if ((val & $$maskUpperCase) != 0) {
164            // This character has no titlecase equivalent but it does have an
165            // uppercase equivalent, so use that (subtract the signed case offset).
166            mapChar = toUpperCase(ch);
167        }
168        return mapChar;
169    }
170
171    int digit(int ch, int radix) {
172        int value = -1;
173        if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
174            int val = getProperties(ch);
175            int kind = val & $$maskType;
176            if (kind == Character.DECIMAL_DIGIT_NUMBER) {
177                value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
178            }
179            else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
180                // Java supradecimal digit
181                value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
182            }
183        }
184        return (value < radix) ? value : -1;
185    }
186
187    int getNumericValue(int ch) {
188        int val = getProperties(ch);
189        int retval = -1;
190
191        switch (val & $$maskNumericType) {
192        default: // cannot occur
193        case ($$valueNotNumeric):         // not numeric
194            retval = -1;
195            break;
196        case ($$valueDigit):              // simple numeric
197            retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
198            break;
199        case ($$valueStrangeNumeric)      :       // "strange" numeric
200            retval = -2;
201            break;
202        case ($$valueJavaSupradecimal):           // Java supradecimal
203            retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
204            break;
205        }
206        return retval;
207    }
208
209    boolean isDigit(int ch) {
210        int props = getProperties(ch);
211        return (props & $$maskType) == Character.DECIMAL_DIGIT_NUMBER;
212    }
213
214    boolean isLowerCase(int ch) {
215        return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0;
216    }
217
218    boolean isUpperCase(int ch) {
219        return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0;
220    }
221
222    boolean isWhitespace(int ch) {
223        return (getProperties(ch) & $$maskIdentifierInfo) == $$valueJavaWhitespace;
224    }
225
226    byte getDirectionality(int ch) {
227        int val = getProperties(ch);
228        byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
229        if (directionality == 0xF ) {
230	        directionality = Character.DIRECTIONALITY_UNDEFINED;
231        }
232        return directionality;
233    }
234
235    boolean isMirrored(int ch) {
236        return (getProperties(ch) & $$maskMirrored) != 0;
237    }
238
239    static final CharacterData instance = new CharacterData02();
240    private CharacterData02() {};
241
242    $$Tables
243
244    static {
245        $$Initializers
246    }
247}
248