1 /*
2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.xpath.regex;
22 
23 import java.util.HashMap;
24 import java.util.Locale;
25 import java.util.Map;
26 
27 /**
28  * A regular expression parser for the XML Schema.
29  *
30  * @xerces.internal
31  *
32  * @author TAMURA Kent <kent@trl.ibm.co.jp>
33  */
34 class ParserForXMLSchema extends RegexParser {
35 
/**
 * Creates a parser without supplying a locale. Nothing is configured here
 * beyond invoking the no-argument superclass constructor.
 */
public ParserForXMLSchema() {
    super();
}
/**
 * Creates a parser, handing the given locale to the superclass constructor.
 *
 * @param locale passed through unchanged to {@code RegexParser}
 */
public ParserForXMLSchema(Locale locale) {
    super(locale);
}
42 
processCaret()43     Token processCaret() throws ParseException {
44         this.next();
45         return Token.createChar('^');
46     }
processDollar()47     Token processDollar() throws ParseException {
48         this.next();
49         return Token.createChar('$');
50      }
processLookahead()51     Token processLookahead() throws ParseException {
52         throw ex("parser.process.1", this.offset);
53     }
processNegativelookahead()54     Token processNegativelookahead() throws ParseException {
55         throw ex("parser.process.1", this.offset);
56     }
processLookbehind()57     Token processLookbehind() throws ParseException {
58         throw ex("parser.process.1", this.offset);
59     }
processNegativelookbehind()60     Token processNegativelookbehind() throws ParseException {
61         throw ex("parser.process.1", this.offset);
62     }
processBacksolidus_A()63     Token processBacksolidus_A() throws ParseException {
64         throw ex("parser.process.1", this.offset);
65     }
processBacksolidus_Z()66     Token processBacksolidus_Z() throws ParseException {
67         throw ex("parser.process.1", this.offset);
68     }
processBacksolidus_z()69     Token processBacksolidus_z() throws ParseException {
70         throw ex("parser.process.1", this.offset);
71     }
processBacksolidus_b()72     Token processBacksolidus_b() throws ParseException {
73         throw ex("parser.process.1", this.offset);
74     }
processBacksolidus_B()75     Token processBacksolidus_B() throws ParseException {
76         throw ex("parser.process.1", this.offset);
77     }
processBacksolidus_lt()78     Token processBacksolidus_lt() throws ParseException {
79         throw ex("parser.process.1", this.offset);
80     }
processBacksolidus_gt()81     Token processBacksolidus_gt() throws ParseException {
82         throw ex("parser.process.1", this.offset);
83     }
processStar(Token tok)84     Token processStar(Token tok) throws ParseException {
85         this.next();
86         return Token.createClosure(tok);
87     }
processPlus(Token tok)88     Token processPlus(Token tok) throws ParseException {
89         // X+ -> XX*
90         this.next();
91         return Token.createConcat(tok, Token.createClosure(tok));
92     }
processQuestion(Token tok)93     Token processQuestion(Token tok) throws ParseException {
94         // X? -> X|
95         this.next();
96         Token par = Token.createUnion();
97         par.addChild(tok);
98         par.addChild(Token.createEmpty());
99         return par;
100     }
checkQuestion(int off)101     boolean checkQuestion(int off) {
102         return false;
103     }
processParen()104     Token processParen() throws ParseException {
105         this.next();
106         Token tok = Token.createParen(this.parseRegex(), 0);
107         if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);
108         this.next();                            // Skips ')'
109         return tok;
110     }
processParen2()111     Token processParen2() throws ParseException {
112         throw ex("parser.process.1", this.offset);
113     }
processCondition()114     Token processCondition() throws ParseException {
115         throw ex("parser.process.1", this.offset);
116     }
processModifiers()117     Token processModifiers() throws ParseException {
118         throw ex("parser.process.1", this.offset);
119     }
processIndependent()120     Token processIndependent() throws ParseException {
121         throw ex("parser.process.1", this.offset);
122     }
processBacksolidus_c()123     Token processBacksolidus_c() throws ParseException {
124         this.next();
125         return this.getTokenForShorthand('c');
126     }
processBacksolidus_C()127     Token processBacksolidus_C() throws ParseException {
128         this.next();
129         return this.getTokenForShorthand('C');
130     }
processBacksolidus_i()131     Token processBacksolidus_i() throws ParseException {
132         this.next();
133         return this.getTokenForShorthand('i');
134     }
processBacksolidus_I()135     Token processBacksolidus_I() throws ParseException {
136         this.next();
137         return this.getTokenForShorthand('I');
138     }
processBacksolidus_g()139     Token processBacksolidus_g() throws ParseException {
140         throw this.ex("parser.process.1", this.offset-2);
141     }
processBacksolidus_X()142     Token processBacksolidus_X() throws ParseException {
143         throw ex("parser.process.1", this.offset-2);
144     }
processBackreference()145     Token processBackreference() throws ParseException {
146         throw ex("parser.process.1", this.offset-4);
147     }
148 
processCIinCharacterClass(RangeToken tok, int c)149     int processCIinCharacterClass(RangeToken tok, int c) {
150         tok.mergeRanges(this.getTokenForShorthand(c));
151         return -1;
152     }
153 
154 
155     /**
156      * Parses a character-class-expression, not a character-class-escape.
157      *
158      * c-c-expression   ::= '[' c-group ']'
159      * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
160      * positive-c-group ::= (c-range | c-c-escape)+
161      * negative-c-group ::= '^' positive-c-group
162      * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
163      * subtraction      ::= '-' c-c-expression
164      * c-range          ::= single-range | from-to-range
165      * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
166      * cc-normal-c      ::= <any character except [, ], \>
167      * from-to-range    ::= cc-normal-c '-' cc-normal-c
168      *
169      * @param useNrage Ignored.
170      * @return This returns no NrageToken.
171      */
parseCharacterClass(boolean useNrange)172     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
173         this.setContext(S_INBRACKETS);
174         this.next();                            // '['
175         boolean nrange = false;
176         boolean wasDecoded = false;                     // used to detect if the last - was escaped.
177         RangeToken base = null;
178         RangeToken tok;
179         if (this.read() == T_CHAR && this.chardata == '^') {
180             nrange = true;
181             this.next();                        // '^'
182             base = Token.createRange();
183             base.addRange(0, Token.UTF16_MAX);
184             tok = Token.createRange();
185         } else {
186             tok = Token.createRange();
187         }
188         int type;
189         boolean firstloop = true;
190         while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
191 
192                 wasDecoded = false;
193             // single-range | from-to-range | subtraction
194             if (type == T_CHAR && this.chardata == ']' && !firstloop) {
195                 if (nrange) {
196                     base.subtractRanges(tok);
197                     tok = base;
198                 }
199                 break;
200             }
201             int c = this.chardata;
202             boolean end = false;
203             if (type == T_BACKSOLIDUS) {
204                 switch (c) {
205                   case 'd':  case 'D':
206                   case 'w':  case 'W':
207                   case 's':  case 'S':
208                     tok.mergeRanges(this.getTokenForShorthand(c));
209                     end = true;
210                     break;
211 
212                   case 'i':  case 'I':
213                   case 'c':  case 'C':
214                     c = this.processCIinCharacterClass(tok, c);
215                     if (c < 0)  end = true;
216                     break;
217 
218                   case 'p':
219                   case 'P':
220                     int pstart = this.offset;
221                     RangeToken tok2 = this.processBacksolidus_pP(c);
222                     if (tok2 == null)  throw this.ex("parser.atom.5", pstart);
223                     tok.mergeRanges(tok2);
224                     end = true;
225                     break;
226 
227                  case '-':
228                         c = this.decodeEscaped();
229                         wasDecoded = true;
230                         break;
231 
232                   default:
233                     c = this.decodeEscaped();
234                 } // \ + c
235             } // backsolidus
236             else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
237                                                 // Subraction
238                 if (nrange) {
239                     base.subtractRanges(tok);
240                     tok = base;
241                 }
242                 RangeToken range2 = this.parseCharacterClass(false);
243                 tok.subtractRanges(range2);
244                 if (this.read() != T_CHAR || this.chardata != ']')
245                     throw this.ex("parser.cc.5", this.offset);
246                 break;                          // Exit this loop
247             }
248             this.next();
249             if (!end) {                         // if not shorthands...
250                 if (type == T_CHAR) {
251                     if (c == '[')  throw this.ex("parser.cc.6", this.offset-2);
252                     if (c == ']')  throw this.ex("parser.cc.7", this.offset-2);
253                     if (c == '-' && this.chardata != ']' && !firstloop)  throw this.ex("parser.cc.8", this.offset-2);   // if regex = '[-]' then invalid
254                 }
255                 if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
256                     if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
257                         tok.addRange(c, c);
258                     }
259                     else {
260                         addCaseInsensitiveChar(tok, c);
261                     }
262                 } else {                        // Found '-'
263                                                 // Is this '-' is a from-to token??
264                     this.next(); // Skips '-'
265                     if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
266                                                 // c '-' ']' -> '-' is a single-range.
267                     if(type == T_CHAR && this.chardata == ']') {                                // if - is at the last position of the group
268                         if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
269                             tok.addRange(c, c);
270                         }
271                         else {
272                             addCaseInsensitiveChar(tok, c);
273                         }
274                         tok.addRange('-', '-');
275                     }
276                     else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
277                         throw this.ex("parser.cc.8", this.offset-1);
278                     } else {
279 
280                         int rangeend = this.chardata;
281                         if (type == T_CHAR) {
282                             if (rangeend == '[')  throw this.ex("parser.cc.6", this.offset-1);
283                             if (rangeend == ']')  throw this.ex("parser.cc.7", this.offset-1);
284                             if (rangeend == '-')  throw this.ex("parser.cc.8", this.offset-2);
285                         }
286                         else if (type == T_BACKSOLIDUS)
287                             rangeend = this.decodeEscaped();
288                         this.next();
289 
290                         if (c > rangeend)  throw this.ex("parser.ope.3", this.offset-1);
291                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
292                                 (c > 0xffff && rangeend > 0xffff)) {
293                             tok.addRange(c, rangeend);
294                         }
295                         else {
296                             addCaseInsensitiveCharRange(tok, c, rangeend);
297                         }
298                     }
299                 }
300             }
301             firstloop = false;
302         }
303         if (this.read() == T_EOF)
304             throw this.ex("parser.cc.2", this.offset);
305         tok.sortRanges();
306         tok.compactRanges();
307         //tok.dumpRanges();
308         this.setContext(S_NORMAL);
309         this.next();                    // Skips ']'
310 
311         return tok;
312     }
313 
parseSetOperations()314     protected RangeToken parseSetOperations() throws ParseException {
315         throw this.ex("parser.process.1", this.offset);
316     }
317 
getTokenForShorthand(int ch)318     Token getTokenForShorthand(int ch) {
319         switch (ch) {
320           case 'd':
321             return ParserForXMLSchema.getRange("xml:isDigit", true);
322           case 'D':
323             return ParserForXMLSchema.getRange("xml:isDigit", false);
324           case 'w':
325             return ParserForXMLSchema.getRange("xml:isWord", true);
326           case 'W':
327             return ParserForXMLSchema.getRange("xml:isWord", false);
328           case 's':
329             return ParserForXMLSchema.getRange("xml:isSpace", true);
330           case 'S':
331             return ParserForXMLSchema.getRange("xml:isSpace", false);
332           case 'c':
333             return ParserForXMLSchema.getRange("xml:isNameChar", true);
334           case 'C':
335             return ParserForXMLSchema.getRange("xml:isNameChar", false);
336           case 'i':
337             return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
338           case 'I':
339             return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
340           default:
341             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
342         }
343     }
decodeEscaped()344     int decodeEscaped() throws ParseException {
345         if (this.read() != T_BACKSOLIDUS)  throw ex("parser.next.1", this.offset-1);
346         int c = this.chardata;
347         switch (c) {
348           case 'n':  c = '\n';  break; // LINE FEED U+000A
349           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
350           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
351           case '\\':
352           case '|':
353           case '.':
354           case '^':
355           case '-':
356           case '?':
357           case '*':
358           case '+':
359           case '{':
360           case '}':
361           case '(':
362           case ')':
363           case '[':
364           case ']':
365             break; // return actucal char
366           default:
367             throw ex("parser.process.1", this.offset-2);
368         }
369         return c;
370     }
371 
372     static private Map<String, Token> ranges = null;
373     static private Map<String, Token> ranges2 = null;
getRange(String name, boolean positive)374     static synchronized protected RangeToken getRange(String name, boolean positive) {
375         if (ranges == null) {
376             ranges = new HashMap<>();
377             ranges2 = new HashMap<>();
378 
379             Token tok = Token.createRange();
380             setupRange(tok, SPACES);
381             ranges.put("xml:isSpace", tok);
382             ranges2.put("xml:isSpace", Token.complementRanges(tok));
383 
384             tok = Token.createRange();
385             setupRange(tok, DIGITS_INTS);
386             ranges.put("xml:isDigit", tok);
387             ranges2.put("xml:isDigit", Token.complementRanges(tok));
388 
389             /*
390              * \w is defined by the XML Schema specification to be:
391              * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
392              */
393             tok = Token.createRange();
394             tok.mergeRanges(Token.getRange("P", true));
395             tok.mergeRanges(Token.getRange("Z", true));
396             tok.mergeRanges(Token.getRange("C", true));
397             ranges2.put("xml:isWord", tok);
398             ranges.put("xml:isWord", Token.complementRanges(tok));
399 
400             tok = Token.createRange();
401             setupRange(tok, NAMECHARS);
402             ranges.put("xml:isNameChar", tok);
403             ranges2.put("xml:isNameChar", Token.complementRanges(tok));
404 
405             tok = Token.createRange();
406             setupRange(tok, LETTERS);
407             setupRange(tok, LETTERS_INT);
408             tok.addRange('_', '_');
409             tok.addRange(':', ':');
410             ranges.put("xml:isInitialNameChar", tok);
411             ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
412         }
413         RangeToken tok = positive ? (RangeToken)ranges.get(name)
414             : (RangeToken)ranges2.get(name);
415         return tok;
416     }
417 
setupRange(Token range, String src)418     static void setupRange(Token range, String src) {
419         int len = src.length();
420         for (int i = 0;  i < len;  i += 2)
421             range.addRange(src.charAt(i), src.charAt(i+1));
422     }
423 
setupRange(Token range, int[] src)424     static void setupRange(Token range, int[] src) {
425         int len = src.length;
426         for (int i = 0;  i < len;  i += 2)
427             range.addRange(src[i], src[i+1]);
428     }
429 
430     private static final String SPACES = "\t\n\r\r  ";
431     private static final String NAMECHARS =
432         "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
433         +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
434         +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
435         +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
436         +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
437         +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
438         +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
439         +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
440         +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
441         +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
442         +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
443         +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
444         +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
445         +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
446         +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
447         +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
448         +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
449         +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
450         +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
451         +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
452         +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
453         +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
454         +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
455         +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
456         +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
457         +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
458         +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
459         +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
460         +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
461         +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
462         +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
463         +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
464         +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
465         +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
466         +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
467         +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
468         +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
469         +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
470         +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
471         +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
472         +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
473         +"";
474     private static final String LETTERS =
475         "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
476         +"\u014a\u017e\u0180\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
477         +"\u02b0\u02d1"
478         +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
479         +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
480         +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
481         +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
482         +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
483         +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
484         +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
485         +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
486         +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
487         +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
488         +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
489         +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
490         +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
491         +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
492         +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
493         +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
494         +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
495         +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
496         +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
497         +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
498         +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
499         +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
500         +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
501         +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
502         +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
503         +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
504         +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
505         +"\uac00\ud7a3\uff66\uff9f";
506 
507     private static final int[] LETTERS_INT = {0x1d790, 0x1d7a8, 0x1d7aa, 0x1d7c9, 0x2fa1b, 0x2fa1d};
508 
509     private static final int[] DIGITS_INTS = {
510         0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
511         0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
512         0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
513         0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
514         0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
515         0x1D7CE, 0x1D7FF
516     };
517 }
518