/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.xpath.regex;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A regular expression parser for the XML Schema.
 *
 * <p>Constructs handled here either produce a token or unconditionally throw
 * the {@link ParseException} created by {@code ex("parser.process.1", ...)}.
 *
 * @xerces.internal
 *
 * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
 */
class ParserForXMLSchema extends RegexParser {

    public ParserForXMLSchema() {
        //this.setLocale(Locale.getDefault());
    }

    public ParserForXMLSchema(Locale locale) {
        super(locale);
    }

    /** Consumes the current token and returns a literal {@code '^'} character token. */
    Token processCaret() throws ParseException {
        this.next();
        return Token.createChar('^');
    }

    /** Consumes the current token and returns a literal {@code '$'} character token. */
    Token processDollar() throws ParseException {
        this.next();
        return Token.createChar('$');
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processLookahead() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processNegativelookahead() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processLookbehind() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processNegativelookbehind() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_A() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_Z() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_z() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_b() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_B() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_lt() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processBacksolidus_gt() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Consumes the current token and wraps {@code tok} in a closure token. */
    Token processStar(Token tok) throws ParseException {
        this.next();
        return Token.createClosure(tok);
    }

    /** Consumes the current token and rewrites {@code X+} as {@code XX*}. */
    Token processPlus(Token tok) throws ParseException {
        // X+ -> XX*
        this.next();
        return Token.createConcat(tok, Token.createClosure(tok));
    }

    /** Consumes the current token and rewrites {@code X?} as the union of {@code X} and empty. */
    Token processQuestion(Token tok) throws ParseException {
        // X? -> X|
        this.next();
        Token par = Token.createUnion();
        par.addChild(tok);
        par.addChild(Token.createEmpty());
        return par;
    }

    /** Always returns {@code false}; the offset argument is not inspected. */
    boolean checkQuestion(int off) {
        return false;
    }

    /**
     * Parses a parenthesized sub-expression.
     *
     * @return the paren token wrapping the parsed inner expression (created with 0 as
     *         the second argument of {@code Token.createParen})
     * @throws ParseException if the token after the inner expression is not {@code T_RPAREN}
     */
    Token processParen() throws ParseException {
        this.next();
        Token tok = Token.createParen(this.parseRegex(), 0);
        if (this.read() != T_RPAREN) {
            throw ex("parser.factor.1", this.offset - 1);
        }
        this.next();                            // Skips ')'
        return tok;
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processParen2() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processCondition() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processModifiers() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Always throws: this construct is rejected by this parser. */
    Token processIndependent() throws ParseException {
        throw ex("parser.process.1", this.offset);
    }

    /** Consumes the current token and returns the shorthand token for {@code 'c'}. */
    Token processBacksolidus_c() throws ParseException {
        this.next();
        return this.getTokenForShorthand('c');
    }

    /** Consumes the current token and returns the shorthand token for {@code 'C'}. */
    Token processBacksolidus_C() throws ParseException {
        this.next();
        return this.getTokenForShorthand('C');
    }

    /** Consumes the current token and returns the shorthand token for {@code 'i'}. */
    Token processBacksolidus_i() throws ParseException {
        this.next();
        return this.getTokenForShorthand('i');
    }

    /** Consumes the current token and returns the shorthand token for {@code 'I'}. */
    Token processBacksolidus_I() throws ParseException {
        this.next();
        return this.getTokenForShorthand('I');
    }

    /** Always throws; the error position is reported two characters before the current offset. */
    Token processBacksolidus_g() throws ParseException {
        throw this.ex("parser.process.1", this.offset - 2);
    }

    /** Always throws; the error position is reported two characters before the current offset. */
    Token processBacksolidus_X() throws ParseException {
        throw ex("parser.process.1", this.offset - 2);
    }

    /** Always throws; the error position is reported four characters before the current offset. */
    Token processBackreference() throws ParseException {
        throw ex("parser.process.1", this.offset - 4);
    }

    /**
     * Merges the shorthand range for {@code c} into {@code tok}.
     *
     * @return always {@code -1}; {@link #parseCharacterClass(boolean)} treats a negative
     *         result as "already handled, no single character left to add"
     */
    int processCIinCharacterClass(RangeToken tok, int c) {
        tok.mergeRanges(this.getTokenForShorthand(c));
        return -1;
    }

    /**
     * Parses a character-class-expression, not a character-class-escape.
     *
     * <pre>
     * c-c-expression   ::= '[' c-group ']'
     * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
     * positive-c-group ::= (c-range | c-c-escape)+
     * negative-c-group ::= '^' positive-c-group
     * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
     * subtraction      ::= '-' c-c-expression
     * c-range          ::= single-range | from-to-range
     * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | &lt;any XML char&gt;
     * cc-normal-c      ::= &lt;any character except [, ], \&gt;
     * from-to-range    ::= cc-normal-c '-' cc-normal-c
     * </pre>
     *
     * @param useNrange Ignored.
     * @return This returns no NrangeToken.
     * @throws ParseException if the expression is malformed or input ends before the closing
     *         {@code ']'}
     */
    protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
        this.setContext(S_INBRACKETS);
        this.next();                            // '['
        boolean nrange = false;
        boolean wasDecoded = false;             // used to detect if the last - was escaped.
        RangeToken base = null;
        if (this.read() == T_CHAR && this.chardata == '^') {
            nrange = true;
            this.next();                        // '^'
            // A negated group is computed as (everything) minus (the listed ranges).
            base = Token.createRange();
            base.addRange(0, Token.UTF16_MAX);
        }
        RangeToken tok = Token.createRange();

        int type;
        boolean firstloop = true;
        while ((type = this.read()) != T_EOF) { // Don't use 'continue' for this loop.

            wasDecoded = false;
            // single-range | from-to-range | subtraction
            if (type == T_CHAR && this.chardata == ']' && !firstloop) {
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                break;
            }
            int c = this.chardata;
            boolean end = false;                // true once a shorthand/category was merged
            if (type == T_BACKSOLIDUS) {
                switch (c) {
                    case 'd': case 'D':
                    case 'w': case 'W':
                    case 's': case 'S':
                        tok.mergeRanges(this.getTokenForShorthand(c));
                        end = true;
                        break;

                    case 'i': case 'I':
                    case 'c': case 'C':
                        c = this.processCIinCharacterClass(tok, c);
                        if (c < 0) {
                            end = true;
                        }
                        break;

                    case 'p':
                    case 'P': {
                        int pstart = this.offset;
                        RangeToken tok2 = this.processBacksolidus_pP(c);
                        if (tok2 == null) {
                            throw this.ex("parser.atom.5", pstart);
                        }
                        tok.mergeRanges(tok2);
                        end = true;
                        break;
                    }

                    case '-':
                        c = this.decodeEscaped();
                        wasDecoded = true;
                        break;

                    default:
                        c = this.decodeEscaped();
                } // \ + c
            } // backsolidus
            else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
                                                // Subtraction
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                RangeToken range2 = this.parseCharacterClass(false);
                tok.subtractRanges(range2);
                if (this.read() != T_CHAR || this.chardata != ']') {
                    throw this.ex("parser.cc.5", this.offset);
                }
                break;                          // Exit this loop
            }
            this.next();
            if (!end) {                         // if not shorthands...
                if (type == T_CHAR) {
                    if (c == '[') {
                        throw this.ex("parser.cc.6", this.offset - 2);
                    }
                    if (c == ']') {
                        throw this.ex("parser.cc.7", this.offset - 2);
                    }
                    // if regex = '[-]' then invalid
                    if (c == '-' && this.chardata != ']' && !firstloop) {
                        throw this.ex("parser.cc.8", this.offset - 2);
                    }
                }
                if (this.read() != T_CHAR || this.chardata != '-'
                        || (c == '-' && !wasDecoded && firstloop)) { // Here is no '-'.
                    addCharHonoringCase(tok, c);
                } else {                        // Found '-'
                    // Is this '-' a from-to token?
                    this.next();                // Skips '-'
                    if ((type = this.read()) == T_EOF) {
                        throw this.ex("parser.cc.2", this.offset);
                    }
                    if (type == T_CHAR && this.chardata == ']') {
                        // c '-' ']' -> '-' is a single-range (last position of the group).
                        addCharHonoringCase(tok, c);
                        tok.addRange('-', '-');
                    } else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
                        throw this.ex("parser.cc.8", this.offset - 1);
                    } else {
                        int rangeend = this.chardata;
                        if (type == T_CHAR) {
                            if (rangeend == '[') {
                                throw this.ex("parser.cc.6", this.offset - 1);
                            }
                            if (rangeend == ']') {
                                throw this.ex("parser.cc.7", this.offset - 1);
                            }
                            if (rangeend == '-') {
                                throw this.ex("parser.cc.8", this.offset - 2);
                            }
                        } else if (type == T_BACKSOLIDUS) {
                            rangeend = this.decodeEscaped();
                        }
                        this.next();

                        if (c > rangeend) {
                            throw this.ex("parser.ope.3", this.offset - 1);
                        }
                        if (!this.isSet(RegularExpression.IGNORE_CASE)
                                || (c > 0xffff && rangeend > 0xffff)) {
                            tok.addRange(c, rangeend);
                        } else {
                            addCaseInsensitiveCharRange(tok, c, rangeend);
                        }
                    }
                }
            }
            firstloop = false;
        }
        if (this.read() == T_EOF) {
            throw this.ex("parser.cc.2", this.offset);
        }
        tok.sortRanges();
        tok.compactRanges();
        //tok.dumpRanges();
        this.setContext(S_NORMAL);
        this.next();                            // Skips ']'

        return tok;
    }

    /**
     * Adds the single code point {@code c} to {@code tok}. When IGNORE_CASE is set and
     * {@code c} is at most 0xffff, the addition is delegated to
     * {@code addCaseInsensitiveChar}; otherwise the exact code point is added.
     */
    private void addCharHonoringCase(RangeToken tok, int c) {
        if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
            tok.addRange(c, c);
        } else {
            addCaseInsensitiveChar(tok, c);
        }
    }

    /** Always throws: this construct is rejected by this parser. */
    protected RangeToken parseSetOperations() throws ParseException {
        throw this.ex("parser.process.1", this.offset);
    }

    /**
     * Maps a shorthand letter to its cached range.
     *
     * @param ch one of {@code d D w W s S c C i I}; lower case selects the positive range,
     *           upper case the complemented one
     * @return the range obtained from {@link #getRange(String, boolean)}
     * @throws RuntimeException for any other value of {@code ch}
     */
    Token getTokenForShorthand(int ch) {
        switch (ch) {
            case 'd':
                return ParserForXMLSchema.getRange("xml:isDigit", true);
            case 'D':
                return ParserForXMLSchema.getRange("xml:isDigit", false);
            case 'w':
                return ParserForXMLSchema.getRange("xml:isWord", true);
            case 'W':
                return ParserForXMLSchema.getRange("xml:isWord", false);
            case 's':
                return ParserForXMLSchema.getRange("xml:isSpace", true);
            case 'S':
                return ParserForXMLSchema.getRange("xml:isSpace", false);
            case 'c':
                return ParserForXMLSchema.getRange("xml:isNameChar", true);
            case 'C':
                return ParserForXMLSchema.getRange("xml:isNameChar", false);
            case 'i':
                return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
            case 'I':
                return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
            default:
                throw new RuntimeException("Internal Error: shorthands: \\u" + Integer.toString(ch, 16));
        }
    }

    /**
     * Decodes the character following a backslash.
     *
     * @return LF, CR or TAB for {@code n}, {@code r}, {@code t}; the character itself for
     *         the listed metacharacters
     * @throws ParseException if the current token is not {@code T_BACKSOLIDUS} or the
     *         escaped character is not one of the accepted ones
     */
    int decodeEscaped() throws ParseException {
        if (this.read() != T_BACKSOLIDUS) {
            throw ex("parser.next.1", this.offset - 1);
        }
        int c = this.chardata;
        switch (c) {
            case 'n': c = '\n'; break;          // LINE FEED U+000A
            case 'r': c = '\r'; break;          // CARRIAGE RETURN U+000D
            case 't': c = '\t'; break;          // HORIZONTAL TABULATION U+0009
            case '\\':
            case '|':
            case '.':
            case '^':
            case '-':
            case '?':
            case '*':
            case '+':
            case '{':
            case '}':
            case '(':
            case ')':
            case '[':
            case ']':
                break;                          // return actual char
            default:
                throw ex("parser.process.1", this.offset - 2);
        }
        return c;
    }

    /** Positive ranges by name; created on the first {@link #getRange} call. */
    private static Map<String, Token> ranges = null;
    /** Complemented ranges by name; created together with {@link #ranges}. */
    private static Map<String, Token> ranges2 = null;

    /**
     * Returns a cached range by name, building both tables on the first call.
     * The method is {@code static synchronized}, so the one-time build runs under the
     * class lock.
     *
     * @param name     one of {@code xml:isSpace}, {@code xml:isDigit}, {@code xml:isWord},
     *                 {@code xml:isNameChar}, {@code xml:isInitialNameChar}
     * @param positive {@code true} for the range itself, {@code false} for its complement
     * @return the cached range, or {@code null} when {@code name} is not one of the above
     */
    static synchronized protected RangeToken getRange(String name, boolean positive) {
        if (ranges == null) {
            ranges = new HashMap<>();
            ranges2 = new HashMap<>();

            Token tok = Token.createRange();
            setupRange(tok, SPACES);
            ranges.put("xml:isSpace", tok);
            ranges2.put("xml:isSpace", Token.complementRanges(tok));

            tok = Token.createRange();
            setupRange(tok, DIGITS_INTS);
            ranges.put("xml:isDigit", tok);
            ranges2.put("xml:isDigit", Token.complementRanges(tok));

            /*
             * \w is defined by the XML Schema specification to be:
             * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of
             * "punctuation", "separator" and "other" characters)
             */
            tok = Token.createRange();
            tok.mergeRanges(Token.getRange("P", true));
            tok.mergeRanges(Token.getRange("Z", true));
            tok.mergeRanges(Token.getRange("C", true));
            // Built as the excluded set first, so the positive table gets the complement.
            ranges2.put("xml:isWord", tok);
            ranges.put("xml:isWord", Token.complementRanges(tok));

            tok = Token.createRange();
            setupRange(tok, NAMECHARS);
            ranges.put("xml:isNameChar", tok);
            ranges2.put("xml:isNameChar", Token.complementRanges(tok));

            tok = Token.createRange();
            setupRange(tok, LETTERS);
            setupRange(tok, LETTERS_INT);
            tok.addRange('_', '_');
            tok.addRange(':', ':');
            ranges.put("xml:isInitialNameChar", tok);
            ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
        }
        Map<String, Token> table = positive ? ranges : ranges2;
        return (RangeToken) table.get(name);
    }

    /**
     * Adds every (start, end) pair found in {@code src} to {@code range}.
     *
     * @param src consecutive pairs of chars; its length must be even because the loop
     *            reads {@code charAt(i)} and {@code charAt(i+1)}
     */
    static void setupRange(Token range, String src) {
        int len = src.length();
        for (int i = 0; i < len; i += 2) {
            range.addRange(src.charAt(i), src.charAt(i + 1));
        }
    }

    /**
     * Adds every (start, end) pair found in {@code src} to {@code range}.
     *
     * @param src consecutive pairs of code points; its length must be even because the
     *            loop reads {@code src[i]} and {@code src[i+1]}
     */
    static void setupRange(Token range, int[] src) {
        int len = src.length;
        for (int i = 0; i < len; i += 2) {
            range.addRange(src[i], src[i + 1]);
        }
    }

    /*
     * Three (start, end) pairs: TAB..LF, CR..CR, SPACE..SPACE. The space pair is spelled
     * with Unicode escapes so that whitespace normalization of the source cannot turn
     * the table into an odd-length string, which setupRange(Token, String) would read
     * past the end of.
     */
    private static final String SPACES = "\t\n\r\r\u0020\u0020";

    /* (start, end) pairs consumed by setupRange for "xml:isNameChar". */
    private static final String NAMECHARS =
        "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
        +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
        +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
        +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
        +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
        +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
        +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
        +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
        +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
        +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
        +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
        +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
        +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
        +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
        +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
        +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
        +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
        +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
        +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
        +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
        +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
        +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
        +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
        +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
        +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
        +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
        +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
        +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
        +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
        +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
        +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
        +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
        +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
        +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
        +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
        +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
        +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
        +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
        +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
        +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
        +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
        +"";

    /* (start, end) pairs consumed by setupRange for "xml:isInitialNameChar". */
    private static final String LETTERS =
        "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
        +"\u014a\u017e\u0180\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
        +"\u02b0\u02d1"
        +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
        +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
        +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
        +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
        +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
        +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
        +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
        +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
        +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
        +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
        +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
        +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
        +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
        +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
        +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
        +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
        +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
        +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
        +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
        +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
        +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
        +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
        +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
        +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
        +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
        +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
        +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
        +"\uac00\ud7a3\uff66\uff9f";

    /* Additional (start, end) pairs above 0xFFFF, merged after LETTERS. */
    private static final int[] LETTERS_INT = {0x1d790, 0x1d7a8, 0x1d7aa, 0x1d7c9, 0x2fa1b, 0x2fa1d};

    /* (start, end) pairs consumed by setupRange for "xml:isDigit". */
    private static final int[] DIGITS_INTS = {
        0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
        0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
        0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
        0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
        0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
        0x1D7CE, 0x1D7FF
    };
}