1 /* 2 * $Id: XMLChecker.java,v 1.11 2005/09/12 08:40:02 znerd Exp $ 3 */ 4 package org.znerd.xmlenc; 5 6 /** 7 * Utility class that provides XML checking functionality. 8 * 9 * @version $Revision: 1.11 $ $Date: 2005/09/12 08:40:02 $ 10 * @author Ernst de Haan (<a href="mailto:wfe.dehaan@gmail.com">wfe.dehaan@gmail.com</a>) 11 * 12 * @since xmlenc 0.41 13 */ 14 public final class XMLChecker extends Object { 15 16 //------------------------------------------------------------------------- 17 // Class functions 18 //------------------------------------------------------------------------- 19 20 /** 21 * Checks if the specified string matches the <em>S</em> (white space) 22 * production. 23 * 24 * <p>See: 25 * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>. 26 * 27 * @param s 28 * the character string to check, cannot be <code>null</code>. 29 * 30 * @throws NullPointerException 31 * if <code>s == null</code>. 32 * 33 * @throws InvalidXMLException 34 * if the specified character string does not match the <em>S</em> 35 * production. 36 */ checkS(String s)37 public static final void checkS(String s) 38 throws NullPointerException { 39 checkS(s.toCharArray(), 0, s.length()); 40 } 41 42 /** 43 * Checks if the specified part of a character array matches the <em>S</em> 44 * (white space) production. 45 * 46 * <p>See: 47 * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>. 48 * 49 * @param ch 50 * the character array that contains the characters to be checked, 51 * cannot be <code>null</code>. 52 * 53 * @param start 54 * the start index into <code>ch</code>, must be >= 0. 55 * 56 * @param length 57 * the number of characters to take from <code>ch</code>, starting at 58 * the <code>start</code> index. 59 * 60 * @throws NullPointerException 61 * if <code>ch == null</code>. 62 * 63 * @throws IndexOutOfBoundsException 64 * if <code>start < 0 65 * || start + length > ch.length</code>. 66 * 67 * @throws InvalidXMLException 68 * if the specified character string does not match the <em>S</em> 69 * production. 70 */ checkS(char[] ch, int start, int length)71 public static final void checkS(char[] ch, int start, int length) 72 throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException { 73 74 // Loop through the array and check each character 75 for (int i = start; i < length; i++) { 76 int c = (int) ch[i]; 77 78 if (c != 0x20 && c != 0x9 && c != 0xD && c != 0xA) { 79 throw new InvalidXMLException("The character 0x" + Integer.toHexString(c) + " is not valid for the 'S' production (white space)."); 80 } 81 } 82 } 83 84 /** 85 * Determines if the specified string matches the <em>Name</em> production. 86 * 87 * <p>See: 88 * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>. 89 * 90 * @param s 91 * the character string to check, cannot be <code>null</code>. 92 * 93 * @throws NullPointerException 94 * if <code>s == null</code>. 95 * 96 * @return 97 * <code>true</code> if the {@link String} matches the production, or 98 * <code>false</code> otherwise. 99 */ isName(String s)100 public static final boolean isName(String s) 101 throws NullPointerException { 102 try { 103 checkName(s); 104 return true; 105 } catch (InvalidXMLException exception) { 106 return false; 107 } 108 } 109 110 /** 111 * Checks if the specified string matches the <em>Name</em> production. 112 * 113 * <p>See: 114 * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>. 115 * 116 * @param s 117 * the character string to check, cannot be <code>null</code>. 118 * 119 * @throws NullPointerException 120 * if <code>s == null</code>. 121 * 122 * @throws InvalidXMLException 123 * if the specified character string does not match the <em>Name</em> 124 * production. 125 */ checkName(String s)126 public static final void checkName(String s) 127 throws NullPointerException, InvalidXMLException { 128 checkName(s.toCharArray(), 0, s.length()); 129 } 130 131 /** 132 * Checks if the specified part of a character array matches the 133 * <em>Name</em> production. 134 * 135 * <p>See: 136 * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>. 137 * 138 * @param ch 139 * the character array that contains the characters to be checked, 140 * cannot be <code>null</code>. 141 * 142 * @param start 143 * the start index into <code>ch</code>, must be >= 0. 144 * 145 * @param length 146 * the number of characters to take from <code>ch</code>, starting at 147 * the <code>start</code> index. 148 * 149 * @throws NullPointerException 150 * if <code>ch == null</code>. 151 * 152 * @throws IndexOutOfBoundsException 153 * if <code>start < 0 154 * || start + length > ch.length</code>. 155 * 156 * @throws InvalidXMLException 157 * if the specified character string does not match the <em>Name</em> 158 * production. 159 */ checkName(char[] ch, int start, int length)160 public static final void checkName(char[] ch, int start, int length) 161 throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException { 162 163 // Minimum length is 1 164 if (length < 1) { 165 throw new InvalidXMLException("An empty string does not match the 'Name' production."); 166 } 167 168 // First char must match: (Letter | '_' | ':') 169 int i = start; 170 char c = ch[i]; 171 if (c != '_' && c != ':' && !isLetter(c)) { 172 throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is invalid as a starting character in the 'Name' production."); 173 } 174 175 // Loop through the array and check each character 176 for (i++; i < length; i++) { 177 c = ch[i]; 178 179 if (!isNameChar(c)) { 180 throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid for the 'Name' production."); 181 } 182 } 183 } 184 185 /** 186 * Determines if the specified string matches the <em>SystemLiteral</em> 187 * production. 188 * 189 * <p>See: 190 * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>. 191 * 192 * @param s 193 * the character string to check, cannot be <code>null</code>. 194 * 195 * @throws NullPointerException 196 * if <code>s == null</code>. 197 * 198 * @return 199 * <code>true</code> if the {@link String} matches the production, or 200 * <code>false</code> otherwise. 201 */ isSystemLiteral(String s)202 public static final boolean isSystemLiteral(String s) 203 throws NullPointerException { 204 try { 205 checkSystemLiteral(s); 206 return true; 207 } catch (InvalidXMLException exception) { 208 return false; 209 } 210 } 211 212 /** 213 * Checks if the specified string matches the <em>SystemLiteral</em> 214 * production. 215 * 216 * <p>See: 217 * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>. 218 * 219 * @param s 220 * the character string to check, cannot be <code>null</code>. 221 * 222 * @throws NullPointerException 223 * if <code>s == null</code>. 224 * 225 * @throws InvalidXMLException 226 * if the specified character string does not match the 227 * <em>PubidLiteral</em> production. 228 */ checkSystemLiteral(String s)229 public static final void checkSystemLiteral(String s) 230 throws NullPointerException, InvalidXMLException { 231 checkSystemLiteral(s.toCharArray(), 0, s.length()); 232 } 233 234 /** 235 * Checks if the specified part of a character array matches the 236 * <em>SystemLiteral</em> production. 237 * 238 * <p>See: 239 * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>. 240 * 241 * @param ch 242 * the character array that contains the characters to be checked, 243 * cannot be <code>null</code>. 244 * 245 * @param start 246 * the start index into <code>ch</code>, must be >= 0. 247 * 248 * @param length 249 * the number of characters to take from <code>ch</code>, starting at 250 * the <code>start</code> index. 251 * 252 * @throws NullPointerException 253 * if <code>ch == null</code>. 254 * 255 * @throws IndexOutOfBoundsException 256 * if <code>start < 0 257 * || start + length > ch.length</code>. 258 * 259 * @throws InvalidXMLException 260 * if the specified character string does not match the 261 * <em>SystemLiteral</em> production. 262 */ checkSystemLiteral(char[] ch, int start, int length)263 public static final void checkSystemLiteral(char[] ch, 264 int start, 265 int length) 266 throws NullPointerException, 267 IndexOutOfBoundsException, 268 InvalidXMLException { 269 270 // Minimum length is 3 271 if (length < 3) { 272 throw new InvalidXMLException("Minimum length for the 'SystemLiteral' production is 3 characters."); 273 } 274 275 int lastIndex = start + length - 1; 276 char firstChar = ch[0]; 277 char lastChar = ch[lastIndex]; 278 279 // First and last char: single qoute (apostrophe) 280 String otherAllowedChars; 281 if (firstChar == '\'') { 282 if (lastChar != '\'') { 283 throw new InvalidXMLException("First character is '\\'', but the " 284 + "last character is 0x" 285 + Integer.toHexString((int) lastChar) 286 + '.'); 287 } 288 otherAllowedChars = "-()+,./:=?;!*#@$_%"; 289 290 // First and last char: double qoute character 291 } else if (firstChar == '"') { 292 if (lastChar != '"') { 293 throw new InvalidXMLException("First character is '\"', but the " 294 + "last character is 0x" 295 + Integer.toHexString((int) lastChar) 296 + '.'); 297 } 298 otherAllowedChars = "-'()+,./:=?;!*#@$_%"; 299 300 // First character is invalid 301 } else { 302 throw new InvalidXMLException("First char must either be '\\'' or " 303 + "'\"' instead of 0x" 304 + Integer.toHexString((int) firstChar) 305 + '.'); 306 } 307 308 309 // Check each character 310 for (int i = 1; i < (length - 1); i++) { 311 char c = ch[i]; 312 313 if (c == firstChar) { 314 if (firstChar == '\'') { 315 throw new InvalidXMLException("Found '\\'' at position " + i + '.'); 316 } else { 317 throw new InvalidXMLException("Found '\"' at position " + i + '.'); 318 } 319 } 320 } 321 } 322 323 /** 324 * Determines if the specified string matches the <em>PubidLiteral</em> 325 * production. 326 * 327 * <p>See: 328 * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>. 329 * 330 * @param s 331 * the character string to check, cannot be <code>null</code>. 332 * 333 * @throws NullPointerException 334 * if <code>s == null</code>. 335 * 336 * @return 337 * <code>true</code> if the {@link String} matches the production, or 338 * <code>false</code> otherwise. 339 */ isPubidLiteral(String s)340 public static final boolean isPubidLiteral(String s) 341 throws NullPointerException { 342 try { 343 checkPubidLiteral(s); 344 return true; 345 } catch (InvalidXMLException exception) { 346 return false; 347 } 348 } 349 350 /** 351 * Checks if the specified string matches the <em>PubidLiteral</em> 352 * production. 353 * 354 * <p>See: 355 * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>. 356 * 357 * @param s 358 * the character string to check, cannot be <code>null</code>. 359 * 360 * @throws NullPointerException 361 * if <code>s == null</code>. 362 * 363 * @throws InvalidXMLException 364 * if the specified character string does not match the 365 * <em>PubidLiteral</em> production. 366 */ checkPubidLiteral(String s)367 public static final void checkPubidLiteral(String s) 368 throws NullPointerException, InvalidXMLException { 369 checkPubidLiteral(s.toCharArray(), 0, s.length()); 370 } 371 372 /** 373 * Checks if the specified part of a character array matches the 374 * <em>PubidLiteral</em> production. 375 * 376 * <p>See: 377 * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>. 378 * 379 * @param ch 380 * the character array that contains the characters to be checked, 381 * cannot be <code>null</code>. 382 * 383 * @param start 384 * the start index into <code>ch</code>, must be >= 0. 385 * 386 * @param length 387 * the number of characters to take from <code>ch</code>, starting at 388 * the <code>start</code> index. 389 * 390 * @throws NullPointerException 391 * if <code>ch == null</code>. 392 * 393 * @throws IndexOutOfBoundsException 394 * if <code>start < 0 395 * || start + length > ch.length</code>. 396 * 397 * @throws InvalidXMLException 398 * if the specified character string does not match the 399 * <em>PubidLiteral</em> production. 400 */ checkPubidLiteral(char[] ch, int start, int length)401 public static final void checkPubidLiteral(char[] ch, 402 int start, 403 int length) 404 throws NullPointerException, 405 IndexOutOfBoundsException, 406 InvalidXMLException { 407 408 // Minimum length is 3 409 if (length < 3) { 410 throw new InvalidXMLException("Minimum length for the 'PubidLiteral' production is 3 characters."); 411 } 412 413 int lastIndex = start + length - 1; 414 char firstChar = ch[0]; 415 char lastChar = ch[lastIndex]; 416 417 // First and last char: single qoute (apostrophe) 418 String otherAllowedChars; 419 if (firstChar == '\'') { 420 if (lastChar != '\'') { 421 throw new InvalidXMLException("First character is '\\'', but the " 422 + "last character is 0x" 423 + Integer.toHexString((int) lastChar) 424 + '.'); 425 } 426 otherAllowedChars = "-()+,./:=?;!*#@$_%"; 427 428 // First and last char: double qoute character 429 } else if (firstChar == '"') { 430 if (lastChar != '"') { 431 throw new InvalidXMLException("First character is '\"', but the " 432 + "last character is 0x" 433 + Integer.toHexString((int) lastChar) 434 + '.'); 435 } 436 otherAllowedChars = "-'()+,./:=?;!*#@$_%"; 437 438 // First character is invalid 439 } else { 440 throw new InvalidXMLException("First char must either be '\\'' or " 441 + "'\"' instead of 0x" 442 + Integer.toHexString((int) firstChar) 443 + '.'); 444 } 445 446 447 // Check each character 448 for (int i = 1; i < (length - 1); i++) { 449 char c = ch[i]; 450 451 if (c != 0x20 && c != 0x0D && c != 0x0A && !isLetter(c) && !isDigit(c) 452 && otherAllowedChars.indexOf(c) < 0) { 453 // TODO: Quote character properly, even if it is an apostrophe 454 throw new InvalidXMLException("The character '" 455 + c 456 + "' (0x" 457 + Integer.toHexString((int) c) 458 + ") is not valid for the " 459 + "'PubidLiteral' production."); 460 } 461 } 462 } 463 464 /** 465 * Determines if the specified character matches the <em>NameChar</em> 466 * production. 467 * 468 * <p>See: 469 * <a href="http://www.w3.org/TR/REC-xml#NT-NameChar">Definition of NameChar</a>. 470 * 471 * @param c 472 * the character to check. 473 * 474 * @return 475 * <code>true</code> if the character matches the production, or 476 * <code>false</code> if it does not. 477 */ isNameChar(char c)478 private static final boolean isNameChar(char c) { 479 return c == '.' 480 || c == '-' 481 || c == '_' 482 || c == ':' 483 || isDigit(c) 484 || isLetter(c) 485 || isCombiningChar(c) 486 || isExtender(c); 487 } 488 489 /** 490 * Determines if the specified character matches the <em>Letter</em> 491 * production. 492 * 493 * <p>See: 494 * <a href="http://www.w3.org/TR/REC-xml#NT-Letter">Definition of Letter</a>. 495 * 496 * @param c 497 * the character to check. 498 * 499 * @return 500 * <code>true</code> if the character matches the production, or 501 * <code>false</code> if it does not. 502 */ isLetter(char c)503 private static final boolean isLetter(char c) { 504 return isBaseChar(c) || isIdeographic(c); 505 } 506 507 /** 508 * Determines if the specified character matches the <em>BaseChar</em> 509 * production. 510 * 511 * <p>See: 512 * <a href="http://www.w3.org/TR/REC-xml#NT-BaseChar">Definition of BaseChar</a>. 513 * 514 * @param c 515 * the character to check. 516 * 517 * @return 518 * <code>true</code> if the character matches the production, or 519 * <code>false</code> if it does not. 520 */ isBaseChar(char c)521 private static final boolean isBaseChar(char c) { 522 int n = (int) c; 523 return (n >= 0x0041 && n <= 0x005A) 524 || (n >= 0x0061 && n <= 0x007A) 525 || (n >= 0x00C0 && n <= 0x00D6) 526 || (n >= 0x00D8 && n <= 0x00F6) 527 || (n >= 0x00F8 && n <= 0x00FF) 528 || (n >= 0x0100 && n <= 0x0131) 529 || (n >= 0x0134 && n <= 0x013E) 530 || (n >= 0x0141 && n <= 0x0148) 531 || (n >= 0x014A && n <= 0x017E) 532 || (n >= 0x0180 && n <= 0x01C3) 533 || (n >= 0x01CD && n <= 0x01F0) 534 || (n >= 0x01F4 && n <= 0x01F5) 535 || (n >= 0x01FA && n <= 0x0217) 536 || (n >= 0x0250 && n <= 0x02A8) 537 || (n >= 0x02BB && n <= 0x02C1) 538 || (n == 0x0386) 539 || (n >= 0x0388 && n <= 0x038A) 540 || (n == 0x038C) 541 || (n >= 0x038E && n <= 0x03A1) 542 || (n >= 0x03A3 && n <= 0x03CE) 543 || (n >= 0x03D0 && n <= 0x03D6) 544 || (n == 0x03DA) 545 || (n == 0x03DC) 546 || (n == 0x03DE) 547 || (n == 0x03E0) 548 || (n >= 0x03E2 && n <= 0x03F3) 549 || (n >= 0x0401 && n <= 0x040C) 550 || (n >= 0x040E && n <= 0x044F) 551 || (n >= 0x0451 && n <= 0x045C) 552 || (n >= 0x045E && n <= 0x0481) 553 || (n >= 0x0490 && n <= 0x04C4) 554 || (n >= 0x04C7 && n <= 0x04C8) 555 || (n >= 0x04CB && n <= 0x04CC) 556 || (n >= 0x04D0 && n <= 0x04EB) 557 || (n >= 0x04EE && n <= 0x04F5) 558 || (n >= 0x04F8 && n <= 0x04F9) 559 || (n >= 0x0531 && n <= 0x0556) 560 || (n == 0x0559) 561 || (n >= 0x0561 && n <= 0x0586) 562 || (n >= 0x05D0 && n <= 0x05EA) 563 || (n >= 0x05F0 && n <= 0x05F2) 564 || (n >= 0x0621 && n <= 0x063A) 565 || (n >= 0x0641 && n <= 0x064A) 566 || (n >= 0x0671 && n <= 0x06B7) 567 || (n >= 0x06BA && n <= 0x06BE) 568 || (n >= 0x06C0 && n <= 0x06CE) 569 || (n >= 0x06D0 && n <= 0x06D3) 570 || (n == 0x06D5) 571 || (n >= 0x06E5 && n <= 0x06E6) 572 || (n >= 0x0905 && n <= 0x0939) 573 || (n == 0x093D) 574 || (n >= 0x0958 && n <= 0x0961) 575 || (n >= 0x0985 && n <= 0x098C) 576 || (n >= 0x098F && n <= 0x0990) 577 || (n >= 0x0993 && n <= 0x09A8) 578 || (n >= 0x09AA && n <= 0x09B0) 579 || (n == 0x09B2) 580 || (n >= 0x09B6 && n <= 0x09B9) 581 || (n >= 0x09DC && n <= 0x09DD) 582 || (n >= 0x09DF && n <= 0x09E1) 583 || (n >= 0x09F0 && n <= 0x09F1) 584 || (n >= 0x0A05 && n <= 0x0A0A) 585 || (n >= 0x0A0F && n <= 0x0A10) 586 || (n >= 0x0A13 && n <= 0x0A28) 587 || (n >= 0x0A2A && n <= 0x0A30) 588 || (n >= 0x0A32 && n <= 0x0A33) 589 || (n >= 0x0A35 && n <= 0x0A36) 590 || (n >= 0x0A38 && n <= 0x0A39) 591 || (n >= 0x0A59 && n <= 0x0A5C) 592 || (n == 0x0A5E) 593 || (n >= 0x0A72 && n <= 0x0A74) 594 || (n >= 0x0A85 && n <= 0x0A8B) 595 || (n == 0x0A8D) 596 || (n >= 0x0A8F && n <= 0x0A91) 597 || (n >= 0x0A93 && n <= 0x0AA8) 598 || (n >= 0x0AAA && n <= 0x0AB0) 599 || (n >= 0x0AB2 && n <= 0x0AB3) 600 || (n >= 0x0AB5 && n <= 0x0AB9) 601 || (n == 0x0ABD) 602 || (n == 0x0AE0) 603 || (n >= 0x0B05 && n <= 0x0B0C) 604 || (n >= 0x0B0F && n <= 0x0B10) 605 || (n >= 0x0B13 && n <= 0x0B28) 606 || (n >= 0x0B2A && n <= 0x0B30) 607 || (n >= 0x0B32 && n <= 0x0B33) 608 || (n >= 0x0B36 && n <= 0x0B39) 609 || (n == 0x0B3D) 610 || (n >= 0x0B5C && n <= 0x0B5D) 611 || (n >= 0x0B5F && n <= 0x0B61) 612 || (n >= 0x0B85 && n <= 0x0B8A) 613 || (n >= 0x0B8E && n <= 0x0B90) 614 || (n >= 0x0B92 && n <= 0x0B95) 615 || (n >= 0x0B99 && n <= 0x0B9A) 616 || (n == 0x0B9C) 617 || (n >= 0x0B9E && n <= 0x0B9F) 618 || (n >= 0x0BA3 && n <= 0x0BA4) 619 || (n >= 0x0BA8 && n <= 0x0BAA) 620 || (n >= 0x0BAE && n <= 0x0BB5) 621 || (n >= 0x0BB7 && n <= 0x0BB9) 622 || (n >= 0x0C05 && n <= 0x0C0C) 623 || (n >= 0x0C0E && n <= 0x0C10) 624 || (n >= 0x0C12 && n <= 0x0C28) 625 || (n >= 0x0C2A && n <= 0x0C33) 626 || (n >= 0x0C35 && n <= 0x0C39) 627 || (n >= 0x0C60 && n <= 0x0C61) 628 || (n >= 0x0C85 && n <= 0x0C8C) 629 || (n >= 0x0C8E && n <= 0x0C90) 630 || (n >= 0x0C92 && n <= 0x0CA8) 631 || (n >= 0x0CAA && n <= 0x0CB3) 632 || (n >= 0x0CB5 && n <= 0x0CB9) 633 || (n == 0x0CDE) 634 || (n >= 0x0CE0 && n <= 0x0CE1) 635 || (n >= 0x0D05 && n <= 0x0D0C) 636 || (n >= 0x0D0E && n <= 0x0D10) 637 || (n >= 0x0D12 && n <= 0x0D28) 638 || (n >= 0x0D2A && n <= 0x0D39) 639 || (n >= 0x0D60 && n <= 0x0D61) 640 || (n >= 0x0E01 && n <= 0x0E2E) 641 || (n == 0x0E30) 642 || (n >= 0x0E32 && n <= 0x0E33) 643 || (n >= 0x0E40 && n <= 0x0E45) 644 || (n >= 0x0E81 && n <= 0x0E82) 645 || (n == 0x0E84) 646 || (n >= 0x0E87 && n <= 0x0E88) 647 || (n == 0x0E8A) 648 || (n == 0x0E8D) 649 || (n >= 0x0E94 && n <= 0x0E97) 650 || (n >= 0x0E99 && n <= 0x0E9F) 651 || (n >= 0x0EA1 && n <= 0x0EA3) 652 || (n == 0x0EA5) 653 || (n == 0x0EA7) 654 || (n >= 0x0EAA && n <= 0x0EAB) 655 || (n >= 0x0EAD && n <= 0x0EAE) 656 || (n == 0x0EB0) 657 || (n >= 0x0EB2 && n <= 0x0EB3) 658 || (n == 0x0EBD) 659 || (n >= 0x0EC0 && n <= 0x0EC4) 660 || (n >= 0x0F40 && n <= 0x0F47) 661 || (n >= 0x0F49 && n <= 0x0F69) 662 || (n >= 0x10A0 && n <= 0x10C5) 663 || (n >= 0x10D0 && n <= 0x10F6) 664 || (n == 0x1100) 665 || (n >= 0x1102 && n <= 0x1103) 666 || (n >= 0x1105 && n <= 0x1107) 667 || (n == 0x1109) 668 || (n >= 0x110B && n <= 0x110C) 669 || (n >= 0x110E && n <= 0x1112) 670 || (n == 0x113C) 671 || (n == 0x113E) 672 || (n == 0x1140) 673 || (n == 0x114C) 674 || (n == 0x114E) 675 || (n == 0x1150) 676 || (n >= 0x1154 && n <= 0x1155) 677 || (n == 0x1159) 678 || (n >= 0x115F && n <= 0x1161) 679 || (n == 0x1163) 680 || (n == 0x1165) 681 || (n == 0x1167) 682 || (n == 0x1169) 683 || (n >= 0x116D && n <= 0x116E) 684 || (n >= 0x1172 && n <= 0x1173) 685 || (n == 0x1175) 686 || (n == 0x119E) 687 || (n == 0x11A8) 688 || (n == 0x11AB) 689 || (n >= 0x11AE && n <= 0x11AF) 690 || (n >= 0x11B7 && n <= 0x11B8) 691 || (n == 0x11BA) 692 || (n >= 0x11BC && n <= 0x11C2) 693 || (n == 0x11EB) 694 || (n == 0x11F0) 695 || (n == 0x11F9) 696 || (n >= 0x1E00 && n <= 0x1E9B) 697 || (n >= 0x1EA0 && n <= 0x1EF9) 698 || (n >= 0x1F00 && n <= 0x1F15) 699 || (n >= 0x1F18 && n <= 0x1F1D) 700 || (n >= 0x1F20 && n <= 0x1F45) 701 || (n >= 0x1F48 && n <= 0x1F4D) 702 || (n >= 0x1F50 && n <= 0x1F57) 703 || (n == 0x1F59) 704 || (n == 0x1F5B) 705 || (n == 0x1F5D) 706 || (n >= 0x1F5F && n <= 0x1F7D) 707 || (n >= 0x1F80 && n <= 0x1FB4) 708 || (n >= 0x1FB6 && n <= 0x1FBC) 709 || (n == 0x1FBE) 710 || (n >= 0x1FC2 && n <= 0x1FC4) 711 || (n >= 0x1FC6 && n <= 0x1FCC) 712 || (n >= 0x1FD0 && n <= 0x1FD3) 713 || (n >= 0x1FD6 && n <= 0x1FDB) 714 || (n >= 0x1FE0 && n <= 0x1FEC) 715 || (n >= 0x1FF2 && n <= 0x1FF4) 716 || (n >= 0x1FF6 && n <= 0x1FFC) 717 || (n == 0x2126) 718 || (n >= 0x212A && n <= 0x212B) 719 || (n == 0x212E) 720 || (n >= 0x2180 && n <= 0x2182) 721 || (n >= 0x3041 && n <= 0x3094) 722 || (n >= 0x30A1 && n <= 0x30FA) 723 || (n >= 0x3105 && n <= 0x312C) 724 || (n >= 0xAC00 && n <= 0xD7A3); 725 } 726 727 /** 728 * Determines if the specified character matches the <em>Ideographic</em> 729 * production. 730 * 731 * <p>See: 732 * <a href="http://www.w3.org/TR/REC-xml#NT-Ideographic">Definition of Ideographic</a>. 733 * 734 * @param c 735 * the character to check. 736 * 737 * @return 738 * <code>true</code> if the character matches the production, or 739 * <code>false</code> if it does not. 740 */ isIdeographic(char c)741 private static final boolean isIdeographic(char c) { 742 int n = (int) c; 743 return (n >= 0x4E00 && n <= 0x9FA5) 744 || (n == 0x3007) 745 || (n >= 0x3021 && n <= 0x3029); 746 } 747 748 /** 749 * Determines if the specified character matches the <em>CombiningChar</em> 750 * production. 751 * 752 * <p>See: 753 * <a href="http://www.w3.org/TR/REC-xml#NT-CombiningChar">Definition of CombiningChar</a>. 754 * 755 * @param c 756 * the character to check. 757 * 758 * @return 759 * <code>true</code> if the character matches the production, or 760 * <code>false</code> if it does not. 761 */ isCombiningChar(char c)762 private static final boolean isCombiningChar(char c) { 763 int n = (int) c; 764 return (n >= 0x0300 && n <= 0x0345) 765 || (n >= 0x0360 && n <= 0x0361) 766 || (n >= 0x0483 && n <= 0x0486) 767 || (n >= 0x0591 && n <= 0x05A1) 768 || (n >= 0x05A3 && n <= 0x05B9) 769 || (n >= 0x05BB && n <= 0x05BD) 770 || (n == 0x05BF) 771 || (n >= 0x05C1 && n <= 0x05C2) 772 || (n == 0x05C4) 773 || (n >= 0x064B && n <= 0x0652) 774 || (n == 0x0670) 775 || (n >= 0x06D6 && n <= 0x06DC) 776 || (n >= 0x06DD && n <= 0x06DF) 777 || (n >= 0x06E0 && n <= 0x06E4) 778 || (n >= 0x06E7 && n <= 0x06E8) 779 || (n >= 0x06EA && n <= 0x06ED) 780 || (n >= 0x0901 && n <= 0x0903) 781 || (n == 0x093C) 782 || (n >= 0x093E && n <= 0x094C) 783 || (n == 0x094D) 784 || (n >= 0x0951 && n <= 0x0954) 785 || (n >= 0x0962 && n <= 0x0963) 786 || (n >= 0x0981 && n <= 0x0983) 787 || (n == 0x09BC) 788 || (n == 0x09BE) 789 || (n == 0x09BF) 790 || (n >= 0x09C0 && n <= 0x09C4) 791 || (n >= 0x09C7 && n <= 0x09C8) 792 || (n >= 0x09CB && n <= 0x09CD) 793 || (n == 0x09D7) 794 || (n >= 0x09E2 && n <= 0x09E3) 795 || (n == 0x0A02) 796 || (n == 0x0A3C) 797 || (n == 0x0A3E) 798 || (n == 0x0A3F) 799 || (n >= 0x0A40 && n <= 0x0A42) 800 || (n >= 0x0A47 && n <= 0x0A48) 801 || (n >= 0x0A4B && n <= 0x0A4D) 802 || (n >= 0x0A70 && n <= 0x0A71) 803 || (n >= 0x0A81 && n <= 0x0A83) 804 || (n == 0x0ABC) 805 || (n >= 0x0ABE && n <= 0x0AC5) 806 || (n >= 0x0AC7 && n <= 0x0AC9) 807 || (n >= 0x0ACB && n <= 0x0ACD) 808 || (n >= 0x0B01 && n <= 0x0B03) 809 || (n == 0x0B3C) 810 || (n >= 0x0B3E && n <= 0x0B43) 811 || (n >= 0x0B47 && n <= 0x0B48) 812 || (n >= 0x0B4B && n <= 0x0B4D) 813 || (n >= 0x0B56 && n <= 0x0B57) 814 || (n >= 0x0B82 && n <= 0x0B83) 815 || (n >= 0x0BBE && n <= 0x0BC2) 816 || (n >= 0x0BC6 && n <= 0x0BC8) 817 || (n >= 0x0BCA && n <= 0x0BCD) 818 || (n == 0x0BD7) 819 || (n >= 0x0C01 && n <= 0x0C03) 820 || (n >= 0x0C3E && n <= 0x0C44) 821 || (n >= 0x0C46 && n <= 0x0C48) 822 || (n >= 0x0C4A && n <= 0x0C4D) 823 || (n >= 0x0C55 && n <= 0x0C56) 824 || (n >= 0x0C82 && n <= 0x0C83) 825 || (n >= 0x0CBE && n <= 0x0CC4) 826 || (n >= 0x0CC6 && n <= 0x0CC8) 827 || (n >= 0x0CCA && n <= 0x0CCD) 828 || (n >= 0x0CD5 && n <= 0x0CD6) 829 || (n >= 0x0D02 && n <= 0x0D03) 830 || (n >= 0x0D3E && n <= 0x0D43) 831 || (n >= 0x0D46 && n <= 0x0D48) 832 || (n >= 0x0D4A && n <= 0x0D4D) 833 || (n == 0x0D57) 834 || (n == 0x0E31) 835 || (n >= 0x0E34 && n <= 0x0E3A) 836 || (n >= 0x0E47 && n <= 0x0E4E) 837 || (n == 0x0EB1) 838 || (n >= 0x0EB4 && n <= 0x0EB9) 839 || (n >= 0x0EBB && n <= 0x0EBC) 840 || (n >= 0x0EC8 && n <= 0x0ECD) 841 || (n >= 0x0F18 && n <= 0x0F19) 842 || (n == 0x0F35) 843 || (n == 0x0F37) 844 || (n == 0x0F39) 845 || (n == 0x0F3E) 846 || (n == 0x0F3F) 847 || (n >= 0x0F71 && n <= 0x0F84) 848 || (n >= 0x0F86 && n <= 0x0F8B) 849 || (n >= 0x0F90 && n <= 0x0F95) 850 || (n == 0x0F97) 851 || (n >= 0x0F99 && n <= 0x0FAD) 852 || (n >= 0x0FB1 && n <= 0x0FB7) 853 || (n == 0x0FB9) 854 || (n >= 0x20D0 && n <= 0x20DC) 855 || (n == 0x20E1) 856 || (n >= 0x302A && n <= 0x302F) 857 || (n == 0x3099) 858 || (n == 0x309A); 859 } 860 861 /** 862 * Determines if the specified character matches the <em>Digit</em> 863 * production. 864 * 865 * <p>See: 866 * <a href="http://www.w3.org/TR/REC-xml#NT-Digit">Definition of Digit</a>. 867 * 868 * @param c 869 * the character to check. 870 * 871 * @return 872 * <code>true</code> if the character matches the production, or 873 * <code>false</code> if it does not. 874 */ isDigit(char c)875 private static final boolean isDigit(char c) { 876 int n = (int) c; 877 return (n >= 0x0030 && n <= 0x0039) 878 || (n >= 0x0660 && n <= 0x0669) 879 || (n >= 0x06F0 && n <= 0x06F9) 880 || (n >= 0x0966 && n <= 0x096F) 881 || (n >= 0x09E6 && n <= 0x09EF) 882 || (n >= 0x0A66 && n <= 0x0A6F) 883 || (n >= 0x0AE6 && n <= 0x0AEF) 884 || (n >= 0x0B66 && n <= 0x0B6F) 885 || (n >= 0x0BE7 && n <= 0x0BEF) 886 || (n >= 0x0C66 && n <= 0x0C6F) 887 || (n >= 0x0CE6 && n <= 0x0CEF) 888 || (n >= 0x0D66 && n <= 0x0D6F) 889 || (n >= 0x0E50 && n <= 0x0E59) 890 || (n >= 0x0ED0 && n <= 0x0ED9) 891 || (n >= 0x0F20 && n <= 0x0F29); 892 } 893 894 /** 895 * Determines if the specified character matches the <em>Extender</em> 896 * production. 897 * 898 * <p>See: 899 * <a href="http://www.w3.org/TR/REC-xml#NT-Extender">Definition of Extender</a>. 900 * 901 * @param c 902 * the character to check. 903 * 904 * @return 905 * <code>true</code> if the character matches the production, or 906 * <code>false</code> if it does not. 907 */ isExtender(char c)908 private static final boolean isExtender(char c) { 909 int n = (int) c; 910 return (n == 0x00B7) 911 || (n == 0x02D0) 912 || (n == 0x02D1) 913 || (n == 0x0387) 914 || (n == 0x0640) 915 || (n == 0x0E46) 916 || (n == 0x0EC6) 917 || (n == 0x3005) 918 || (n >= 0x3031 && n <= 0x3035) 919 || (n >= 0x309D && n <= 0x309E) 920 || (n >= 0x30FC && n <= 0x30FE); 921 } 922 923 924 //------------------------------------------------------------------------- 925 // Class fields 926 //------------------------------------------------------------------------- 927 928 //------------------------------------------------------------------------- 929 // Constructor 930 //------------------------------------------------------------------------- 931 932 /** 933 * Constructs a new <code>XMLChecker</code> object. This constructor is 934 * private since no objects of this class should be created. 935 */ XMLChecker()936 private XMLChecker() { 937 // empty 938 } 939 940 941 //------------------------------------------------------------------------- 942 // Fields 943 //------------------------------------------------------------------------- 944 945 //------------------------------------------------------------------------- 946 // Methods 947 //------------------------------------------------------------------------- 948 } 949